diff --git a/gcc/common.opt b/gcc/common.opt
index ed4696b7ada40e06fccf7581335b45a48209bf78..219eb2b69901076c85346b543a95b6e65f45b659 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1134,6 +1134,10 @@ floop-crc
 Common Var(flag_loop_crc) Optimization
 Do the loop crc conversion.
 
+floop-sve-mode-opt
+Common Var(flag_loop_sve_mode_opt) Optimization
+Try an additional SVE vector mode when vectorizing some loops.
+
 fauto-inc-dec
 Common Var(flag_auto_inc_dec) Init(1) Optimization
 Generate auto-inc/dec instructions.
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 52cac0b82d79393c30c1281a935ff5db5947a831..394c4d1e3191bccc79817275ae8d5c5c0e61472d 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -19123,6 +19123,7 @@ override_C_optimize_options (struct gcc_options *opts)
   opts->x_flag_ipa_prefetch = 1;
   opts->x_flag_ipa_ic = 1;
   opts->x_flag_cmlt_arith = 1;
+  opts->x_flag_loop_sve_mode_opt = 1;
 }
 
 /* Check whether in CPP language or LTO with only CPP language.  */
diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-1.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-1.c
new file mode 100644
index 0000000000000000000000000000000000000000..4beca21df73edb041a83cbf0a9cb88fc6ab4f435
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-1.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target aarch64*-*-* } } */
+/* { dg-additional-options "-march=armv8-a+sve" } */
+#include <stdint.h>
+
+void foo(unsigned int* dest, uint8_t* src, unsigned int len, unsigned int* mul)
+{
+  for(int i = 0; i < len; ++i)
+    dest[i] = src[i] * (*mul) + 8;
+}
+
+/* { dg-final { scan-tree-dump-times "Loop sve mode optimization success" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-2.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-2.c
new file mode 100644
index 0000000000000000000000000000000000000000..60941ec5c1b5ee5c015cd9220f1f8db6d05fb4df
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-2.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target aarch64*-*-* } } */
+/* { dg-additional-options "-march=armv8-a+sve" } */
+#include <stdint.h>
+
+void foo(uint8_t* dest, uint8_t* src, unsigned int len, unsigned int* mul)
+{
+  for(int i = 0; i < len; ++i)
+    dest[i] = src[i] * (*mul) + 8;
+}
+
+/* { dg-final { scan-tree-dump-not "Loop sve mode optimization success" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-3.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-3.c
new file mode 100644
index 0000000000000000000000000000000000000000..5075f6ce84137fff607df3909ddecf71ef5aa9a3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-3.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target aarch64*-*-* } } */
+/* { dg-additional-options "-march=armv8-a+sve" } */
+#include <stdint.h>
+
+void foo(unsigned int* dest, uint8_t* src, unsigned int len)
+{
+  for(int i = 0; i < len; ++i)
+    dest[i] = src[i] + 8;
+}
+
+/* { dg-final { scan-tree-dump-not "Loop sve mode optimization success" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-4.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-4.c
new file mode 100644
index 0000000000000000000000000000000000000000..56af868461246b03284224df8c765bdd4c0df837
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-4.c
@@ -0,0 +1,11 @@
+/* { dg-do compile { target aarch64*-*-* } } */
+/* { dg-additional-options "-march=armv8-a+sve" } */
+#include <stdint.h>
+
+void foo(unsigned int* dest, uint8_t* src, unsigned int len, unsigned int* mul)
+{
+  for(int i = 0; i < len; ++i)
+    dest[i] = src[i] * (*mul);
+}
+
+/* { dg-final { scan-tree-dump-not "Loop sve mode optimization success" "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-5.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-5.c
new file mode 100644
index 0000000000000000000000000000000000000000..a6c0e23bedf9c4fc7a635ae80b1ced19a36da79a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-5.c
@@ -0,0 +1,12 @@
+/* { dg-do compile { target aarch64*-*-* } } */
+/* { dg-additional-options "-march=armv8-a+sve" } */
+#include <stdint.h>
+
+void foo(unsigned int* dest, uint8_t* src, unsigned int len,
+         unsigned int* mul, unsigned int* append)
+{
+  for(int i = 0; i < len; ++i)
+    dest[i] = ((unsigned int)src[i]) * (*mul) + (*append);
+}
+
+/* { dg-final { scan-tree-dump-times "Loop sve mode optimization success" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/sve-mode-opt-6.c b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-6.c
new file mode 100644
index 0000000000000000000000000000000000000000..545cc69cc64a7fb5deae8cc5c7b22dd7ee01a37f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/sve-mode-opt-6.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target aarch64*-*-* } } */
+/* { dg-additional-options "-march=armv8-a+sve" } */
+#include <stdint.h>
+
+void foo(unsigned int* dest, uint8_t* src, unsigned int len,
+         unsigned int* mul, unsigned int* append)
+{
+  for(int i = 0; i < len; ++i) {
+    dest[i] = src[i] * (*mul);
+    dest[i] -= 8;
+    dest[i] += *append;
+  }
+}
+
+/* { dg-final { scan-tree-dump-times "Loop sve mode optimization success" 1 "vect" } } */
diff --git a/gcc/testsuite/gcc.dg/vect/vect.exp b/gcc/testsuite/gcc.dg/vect/vect.exp
index ae52124111326b4c1c46b7999fbacf52daa246ae..a61c37a534e6366fb64c53ce6f394b04f1293c21 100644
--- a/gcc/testsuite/gcc.dg/vect/vect.exp
+++ b/gcc/testsuite/gcc.dg/vect/vect.exp
@@ -124,6 +124,13 @@ et-dg-runtest dg-runtest [lsort \
 	[glob -nocomplain $srcdir/$subdir/transpose-*.\[cS\]]] \
 	"" "-ftree-slp-transpose-vectorize -fdump-tree-slp-details -O3"
 
+# -floop-sve-mode-opt tests
+set VECT_SLP_CFLAGS $SAVED_VECT_SLP_CFLAGS
+lappend VECT_SLP_CFLAGS "-floop-sve-mode-opt"
+et-dg-runtest dg-runtest [lsort \
+	[glob -nocomplain $srcdir/$subdir/sve-mode-opt*.\[cS\]]] \
+	"" "-floop-sve-mode-opt -fdump-tree-vect-details -O3"
+
 # -ffast-math tests
 set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
 lappend DEFAULT_VECTCFLAGS "-ffast-math"
diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc
index 7f75779519a2b82e5ee31cffbff0de8186ac3fbd..a01404087a9c43901390d81727b345ea498d0b82 100644
--- a/gcc/tree-vect-loop.cc
+++ b/gcc/tree-vect-loop.cc
@@ -2958,6 +2958,170 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared,
   return opt_loop_vec_info::success (loop_vinfo);
 }
 
+#if !defined (CROSS_DIRECTORY_STRUCTURE) && defined (__aarch64__)
+
+/* Helpers for -floop-sve-mode-opt.  The guard above matches the one around
+   their only use in vect_analyze_loop, so that they do not become unused
+   static functions in other configurations.  */
+
+/* Return true if STMT is an assignment whose right-hand side is a MEM_REF
+   or COMPONENT_REF of an integral type with precision WIDTH.  */
+
+static bool
+load_by_specific_width_p (gimple *stmt, unsigned int width)
+{
+  if (!is_gimple_assign (stmt))
+    return false;
+
+  enum tree_code code = gimple_assign_rhs_code (stmt);
+  if (code != MEM_REF && code != COMPONENT_REF)
+    return false;
+
+  /* Only integral loads are wanted; checking the type class first also
+     keeps TYPE_PRECISION from being read on non-scalar types.  */
+  tree type = TREE_TYPE (gimple_assign_rhs1 (stmt));
+  return INTEGRAL_TYPE_P (type) && TYPE_PRECISION (type) == width;
+}
+
+/* Return true if STMT is a NOP_EXPR or CONVERT_EXPR assignment from an
+   integral type of precision W1 to an integral type of precision W2.  */
+
+static bool
+converse_by_specific_width_p (gimple *stmt, unsigned int w1, unsigned int w2)
+{
+  if (!is_gimple_assign (stmt))
+    return false;
+
+  if (!CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt)))
+    return false;
+
+  tree rhs_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
+  tree lhs_type = TREE_TYPE (gimple_assign_lhs (stmt));
+
+  return (INTEGRAL_TYPE_P (rhs_type)
+	  && TYPE_PRECISION (rhs_type) == w1
+	  && INTEGRAL_TYPE_P (lhs_type)
+	  && TYPE_PRECISION (lhs_type) == w2);
+}
+
+/* Return true if STMT is a MULT_EXPR assignment with an INTEGER_TYPE result
+   whose two operands are N1 and N2, in either order.  */
+
+static bool
+multiply_by_specific_nodes_p (gimple *stmt, tree n1, tree n2)
+{
+  if (!is_gimple_assign (stmt))
+    return false;
+
+  if (gimple_assign_rhs_code (stmt) != MULT_EXPR)
+    return false;
+
+  if (TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) != INTEGER_TYPE)
+    return false;
+
+  tree mul1 = gimple_assign_rhs1 (stmt);
+  tree mul2 = gimple_assign_rhs2 (stmt);
+
+  return (mul1 == n1 && mul2 == n2) || (mul1 == n2 && mul2 == n1);
+}
+
+/* Return true if STMT is a PLUS_EXPR assignment with an INTEGER_TYPE result
+   that has N1 as one of its operands.  */
+
+static bool
+plus_by_specific_node_p (gimple *stmt, tree n1)
+{
+  if (!is_gimple_assign (stmt))
+    return false;
+
+  if (gimple_assign_rhs_code (stmt) != PLUS_EXPR)
+    return false;
+
+  if (TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) != INTEGER_TYPE)
+    return false;
+
+  return (gimple_assign_rhs1 (stmt) == n1
+	  || gimple_assign_rhs2 (stmt) == n1);
+}
+
+/* Return true if the statements from GSI to the end of its basic block
+   contain, in this order: an 8-bit integer load (at GSI itself), a
+   conversion of the loaded value to a 32-bit integer, a 32-bit integer
+   load, a multiplication of the converted value by that second load,
+   and an addition that uses the product.  */
+
+static bool
+converse_and_multiply_p (gimple_stmt_iterator gsi)
+{
+  gimple *stmt = gsi_stmt (gsi);
+  if (!load_by_specific_width_p (stmt, 8))
+    return false;
+
+  tree load1_lhs = gimple_assign_lhs (stmt);
+
+  /* Find the 8-bit -> 32-bit conversion of the value just loaded.  */
+  for (; !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      stmt = gsi_stmt (gsi);
+      if (converse_by_specific_width_p (stmt, 8, 32)
+	  && gimple_assign_rhs1 (stmt) == load1_lhs)
+	break;
+    }
+  if (gsi_end_p (gsi))
+    return false;
+  tree conv_lhs = gimple_assign_lhs (stmt);
+
+  /* Find the next 32-bit load; it must be the other multiplicand.  */
+  for (; !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      stmt = gsi_stmt (gsi);
+      if (load_by_specific_width_p (stmt, 32))
+	break;
+    }
+  if (gsi_end_p (gsi))
+    return false;
+  tree load2_lhs = gimple_assign_lhs (stmt);
+
+  /* Find the multiplication of the converted value by the second load.  */
+  for (; !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      stmt = gsi_stmt (gsi);
+      if (multiply_by_specific_nodes_p (stmt, conv_lhs, load2_lhs))
+	break;
+    }
+  if (gsi_end_p (gsi))
+    return false;
+  tree mul_lhs = gimple_assign_lhs (stmt);
+
+  /* Finally the product has to feed an addition.  */
+  for (; !gsi_end_p (gsi); gsi_next (&gsi))
+    if (plus_by_specific_node_p (gsi_stmt (gsi), mul_lhs))
+      return true;
+
+  return false;
+}
+
+/* Return true if some basic block of LOOP contains the statement sequence
+   recognized by converse_and_multiply_p.  */
+
+static bool
+sve_mode_opt_analyze_loop (loop_p loop)
+{
+  basic_block *bbs = get_loop_body (loop);
+  bool found = false;
+
+  for (unsigned int i = 0; i < loop->num_nodes && !found; i++)
+    for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
+	 !gsi_end_p (gsi) && !found; gsi_next (&gsi))
+      found = converse_and_multiply_p (gsi);
+
+  /* The array returned by get_loop_body must be freed by the caller.  */
+  free (bbs);
+  return found;
+}
+
+#endif
+
 /* Function vect_analyze_loop.
 
    Apply a set of analyses on LOOP, and create a loop_vec_info struct
@@ -3007,10 +3171,36 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared,
   auto_vector_modes vector_modes;
   /* Autodetect first vector size we try.  */
   vector_modes.safe_push (VOIDmode);
+
+#if !defined (CROSS_DIRECTORY_STRUCTURE) && defined (__aarch64__)
+  /* TARGET_SVE and VNx4QImode only exist for AArch64, so this code is
+     restricted to native AArch64 builds.  When the loop contains the
+     widening multiply-add sequence, also offer VNx4QImode as a candidate.  */
+  bool sve_chance = false;
+  if (flag_loop_sve_mode_opt
+      && TARGET_SVE
+      && targetm.vector_mode_supported_p (VNx4QImode)
+      && sve_mode_opt_analyze_loop (loop))
+    {
+      if (dump_enabled_p ())
+	dump_printf (MSG_NOTE, "Loop sve mode optimization success\n");
+      sve_chance = true;
+      vector_modes.safe_push (VNx4QImode);
+    }
+#endif
+
   unsigned int autovec_flags
     = targetm.vectorize.autovectorize_vector_modes (&vector_modes,
 						    loop->simdlen != 0);
+
+#if !defined (CROSS_DIRECTORY_STRUCTURE) && defined (__aarch64__)
+  /* Ask for the candidate modes to be compared by cost now that an extra
+     mode has been added to the list.  */
+  if (sve_chance)
+    autovec_flags |= VECT_COMPARE_COSTS;
+#endif
+
   bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS)
 			     && !unlimited_cost_model (loop));
   machine_mode autodetected_vector_mode = VOIDmode;
   opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);