From fb345632b69d8ae56eac4aec99303556270f25c8 Mon Sep 17 00:00:00 2001
From: eastb233
Date: Fri, 28 May 2021 21:05:39 +0800
Subject: [PATCH] [Sync] Sync from master branch

- Sync patch from master branch
---
 Fix-type-mismatch-in-SLPed-constructors.patch |  118 ++
 PR92429-do-not-fold-when-updating.patch       |   70 ++
 Simplify-X-C1-C2.patch                        |  197 ++++
 add-check-for-pressure-in-sche1.patch         |   52 +
 add-fp-model-options.patch                    |  376 ++++++
 enable-simd-math.patch                        |  236 +++-
 fix-CTOR-vectorization.patch                  |   18 +
 fix-ICE-in-eliminate-stmt.patch               |   79 ++
 fix-ICE-in-vect.patch                         | 1037 +++++++++++++++++
 ...ge-set-by-vectorization-on-niter-IVs.patch |   74 ++
 gcc.spec                                      |   66 +-
 medium-code-mode.patch                        |  350 +++---
 ...t-use-scalar-conversions-for-vectors.patch |   69 ++
 redundant-loop-elimination.patch              |   22 +-
 revert-moutline-atomics.patch                 |  418 +++++++
 revise-type-before-build-MULT.patch           |   80 ++
 16 files changed, 3077 insertions(+), 185 deletions(-)
 create mode 100644 Fix-type-mismatch-in-SLPed-constructors.patch
 create mode 100644 PR92429-do-not-fold-when-updating.patch
 create mode 100644 Simplify-X-C1-C2.patch
 create mode 100644 add-check-for-pressure-in-sche1.patch
 create mode 100644 add-fp-model-options.patch
 create mode 100644 fix-CTOR-vectorization.patch
 create mode 100644 fix-ICE-in-eliminate-stmt.patch
 create mode 100644 fix-ICE-in-vect.patch
 create mode 100644 fix-range-set-by-vectorization-on-niter-IVs.patch
 create mode 100644 optabs-Dont-use-scalar-conversions-for-vectors.patch
 create mode 100644 revert-moutline-atomics.patch
 create mode 100644 revise-type-before-build-MULT.patch

diff --git a/Fix-type-mismatch-in-SLPed-constructors.patch b/Fix-type-mismatch-in-SLPed-constructors.patch
new file mode 100644
index 0000000..b073299
--- /dev/null
+++ b/Fix-type-mismatch-in-SLPed-constructors.patch
@@ -0,0 +1,118 @@
+This backport contains 2 patches from the gcc mainstream tree.
+The commit ids of these patches are listed below in chronological order.
+
+0001-Fix-type-mismatch-in-SLPed-constructors.patch
+86c3a7d891f9f175d09d61f5ce163c6dc5ce681f
+0001-re-PR-fortran-91003-ICE-when-compiling-LAPACK-CGEGV-.patch
+d005f61e7a0dbb2c991f13b4b61b1a27ca2d8b73
+
+diff -urpN a/gcc/testsuite/gfortran.dg/pr91003.f90 b/gcc/testsuite/gfortran.dg/pr91003.f90
+--- a/gcc/testsuite/gfortran.dg/pr91003.f90	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gfortran.dg/pr91003.f90	2021-02-22 03:02:39.484000000 -0500
+@@ -0,0 +1,33 @@
++! { dg-do compile }
++! { dg-options "-Ofast" }
++      SUBROUTINE FOO(N, A, B, C, D, E, F, G)
++      COMPLEX A(*)
++      LOGICAL H
++      INTEGER G
++      REAL I, C, J, F, F1, F2, K, E, L, M, B, D
++      DO JC = 1, N
++         K = F*REAL(A(JC))
++         Z = F*AIMAG(A(JC))
++         H = .FALSE.
++         L = G
++         IF(ABS(Z).LT.D .AND. I.GE. MAX(D, B*C, B*J)) THEN
++            H = .TRUE.
++            L = (D / F1) / MAX(D, F2*I)
++         END IF
++         IF(ABS(K).LT.D .AND. C.GE. MAX(D, B*I, B*J)) THEN
++            L = MAX(L, (D / F1) / MAX(D, F2*C))
++         END IF
++         IF(ABS(E).LT.D .AND. J.GE. MAX(D, B*C, B*I)) THEN
++            H = .TRUE.
++            L = MAX(L, (D / BNRM1) / MAX(D, BNRM2*J))
++         END IF
++         IF(H) THEN
++            M = (L*D)*MAX(ABS(K), ABS(Z), ABS(E))
++         END IF
++         IF(H) THEN
++            K = (L*REAL(A(JC)))*F
++            Z = (L*AIMAG(A(JC)))*F
++         END IF
++         A(JC) = CMPLX(K, Z)
++      END DO
++      END
+diff -urpN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
+--- a/gcc/tree-vect-slp.c	2021-02-22 02:56:51.328000000 -0500
++++ b/gcc/tree-vect-slp.c	2021-02-22 03:03:22.676000000 -0500
+@@ -3442,7 +3442,7 @@ vect_slp_bb (basic_block bb)
+ /* Return 1 if vector type STMT_VINFO is a boolean vector.  */
+ 
+ static bool
+-vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo)
++vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, unsigned op_num)
+ {
+   enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
+   tree op, vectype;
+@@ -3467,9 +3467,17 @@ vect_mask_constant_operand_p (stmt_vec_i
+       tree cond = gimple_assign_rhs1 (stmt);
+ 
+       if (TREE_CODE (cond) == SSA_NAME)
+-	op = cond;
++	{
++	  if (op_num > 0)
++	    return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
++	  op = cond;
++	}
+       else
+-	op = TREE_OPERAND (cond, 0);
++	{
++	  if (op_num > 1)
++	    return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
++	  op = TREE_OPERAND (cond, 0);
++	}
+ 
+       if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
+ 	gcc_unreachable ();
+@@ -3600,9 +3608,10 @@ duplicate_and_interleave (vec_info *vinf
+    operands.  */
+ 
+ static void
+-vect_get_constant_vectors (slp_tree op_node, slp_tree slp_node,
++vect_get_constant_vectors (slp_tree slp_node, unsigned op_num,
+ 			   vec<tree> *vec_oprnds)
+ {
++  slp_tree op_node = SLP_TREE_CHILDREN (slp_node)[op_num];
+   stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
+   vec_info *vinfo = stmt_vinfo->vinfo;
+   unsigned HOST_WIDE_INT nunits;
+@@ -3624,7 +3633,7 @@ vect_get_constant_vectors (slp_tree op_n
+   /* Check if vector type is a boolean vector.  */
+   tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+   if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
+-      && vect_mask_constant_operand_p (stmt_vinfo))
++      && vect_mask_constant_operand_p (stmt_vinfo, op_num))
+     vector_type = truth_type_for (stmt_vectype);
+   else
+     vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);
+@@ -3848,7 +3857,7 @@ vect_get_slp_defs (slp_tree slp_node, ve
+ 	  vect_get_slp_vect_defs (child, &vec_defs);
+ 	}
+       else
+-	vect_get_constant_vectors (child, slp_node, &vec_defs);
++	vect_get_constant_vectors (slp_node, i, &vec_defs);
+ 
+       vec_oprnds->quick_push (vec_defs);
+     }
+@@ -4269,6 +4278,10 @@ vectorize_slp_instance_root_stmt (slp_tr
+ 	{
+ 	  tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
+ 	  tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
++	  if (!useless_type_conversion_p (TREE_TYPE (root_lhs),
++					  TREE_TYPE (vect_lhs)))
++	    vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs),
++			       vect_lhs);
+ 	  rstmt = gimple_build_assign (root_lhs, vect_lhs);
+ 	  break;
+ 	}
diff --git a/PR92429-do-not-fold-when-updating.patch b/PR92429-do-not-fold-when-updating.patch
new file mode 100644
index 0000000..65749ee
--- /dev/null
+++ b/PR92429-do-not-fold-when-updating.patch
@@ -0,0 +1,70 @@
+This backport contains 1 patch from the gcc mainstream tree.
+The commit id of this patch is listed below.
+
+0001-PR-tree-optimization-92429-do-not-fold-when-updating.patch
+f7dff7699fd70d3b8c3e637818e18c86f93ccfec
+
+diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
+index 4d5e0494511..6e6df0bfdb8 100644
+--- a/gcc/tree-ssa-loop-niter.c
++++ b/gcc/tree-ssa-loop-niter.c
+@@ -1934,7 +1934,8 @@ number_of_iterations_cond (class loop *loop,
+ 
+ tree
+ simplify_replace_tree (tree expr, tree old, tree new_tree,
+-		       tree (*valueize) (tree, void*), void *context)
++		       tree (*valueize) (tree, void*), void *context,
++		       bool do_fold)
+ {
+   unsigned i, n;
+   tree ret = NULL_TREE, e, se;
+@@ -1966,7 +1967,7 @@ simplify_replace_tree (tree expr, tree old, tree new_tree,
+   for (i = 0; i < n; i++)
+     {
+       e = TREE_OPERAND (expr, i);
+-      se = simplify_replace_tree (e, old, new_tree, valueize, context);
++      se = simplify_replace_tree (e, old, new_tree, valueize, context, do_fold);
+       if (e == se)
+ 	continue;
+ 
+@@ -1976,7 +1977,7 @@ simplify_replace_tree (tree expr, tree old, tree new_tree,
+       TREE_OPERAND (ret, i) = se;
+     }
+ 
+-  return (ret ? fold (ret) : expr);
++  return (ret ? (do_fold ? fold (ret) : ret) : expr);
+ }
+ 
+ /* Expand definitions of ssa names in EXPR as long as they are simple
+diff --git a/gcc/tree-ssa-loop-niter.h b/gcc/tree-ssa-loop-niter.h
+index 621e2c2e28d..eb8d1579479 100644
+--- a/gcc/tree-ssa-loop-niter.h
++++ b/gcc/tree-ssa-loop-niter.h
+@@ -58,7 +58,7 @@ extern void free_numbers_of_iterations_estimates (class loop *);
+ extern void free_numbers_of_iterations_estimates (function *);
+ extern tree simplify_replace_tree (tree, tree,
+ 				   tree, tree (*)(tree, void *) = NULL,
+-				   void * = NULL);
++				   void * = NULL, bool do_fold = true);
+ extern void substitute_in_loop_info (struct loop *, tree, tree);
+ 
+ #endif /* GCC_TREE_SSA_LOOP_NITER_H */
+diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
+index 8e318a037a7..e5fb434bd4e 100644
+--- a/gcc/tree-vect-loop.c
++++ b/gcc/tree-vect-loop.c
+@@ -8434,8 +8434,13 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
+ 	    gimple_set_op (stmt, j, *new_op);
+ 	  else
+ 	    {
++	      /* PR92429: The last argument of simplify_replace_tree disables
++		 folding when replacing arguments.  This is required as
++		 otherwise you might end up with different statements than the
++		 ones analyzed in vect_loop_analyze, leading to different
++		 vectorization.  */
+ 	      op = simplify_replace_tree (op, NULL_TREE, NULL_TREE,
+-					  &find_in_mapping, &mapping);
++					  &find_in_mapping, &mapping, false);
+ 	      gimple_set_op (stmt, j, op);
+ 	    }
+ 	}
diff --git a/Simplify-X-C1-C2.patch b/Simplify-X-C1-C2.patch
new file mode 100644
index 0000000..0997a00
--- /dev/null
+++ b/Simplify-X-C1-C2.patch
@@ -0,0 +1,197 @@
+This backport contains 2 patches from the gcc mainstream tree.
+The commit ids of these patches are listed below in chronological order.
+
+0001-Simplify-X-C1-C2-with-undefined-overflow.patch
+ca2b8c082c4f16919071c9f8de8db0b33b54c405
+
+0002-Simplify-X-C1-C2-with-wrapping-overflow.patch
+287522613d661b4c5ba8403b051eb470c1674cba
+
+diff -Nurp a/gcc/expr.c b/gcc/expr.c
+--- a/gcc/expr.c	2021-03-17 16:34:24.700000000 +0800
++++ b/gcc/expr.c	2021-03-17 10:30:11.500000000 +0800
+@@ -11706,38 +11706,6 @@ string_constant (tree arg, tree *ptr_off
+   return init;
+ }
+ 
+-/* Compute the modular multiplicative inverse of A modulo M
+-   using extended Euclid's algorithm.  Assumes A and M are coprime.  */
+-static wide_int
+-mod_inv (const wide_int &a, const wide_int &b)
+-{
+-  /* Verify the assumption.  */
+-  gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1));
+-
+-  unsigned int p = a.get_precision () + 1;
+-  gcc_checking_assert (b.get_precision () + 1 == p);
+-  wide_int c = wide_int::from (a, p, UNSIGNED);
+-  wide_int d = wide_int::from (b, p, UNSIGNED);
+-  wide_int x0 = wide_int::from (0, p, UNSIGNED);
+-  wide_int x1 = wide_int::from (1, p, UNSIGNED);
+-
+-  if (wi::eq_p (b, 1))
+-    return wide_int::from (1, p, UNSIGNED);
+-
+-  while (wi::gt_p (c, 1, UNSIGNED))
+-    {
+-      wide_int t = d;
+-      wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d);
+-      c = t;
+-      wide_int s = x0;
+-      x0 = wi::sub (x1, wi::mul (q, x0));
+-      x1 = s;
+-    }
+-  if (wi::lt_p (x1, 0, SIGNED))
+-    x1 += d;
+-  return x1;
+-}
+-
+ /* Optimize x % C1 == C2 for signed modulo if C1 is a power of two and C2
+    is non-zero and C3 ((1<<(prec-1)) | (C1 - 1)):
+    for C2 > 0 to x & C3 == C2
+@@ -11948,7 +11916,7 @@ maybe_optimize_mod_cmp (enum tree_code c
+       w = wi::lrshift (w, shift);
+       wide_int a = wide_int::from (w, prec + 1, UNSIGNED);
+       wide_int b = wi::shifted_mask (prec, 1, false, prec + 1);
+-      wide_int m = wide_int::from (mod_inv (a, b), prec, UNSIGNED);
++      wide_int m = wide_int::from (wi::mod_inv (a, b), prec, UNSIGNED);
+       tree c3 = wide_int_to_tree (type, m);
+       tree c5 = NULL_TREE;
+       wide_int d, e;
+diff -Nurp a/gcc/match.pd b/gcc/match.pd
+--- a/gcc/match.pd	2021-03-17 16:34:19.320000000 +0800
++++ b/gcc/match.pd	2021-03-17 10:30:11.500000000 +0800
+@@ -3290,6 +3290,35 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+     (scmp @0 @2)
+     (cmp @0 @2))))))
+ 
++/* For integral types with undefined overflow fold
++   x * C1 == C2 into x == C2 / C1 or false.
++   If overflow wraps and C1 is odd, simplify to x == C2 / C1 in the ring
++   Z / 2^n Z.  */
++(for cmp (eq ne)
++ (simplify
++  (cmp (mult @0 INTEGER_CST@1) INTEGER_CST@2)
++  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
++       && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
++       && wi::to_wide (@1) != 0)
++   (with { widest_int quot; }
++    (if (wi::multiple_of_p (wi::to_widest (@2), wi::to_widest (@1),
++			    TYPE_SIGN (TREE_TYPE (@0)), &quot))
++     (cmp @0 { wide_int_to_tree (TREE_TYPE (@0), quot); })
++     { constant_boolean_node (cmp == NE_EXPR, type); }))
++   (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
++	&& TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
++	&& (wi::bit_and (wi::to_wide (@1), 1) == 1))
++    (cmp @0
++	 {
++	   tree itype = TREE_TYPE (@0);
++	   int p = TYPE_PRECISION (itype);
++	   wide_int m = wi::one (p + 1) << p;
++	   wide_int a = wide_int::from (wi::to_wide (@1), p + 1, UNSIGNED);
++	   wide_int i = wide_int::from (wi::mod_inv (a, m),
++					p, TYPE_SIGN (itype));
++	   wide_int_to_tree (itype, wi::mul (i, wi::to_wide (@2)));
++	 })))))
++
+ /* Simplify comparison of something with itself.  For IEEE
+    floating-point, we can only do some of these simplifications.  */
+ (for cmp (eq ge le)
+diff -Nurp a/gcc/testsuite/gcc.c-torture/execute/pr23135.c b/gcc/testsuite/gcc.c-torture/execute/pr23135.c
+--- a/gcc/testsuite/gcc.c-torture/execute/pr23135.c	2021-03-17 16:34:24.016000000 +0800
++++ b/gcc/testsuite/gcc.c-torture/execute/pr23135.c	2021-03-17 10:30:13.572000000 +0800
+@@ -1,7 +1,7 @@
+ /* Based on execute/simd-1.c, modified by joern.rennecke@st.com to
+    trigger a reload bug.  Verified for gcc mainline from 20050722 13:00 UTC
+    for sh-elf -m4 -O2.  */
+-/* { dg-options "-Wno-psabi" } */
++/* { dg-options "-Wno-psabi -fwrapv" } */
+ /* { dg-add-options stack_size } */
+ 
+ #ifndef STACK_SIZE
+diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c	2021-03-17 10:30:13.276000000 +0800
+@@ -0,0 +1,15 @@
++/* { dg-do compile } */
++/* { dg-options "-O -fwrapv -fdump-tree-gimple" } */
++
++typedef __INT32_TYPE__ int32_t;
++typedef unsigned __INT32_TYPE__ uint32_t;
++
++int e(int32_t x){return 3*x==5;}
++int f(int32_t x){return 3*x==-5;}
++int g(int32_t x){return -3*x==5;}
++int h(int32_t x){return 7*x==3;}
++int i(uint32_t x){return 7*x==3;}
++
++/* { dg-final { scan-tree-dump-times "== 1431655767" 1 "gimple" } } */
++/* { dg-final { scan-tree-dump-times "== -1431655767" 2 "gimple" } } */
++/* { dg-final { scan-tree-dump-times "== 613566757" 2 "gimple" } } */
+diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr95433.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95433.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/pr95433.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95433.c	2021-03-17 10:30:13.276000000 +0800
+@@ -0,0 +1,8 @@
++/* { dg-do compile } */
++/* { dg-options "-O -fdump-tree-optimized" } */
++
++int f(int x){return x*7==17;}
++int g(int x){return x*3==15;}
++
++/* { dg-final { scan-tree-dump "return 0;" "optimized" } } */
++/* { dg-final { scan-tree-dump "== 5;" "optimized" } } */
+diff -Nurp a/gcc/wide-int.cc b/gcc/wide-int.cc
+--- a/gcc/wide-int.cc	2021-03-17 16:34:24.488000000 +0800
++++ b/gcc/wide-int.cc	2021-03-17 10:30:11.500000000 +0800
+@@ -2223,6 +2223,39 @@ wi::round_up_for_mask (const wide_int &v
+   return (val | tmp) & -tmp;
+ }
+ 
++/* Compute the modular multiplicative inverse of A modulo B
++   using extended Euclid's algorithm.  Assumes A and B are coprime,
++   and that A and B have the same precision.  */
++wide_int
++wi::mod_inv (const wide_int &a, const wide_int &b)
++{
++  /* Verify the assumption.  */
++  gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1));
++
++  unsigned int p = a.get_precision () + 1;
++  gcc_checking_assert (b.get_precision () + 1 == p);
++  wide_int c = wide_int::from (a, p, UNSIGNED);
++  wide_int d = wide_int::from (b, p, UNSIGNED);
++  wide_int x0 = wide_int::from (0, p, UNSIGNED);
++  wide_int x1 = wide_int::from (1, p, UNSIGNED);
++
++  if (wi::eq_p (b, 1))
++    return wide_int::from (1, p, UNSIGNED);
++
++  while (wi::gt_p (c, 1, UNSIGNED))
++    {
++      wide_int t = d;
++      wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d);
++      c = t;
++      wide_int s = x0;
++      x0 = wi::sub (x1, wi::mul (q, x0));
++      x1 = s;
++    }
++  if (wi::lt_p (x1, 0, SIGNED))
++    x1 += d;
++  return x1;
++}
++
+ /*
+  * Private utilities.
+  */
+ */ +diff -Nurp a/gcc/wide-int.h b/gcc/wide-int.h +--- a/gcc/wide-int.h 2021-03-17 16:34:14.792000000 +0800 ++++ b/gcc/wide-int.h 2021-03-17 10:30:11.500000000 +0800 +@@ -3368,6 +3368,8 @@ namespace wi + wide_int round_down_for_mask (const wide_int &, const wide_int &); + wide_int round_up_for_mask (const wide_int &, const wide_int &); + ++ wide_int mod_inv (const wide_int &a, const wide_int &b); ++ + template + T mask (unsigned int, bool); + diff --git a/add-check-for-pressure-in-sche1.patch b/add-check-for-pressure-in-sche1.patch new file mode 100644 index 0000000..b57a685 --- /dev/null +++ b/add-check-for-pressure-in-sche1.patch @@ -0,0 +1,52 @@ +--- a/gcc/haifa-sched.c 2021-03-08 14:46:59.204000000 +0800 ++++ b/gcc/haifa-sched.c 2021-03-09 13:32:40.656000000 +0800 +@@ -2036,8 +2036,10 @@ model_start_update_pressure (struct mode + /* The instruction wasn't part of the model schedule; it was moved + from a different block. Update the pressure for the end of + the model schedule. */ +- MODEL_REF_PRESSURE (group, point, pci) += delta; +- MODEL_MAX_PRESSURE (group, point, pci) += delta; ++ if (MODEL_REF_PRESSURE (group, point, pci) != -1 || delta > 0) ++ MODEL_REF_PRESSURE (group, point, pci) += delta; ++ if (MODEL_MAX_PRESSURE (group, point, pci) != -1 || delta > 0) ++ MODEL_MAX_PRESSURE (group, point, pci) += delta; + } + else + { +diff -uprN a/gcc/testsuite/gcc.dg/sche1-pressure-check.c b/gcc/testsuite/gcc.dg/sche1-pressure-check.c +--- a/gcc/testsuite/gcc.dg/sche1-pressure-check.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/sche1-pressure-check.c 2021-03-09 13:40:34.036000000 +0800 +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3" } */ ++ ++int a, g, h; ++char b, c; ++short d; ++static int e; ++int *volatile f; ++void i() { ++ int j = 0; ++ int *k = &a; ++ for (; c; c--) { ++ g && (d = 0); ++ j ^= 10; ++ { ++ int l[2]; ++ l; ++ h = l[1]; ++ } ++ e = 1; ++ for (; e <= 7; e++) { ++ *k = 6; ++ *f = b = 0; ++ for (; b <= 7; b++) { ++ int m = 5; ++ if (g) ++ *k &= m ^= j; ++ } ++ } ++ } ++} ++int main() {} ++ diff --git a/add-fp-model-options.patch b/add-fp-model-options.patch new file mode 100644 index 0000000..8d23b99 --- /dev/null +++ b/add-fp-model-options.patch @@ -0,0 +1,376 @@ +diff -Nurp a/gcc/common.opt b/gcc/common.opt +--- a/gcc/common.opt 2021-02-18 21:22:07.216000000 +0800 ++++ b/gcc/common.opt 2021-02-19 16:04:17.876000000 +0800 +@@ -1506,6 +1506,32 @@ ffp-int-builtin-inexact + Common Report Var(flag_fp_int_builtin_inexact) Init(1) Optimization + Allow built-in functions ceil, floor, round, trunc to raise \"inexact\" exceptions. + ++fftz ++Common Report Var(flag_ftz) Optimization ++Control fpcr register for flush to zero. ++ ++fp-model= ++Common Joined RejectNegative Enum(fp_model) Var(flag_fp_model) Init(FP_MODEL_NORMAL) Optimization ++-fp-model=[normal|fast|precise|except|strict] Perform floating-point precision control. ++ ++Enum ++Name(fp_model) Type(enum fp_model) UnknownError(unknown floating point precision model %qs) ++ ++EnumValue ++Enum(fp_model) String(normal) Value(FP_MODEL_NORMAL) ++ ++EnumValue ++Enum(fp_model) String(fast) Value(FP_MODEL_FAST) ++ ++EnumValue ++Enum(fp_model) String(precise) Value(FP_MODEL_PRECISE) ++ ++EnumValue ++Enum(fp_model) String(except) Value(FP_MODEL_EXCEPT) ++ ++EnumValue ++Enum(fp_model) String(strict) Value(FP_MODEL_STRICT) ++ + ; Nonzero means don't put addresses of constant functions in registers. 
+ ; Used for compiling the Unix kernel, where strange substitutions are + ; done on the assembly output. +diff -Nurp a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h +--- a/gcc/config/aarch64/aarch64-linux.h 2021-02-18 21:22:07.220000000 +0800 ++++ b/gcc/config/aarch64/aarch64-linux.h 2021-02-18 21:23:55.932000000 +0800 +@@ -50,7 +50,8 @@ + #define LINK_SPEC LINUX_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC + + #define GNU_USER_TARGET_MATHFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" ++ "%{Ofast|ffast-math|funsafe-math-optimizations|fp-model=fast|fftz:\ ++ %{!fno-ftz:crtfastmath.o%s}}" + + #undef ENDFILE_SPEC + #define ENDFILE_SPEC \ +diff -Nurp a/gcc/flag-types.h b/gcc/flag-types.h +--- a/gcc/flag-types.h 2020-03-12 19:07:21.000000000 +0800 ++++ b/gcc/flag-types.h 2021-02-18 21:23:55.932000000 +0800 +@@ -207,6 +207,15 @@ enum fp_contract_mode { + FP_CONTRACT_FAST = 2 + }; + ++/* Floating-point precision mode. */ ++enum fp_model { ++ FP_MODEL_NORMAL = 0, ++ FP_MODEL_FAST = 1, ++ FP_MODEL_PRECISE = 2, ++ FP_MODEL_EXCEPT = 3, ++ FP_MODEL_STRICT = 4 ++}; ++ + /* Scalar storage order kind. */ + enum scalar_storage_order_kind { + SSO_NATIVE = 0, +diff -Nurp a/gcc/fortran/options.c b/gcc/fortran/options.c +--- a/gcc/fortran/options.c 2020-03-12 19:07:21.000000000 +0800 ++++ b/gcc/fortran/options.c 2021-02-18 21:23:55.932000000 +0800 +@@ -247,6 +247,7 @@ form_from_filename (const char *filename + return f_form; + } + ++static void gfc_handle_fpe_option (const char *arg, bool trap); + + /* Finalize commandline options. */ + +@@ -274,6 +275,13 @@ gfc_post_options (const char **pfilename + if (flag_protect_parens == -1) + flag_protect_parens = !optimize_fast; + ++ /* If fp-model=precise/strict, turn on all ffpe-trap and ffpe-summary. */ ++ if (flag_fp_model == FP_MODEL_EXCEPT || flag_fp_model == FP_MODEL_STRICT) ++ { ++ gfc_handle_fpe_option ("all", false); ++ gfc_handle_fpe_option ("invalid,zero,overflow,underflow", true); ++ } ++ + /* -Ofast sets implies -fstack-arrays unless an explicit size is set for + stack arrays. */ + if (flag_stack_arrays == -1 && flag_max_stack_var_size == -2) +diff -Nurp a/gcc/opts.c b/gcc/opts.c +--- a/gcc/opts.c 2021-02-18 21:22:07.424000000 +0800 ++++ b/gcc/opts.c 2021-02-19 16:00:08.628000000 +0800 +@@ -196,6 +196,7 @@ static void set_debug_level (enum debug_ + struct gcc_options *opts_set, + location_t loc); + static void set_fast_math_flags (struct gcc_options *opts, int set); ++static void set_fp_model_flags (struct gcc_options *opts, int set); + static void decode_d_option (const char *arg, struct gcc_options *opts, + location_t loc, diagnostic_context *dc); + static void set_unsafe_math_optimizations_flags (struct gcc_options *opts, +@@ -2433,6 +2434,10 @@ common_handle_option (struct gcc_options + set_fast_math_flags (opts, value); + break; + ++ case OPT_fp_model_: ++ set_fp_model_flags (opts, value); ++ break; ++ + case OPT_funsafe_math_optimizations: + set_unsafe_math_optimizations_flags (opts, value); + break; +@@ -2905,6 +2910,69 @@ set_fast_math_flags (struct gcc_options + } + } + ++/* Handle fp-model options. */ ++static void ++set_fp_model_flags (struct gcc_options *opts, int set) ++{ ++ enum fp_model model = (enum fp_model) set; ++ switch (model) ++ { ++ case FP_MODEL_FAST: ++ /* Equivalent to open ffast-math. */ ++ set_fast_math_flags (opts, 1); ++ break; ++ ++ case FP_MODEL_PRECISE: ++ /* Equivalent to close ffast-math. 
*/ ++ set_fast_math_flags (opts, 0); ++ /* Turn on -frounding-math -fsignaling-nans. */ ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_rounding_math) ++ opts->x_flag_rounding_math = 1; ++ opts->x_flag_expensive_optimizations = 0; ++ opts->x_flag_code_hoisting = 0; ++ opts->x_flag_predictive_commoning = 0; ++ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF; ++ break; ++ ++ case FP_MODEL_EXCEPT: ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_errno_math) ++ opts->x_flag_errno_math = 1; ++ if (!opts->frontend_set_flag_trapping_math) ++ opts->x_flag_trapping_math = 1; ++ opts->x_flag_fp_int_builtin_inexact = 1; ++ /* Also turn on ffpe-trap in fortran. */ ++ break; ++ ++ case FP_MODEL_STRICT: ++ /* Turn on both precise and except. */ ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_rounding_math) ++ opts->x_flag_rounding_math = 1; ++ opts->x_flag_expensive_optimizations = 0; ++ opts->x_flag_code_hoisting = 0; ++ opts->x_flag_predictive_commoning = 0; ++ if (!opts->frontend_set_flag_errno_math) ++ opts->x_flag_errno_math = 1; ++ if (!opts->frontend_set_flag_trapping_math) ++ opts->x_flag_trapping_math = 1; ++ opts->x_flag_fp_int_builtin_inexact = 1; ++ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF; ++ break; ++ ++ case FP_MODEL_NORMAL: ++ /* Do nothing. */ ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ + /* When -funsafe-math-optimizations is set the following + flags are set as well. */ + static void +diff -Nurp a/gcc/opts-common.c b/gcc/opts-common.c +--- a/gcc/opts-common.c 2020-03-12 19:07:21.000000000 +0800 ++++ b/gcc/opts-common.c 2021-02-19 09:49:18.880000000 +0800 +@@ -26,7 +26,8 @@ along with GCC; see the file COPYING3. + #include "diagnostic.h" + #include "spellcheck.h" + +-static void prune_options (struct cl_decoded_option **, unsigned int *); ++static void prune_options (struct cl_decoded_option **, unsigned int *, ++ unsigned int); + + /* An option that is undocumented, that takes a joined argument, and + that doesn't fit any of the classes of uses (language/common, +@@ -968,7 +969,7 @@ decode_cmdline_options_to_array (unsigne + + *decoded_options = opt_array; + *decoded_options_count = num_decoded_options; +- prune_options (decoded_options, decoded_options_count); ++ prune_options (decoded_options, decoded_options_count, lang_mask); + } + + /* Return true if NEXT_OPT_IDX cancels OPT_IDX. Return false if the +@@ -989,11 +990,108 @@ cancel_option (int opt_idx, int next_opt + return false; + } + ++/* Check whether opt_idx exists in decoded_options array bewteen index ++ start and end. If found, return its index in decoded_options, ++ else return end. */ ++static unsigned int ++find_opt_idx (struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int start, unsigned int end, unsigned int opt_idx) ++{ ++ gcc_assert (end <= decoded_options_count); ++ gcc_assert (opt_idx < cl_options_count); ++ unsigned int k; ++ for (k = start; k < end; k++) ++ { ++ if (decoded_options[k].opt_index == opt_idx) ++ { ++ return k; ++ } ++ } ++ return k; ++} ++ ++/* remove the opt_index element from decoded_options array. 
*/ ++static unsigned int ++remove_option (struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int opt_index) ++{ ++ gcc_assert (opt_index < decoded_options_count); ++ unsigned int i; ++ for (i = opt_index; i < decoded_options_count - 1; i++) ++ { ++ decoded_options[i] = decoded_options[i + 1]; ++ } ++ return decoded_options_count - 1; ++} ++ ++/* Handle the priority between fp-model, Ofast, and ++ ffast-math. */ ++static unsigned int ++handle_fp_model_driver (struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int fp_model_index, ++ unsigned int lang_mask) ++{ ++ struct cl_decoded_option fp_model_opt = decoded_options[fp_model_index]; ++ enum fp_model model = (enum fp_model) fp_model_opt.value; ++ if (model == FP_MODEL_PRECISE || model == FP_MODEL_STRICT) ++ { ++ /* If found Ofast, override Ofast with O3. */ ++ unsigned int Ofast_index; ++ Ofast_index = find_opt_idx (decoded_options, decoded_options_count, ++ 0, decoded_options_count, OPT_Ofast); ++ while (Ofast_index != decoded_options_count) ++ { ++ const char *tmp_argv = "-O3"; ++ decode_cmdline_option (&tmp_argv, lang_mask, ++ &decoded_options[Ofast_index]); ++ warning (0, "'-Ofast' is degraded to '-O3' due to %qs", ++ fp_model_opt.orig_option_with_args_text); ++ Ofast_index = find_opt_idx (decoded_options, decoded_options_count, ++ 0, decoded_options_count, OPT_Ofast); ++ } ++ /* If found ffast-math before fp-model=precise/strict ++ it, cancel it. */ ++ unsigned int ffast_math_index; ++ ffast_math_index ++ = find_opt_idx (decoded_options, decoded_options_count, 0, ++ fp_model_index, OPT_ffast_math); ++ if (ffast_math_index != fp_model_index) ++ { ++ decoded_options_count ++ = remove_option (decoded_options, decoded_options_count, ++ ffast_math_index); ++ warning (0, "'-ffast-math' before %qs is canceled", ++ fp_model_opt.orig_option_with_args_text); ++ } ++ } ++ if (model == FP_MODEL_FAST) ++ { ++ /* If found -fno-fast-math after fp-model=fast, cancel this one. */ ++ unsigned int fno_fast_math_index; ++ fno_fast_math_index ++ = find_opt_idx (decoded_options, decoded_options_count, fp_model_index, ++ decoded_options_count, OPT_ffast_math); ++ if (fno_fast_math_index != decoded_options_count ++ && decoded_options[fno_fast_math_index].value == 0) ++ { ++ decoded_options_count ++ = remove_option (decoded_options, decoded_options_count, ++ fp_model_index); ++ warning (0, "'-fp-model=fast' before '-fno-fast-math' is canceled"); ++ } ++ } ++ return decoded_options_count; ++} ++ + /* Filter out options canceled by the ones after them. */ + + static void + prune_options (struct cl_decoded_option **decoded_options, +- unsigned int *decoded_options_count) ++ unsigned int *decoded_options_count, ++ unsigned int lang_mask) + { + unsigned int old_decoded_options_count = *decoded_options_count; + struct cl_decoded_option *old_decoded_options = *decoded_options; +@@ -1005,6 +1103,8 @@ prune_options (struct cl_decoded_option + unsigned int fdiagnostics_color_idx = 0; + + /* Remove arguments which are negated by others after them. */ ++ ++ unsigned int fp_model_index = old_decoded_options_count; + new_decoded_options_count = 0; + for (i = 0; i < old_decoded_options_count; i++) + { +@@ -1028,6 +1128,34 @@ prune_options (struct cl_decoded_option + fdiagnostics_color_idx = i; + continue; + ++ case OPT_fp_model_: ++ /* Only the last fp-model option will take effect. 
*/ ++ unsigned int next_fp_model_idx; ++ next_fp_model_idx = find_opt_idx (old_decoded_options, ++ old_decoded_options_count, ++ i + 1, ++ old_decoded_options_count, ++ OPT_fp_model_); ++ if (next_fp_model_idx != old_decoded_options_count) ++ { ++ /* Found more than one fp-model, cancel this one. */ ++ if (old_decoded_options[i].value ++ != old_decoded_options[next_fp_model_idx].value) ++ { ++ warning (0, "%qs is overrided by %qs", ++ old_decoded_options[i]. ++ orig_option_with_args_text, ++ old_decoded_options[next_fp_model_idx]. ++ orig_option_with_args_text); ++ } ++ break; ++ } ++ else ++ { ++ /* Found the last fp-model option. */ ++ fp_model_index = new_decoded_options_count; ++ } ++ /* FALLTHRU. */ + default: + gcc_assert (opt_idx < cl_options_count); + option = &cl_options[opt_idx]; +@@ -1067,6 +1195,14 @@ keep: + break; + } + } ++ if (fp_model_index < new_decoded_options_count) ++ { ++ new_decoded_options_count ++ = handle_fp_model_driver (new_decoded_options, ++ new_decoded_options_count, ++ fp_model_index, ++ lang_mask); ++ } + + if (fdiagnostics_color_idx >= 1) + { diff --git a/enable-simd-math.patch b/enable-simd-math.patch index 46f7d3d..7658fb8 100644 --- a/enable-simd-math.patch +++ b/enable-simd-math.patch @@ -1,7 +1,35 @@ +diff -Nurp a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c +--- a/gcc/c-family/c-opts.c 2021-01-07 17:32:31.856000000 +0800 ++++ b/gcc/c-family/c-opts.c 2021-01-07 17:05:02.524000000 +0800 +@@ -783,6 +783,10 @@ c_common_post_options (const char **pfil + if (cpp_opts->deps.style == DEPS_NONE) + check_deps_environment_vars (); + ++ if (flag_simdmath) ++ { ++ defer_opt (OPT_include, "simdmath.h"); ++ } + handle_deferred_opts (); + + sanitize_cpp_opts (); +diff -Nurp a/gcc/common.opt b/gcc/common.opt +--- a/gcc/common.opt 2021-01-07 17:30:43.912000000 +0800 ++++ b/gcc/common.opt 2021-01-07 17:38:38.612000000 +0800 +@@ -1935,6 +1935,10 @@ fmath-errno + Common Report Var(flag_errno_math) Init(1) Optimization SetByCombined + Set errno after built-in math functions. + ++fsimdmath ++Common Report Var(flag_simdmath) Init(0) Optimization ++Enable auto-vectorize math functions for mathlib. This option will turn on -fno-math-errno and -fopenmp-simd. ++ + fmax-errors= + Common Joined RejectNegative UInteger Var(flag_max_errors) + -fmax-errors= Maximum number of errors to report. 
 diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
---- a/gcc/config/aarch64/aarch64.c	2020-07-06 17:20:30.368000000 +0800
-+++ b/gcc/config/aarch64/aarch64.c	2020-07-06 20:02:39.480000000 +0800
-@@ -18860,8 +18860,12 @@ aarch64_simd_clone_compute_vecsize_and_s
+--- a/gcc/config/aarch64/aarch64.c	2021-01-07 17:30:43.912000000 +0800
++++ b/gcc/config/aarch64/aarch64.c	2021-01-05 15:17:21.580000000 +0800
+@@ -21588,8 +21588,12 @@ aarch64_simd_clone_compute_vecsize_and_s
   elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
   if (clonei->simdlen == 0)
     {
 diff -Nurp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
---- a/gcc/config/aarch64/aarch64.opt	2020-07-06 17:20:30.364000000 +0800
-+++ b/gcc/config/aarch64/aarch64.opt	2020-07-06 20:02:39.480000000 +0800
-@@ -186,6 +186,12 @@ precision of square root results to abou
+--- a/gcc/config/aarch64/aarch64.opt	2021-01-07 17:30:43.912000000 +0800
++++ b/gcc/config/aarch64/aarch64.opt	2021-01-05 15:17:21.448000000 +0800
+@@ -197,6 +197,12 @@ precision of square root results to abou
  single precision and to 32 bits for double precision. If enabled, it implies
  -mlow-precision-recip-sqrt.
 
+mlow-precision-div
+Target Var(flag_mlow_precision_div) Optimization
+Enable the division approximation. Enabling this reduces
+diff -Nurp a/gcc/fortran/scanner.c b/gcc/fortran/scanner.c
+--- a/gcc/fortran/scanner.c	2021-01-07 17:31:59.264000000 +0800
++++ b/gcc/fortran/scanner.c	2021-01-07 17:05:28.776000000 +0800
+@@ -2702,6 +2702,10 @@ gfc_new_file (void)
+       && !load_file (flag_pre_include, NULL, false))
+     exit (FATAL_EXIT_CODE);
+ 
++  if (flag_simdmath
++      && !load_file ("simdmath_f.h", NULL, false))
++    exit (FATAL_EXIT_CODE);
++
+   if (gfc_cpp_enabled ())
+     {
+       result = gfc_cpp_preprocess (gfc_source_file);
+diff -Nurp a/gcc/opts.c b/gcc/opts.c
+--- a/gcc/opts.c	2021-01-07 17:30:57.740000000 +0800
++++ b/gcc/opts.c	2021-01-05 15:17:21.068000000 +0800
+@@ -190,6 +190,7 @@ typedef char *char_p; /* For DEF_VEC_P.
+ static void handle_param (struct gcc_options *opts,
+ 			  struct gcc_options *opts_set, location_t loc,
+ 			  const char *carg);
++static void set_simdmath_flags (struct gcc_options *opts, int set);
+ static void set_debug_level (enum debug_info_type type, int extended,
+ 			     const char *arg, struct gcc_options *opts,
+ 			     struct gcc_options *opts_set,
+@@ -2420,6 +2421,10 @@ common_handle_option (struct gcc_options
+       dc->min_margin_width = value;
+       break;
+ 
++    case OPT_fsimdmath:
++      set_simdmath_flags (opts, value);
++      break;
++
+     case OPT_fdump_:
+       /* Deferred.  */
+       break;
+@@ -2843,6 +2848,18 @@ handle_param (struct gcc_options *opts,
+   free (arg);
+ }
+ 
++/* The following routines are used to set -fno-math-errno and -fopenmp-simd
++   to enable vector mathlib.  */
++static void
++set_simdmath_flags (struct gcc_options *opts, int set)
++{
++  if (set)
++    {
++      opts->x_flag_errno_math = 0;
++      opts->x_flag_openmp_simd = 1;
++    }
++}
++
+ /* Used to set the level of strict aliasing warnings in OPTS,
+    when no level is specified (i.e., when -Wstrict-aliasing, and not
+    -Wstrict-aliasing=level was given).  */
+diff -Nurp a/libgomp/configure b/libgomp/configure
+--- a/libgomp/configure	2021-01-07 17:40:08.216000000 +0800
++++ b/libgomp/configure	2021-01-07 16:29:45.628000000 +0800
+@@ -17258,7 +17258,7 @@ fi
+ 
+ 
+ 
+-ac_config_files="$ac_config_files omp.h omp_lib.h omp_lib.f90 libgomp_f.h"
++ac_config_files="$ac_config_files omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h"
+ 
+ ac_config_files="$ac_config_files Makefile testsuite/Makefile libgomp.spec"
+ 
+@@ -18426,6 +18426,8 @@ do
+     "gstdint.h") CONFIG_COMMANDS="$CONFIG_COMMANDS gstdint.h" ;;
+     "omp.h") CONFIG_FILES="$CONFIG_FILES omp.h" ;;
+     "omp_lib.h") CONFIG_FILES="$CONFIG_FILES omp_lib.h" ;;
++    "simdmath.h") CONFIG_FILES="$CONFIG_FILES simdmath.h" ;;
++    "simdmath_f.h") CONFIG_FILES="$CONFIG_FILES simdmath_f.h" ;;
+     "omp_lib.f90") CONFIG_FILES="$CONFIG_FILES omp_lib.f90" ;;
+     "libgomp_f.h") CONFIG_FILES="$CONFIG_FILES libgomp_f.h" ;;
+     "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
+diff -Nurp a/libgomp/configure.ac b/libgomp/configure.ac
+--- a/libgomp/configure.ac	2021-01-07 17:40:08.216000000 +0800
++++ b/libgomp/configure.ac	2021-01-07 16:26:26.560000000 +0800
+@@ -422,7 +422,7 @@ CFLAGS="$save_CFLAGS"
+ # Determine what GCC version number to use in filesystem paths.
+ GCC_BASE_VER
+ 
+-AC_CONFIG_FILES(omp.h omp_lib.h omp_lib.f90 libgomp_f.h)
++AC_CONFIG_FILES(omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h)
+ AC_CONFIG_FILES(Makefile testsuite/Makefile libgomp.spec)
+ AC_CONFIG_FILES([testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in])
+ AC_OUTPUT
+diff -Nurp a/libgomp/Makefile.am b/libgomp/Makefile.am
+--- a/libgomp/Makefile.am	2021-01-07 17:40:08.168000000 +0800
++++ b/libgomp/Makefile.am	2021-01-07 16:27:39.776000000 +0800
+@@ -74,9 +74,9 @@ libgomp_la_SOURCES += openacc.f90
+ endif
+ 
+ nodist_noinst_HEADERS = libgomp_f.h
+-nodist_libsubinclude_HEADERS = omp.h openacc.h
++nodist_libsubinclude_HEADERS = omp.h openacc.h simdmath.h
+ if USE_FORTRAN
+-nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
++nodist_finclude_HEADERS = omp_lib.h simdmath_f.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
+ 	openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod
+ endif
+ 
+diff -Nurp a/libgomp/Makefile.in b/libgomp/Makefile.in
+--- a/libgomp/Makefile.in	2021-01-07 17:40:08.208000000 +0800
++++ b/libgomp/Makefile.in	2021-01-07 16:50:28.820000000 +0800
+@@ -145,7 +145,7 @@ am__CONFIG_DISTCLEAN_FILES = config.stat
+ 	configure.lineno config.status.lineno
+ mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs
+ CONFIG_HEADER = config.h
+-CONFIG_CLEAN_FILES = omp.h omp_lib.h omp_lib.f90 libgomp_f.h \
++CONFIG_CLEAN_FILES = omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h \
+ 	libgomp.spec
+ CONFIG_CLEAN_VPATH_FILES =
+ am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+@@ -575,8 +575,8 @@ libgomp_la_SOURCES = alloc.c atomic.c ba
+ @PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LIBADD = libgomp.la $(PLUGIN_HSA_LIBS)
+ @PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LIBTOOLFLAGS = --tag=disable-static
+ nodist_noinst_HEADERS = libgomp_f.h
+-nodist_libsubinclude_HEADERS = omp.h openacc.h
++nodist_libsubinclude_HEADERS = omp.h openacc.h simdmath.h
+-@USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
++@USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h simdmath_f.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
+ @USE_FORTRAN_TRUE@	openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod
+ 
+ LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS))
+@@ -668,6 +668,10 @@ omp.h: $(top_builddir)/config.status $(s
+ 	cd $(top_builddir) && $(SHELL) ./config.status $@
+ omp_lib.h: $(top_builddir)/config.status $(srcdir)/omp_lib.h.in
+ 	cd $(top_builddir) && $(SHELL) ./config.status $@
++simdmath_f.h: $(top_builddir)/config.status $(srcdir)/simdmath_f.h.in
++	cd $(top_builddir) && $(SHELL) ./config.status $@
++simdmath.h: $(top_builddir)/config.status $(srcdir)/simdmath.h.in
++	cd $(top_builddir) && $(SHELL) ./config.status $@
+ omp_lib.f90: $(top_builddir)/config.status $(srcdir)/omp_lib.f90.in
+ 	cd $(top_builddir) && $(SHELL) ./config.status $@
+ libgomp_f.h: $(top_builddir)/config.status $(srcdir)/libgomp_f.h.in
+diff -Nurp a/libgomp/simdmath_f.h.in b/libgomp/simdmath_f.h.in
+--- a/libgomp/simdmath_f.h.in	1970-01-01 08:00:00.000000000 +0800
++++ b/libgomp/simdmath_f.h.in	2021-01-07 16:13:23.196000000 +0800
+@@ -0,0 +1,11 @@
++!GCC$ builtin (cos) attributes simd (notinbranch)
++!GCC$ builtin (cosf) attributes simd (notinbranch)
++!GCC$ builtin (sin) attributes simd (notinbranch)
++!GCC$ builtin (sinf) attributes simd (notinbranch)
++!GCC$ builtin (exp) attributes simd (notinbranch)
++!GCC$ builtin (expf) attributes simd (notinbranch)
++!GCC$ builtin (exp2f) attributes simd (notinbranch)
++!GCC$ builtin (log) attributes simd (notinbranch)
++!GCC$ builtin (logf) attributes simd (notinbranch)
++!GCC$ builtin (pow) attributes simd (notinbranch)
++!GCC$ builtin (powf) attributes simd (notinbranch)
+diff -Nurp a/libgomp/simdmath.h.in b/libgomp/simdmath.h.in
+--- a/libgomp/simdmath.h.in	1970-01-01 08:00:00.000000000 +0800
++++ b/libgomp/simdmath.h.in	2021-01-07 16:13:56.144000000 +0800
+@@ -0,0 +1,40 @@
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++#pragma omp declare simd simdlen(2) notinbranch
++double cos (double x);
++
++#pragma omp declare simd simdlen(4) notinbranch
++float cosf (float x);
++
++#pragma omp declare simd simdlen(2) notinbranch
++double sin (double x);
++
++#pragma omp declare simd simdlen(4) notinbranch
++float sinf (float x);
++
++#pragma omp declare simd simdlen(2) notinbranch
++double exp (double x);
++
++#pragma omp declare simd simdlen(4) notinbranch
++float expf (float x);
++
++#pragma omp declare simd simdlen(2) notinbranch
++double log (double x);
++
++#pragma omp declare simd simdlen(4) notinbranch
++float logf (float x);
++
++#pragma omp declare simd simdlen(2) notinbranch
++double pow (double x, double y);
++
++#pragma omp declare simd simdlen(4) notinbranch
++float powf (float x, float y);
++
++#pragma omp declare simd simdlen(4) notinbranch
++float exp2f (float x);
++
++#ifdef __cplusplus
++} // extern "C"
++#endif
diff --git a/fix-CTOR-vectorization.patch b/fix-CTOR-vectorization.patch
new file mode 100644
index 0000000..3cb30ad
--- /dev/null
+++ b/fix-CTOR-vectorization.patch
@@ -0,0 +1,18 @@
+This backport contains 1 patch from the gcc mainstream tree.
+The commit id of this patch is listed below.
+
+0001-fix-CTOR-vectorization.patch
+3d42842c07f4143042f3dcc39a050b262bcf1b55
+
+diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
+index 9d17e3386fa..fb13af7965e 100644
+--- a/gcc/tree-vect-slp.c
++++ b/gcc/tree-vect-slp.c
+@@ -2257,6 +2257,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
+ 	  /* Value is defined in another basic block.  */
+ 	  if (!def_info)
+ 	    return false;
++	  def_info = vect_stmt_to_vectorize (def_info);
+ 	  scalar_stmts.safe_push (def_info);
+ 	}
+       else
diff --git a/fix-ICE-in-eliminate-stmt.patch b/fix-ICE-in-eliminate-stmt.patch
new file mode 100644
index 0000000..7c5ee13
--- /dev/null
+++ b/fix-ICE-in-eliminate-stmt.patch
@@ -0,0 +1,79 @@
+commit ee80f0c6ba50ebf0300fb0cfe1079a1321295749
+Author: Richard Biener
+Date:   Thu Oct 24 11:23:54 2019 +0000
+
+    re PR tree-optimization/92203 (ICE in eliminate_stmt, at tree-ssa-sccvn.c:5492)
+
+    2019-10-24  Richard Biener
+
+    	PR tree-optimization/92203
+    	* tree-ssa-sccvn.c (eliminate_dom_walker::eliminate_stmt):
+    	Skip eliminating conversion stmts inserted by insertion.
+
+    	* gcc.dg/torture/pr92203.c: New testcase.
+
+    From-SVN: r277374
+
+diff --git a/gcc/testsuite/gcc.dg/torture/pr92203.c b/gcc/testsuite/gcc.dg/torture/pr92203.c
+new file mode 100644
+index 00000000000..c752969d5e5
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/torture/pr92203.c
+@@ -0,0 +1,37 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-Wno-div-by-zero" } */
++
++unsigned long int rr;
++
++void
++cw (int z9)
++{
++  int m5;
++  unsigned long int vz = 0;
++  long int *na;
++
++  if (z9 == 0)
++    rr = 0;
++  else
++    {
++      na = (long int *) &m5;
++      for (*na = 0; *na < 1; ++*na)
++	{
++	  na = (long int *) &vz;
++	  rr /= 0;
++	}
++    }
++
++  m5 = rr / 5;
++  ++vz;
++  if (vz != 0)
++    while (z9 < 1)
++      {
++	if (m5 >= 0)
++	  rr += m5;
++
++	na = (long int *) &rr;
++	if (*na >= 0)
++	  rr = 0;
++      }
++}
+diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
+index 57331ab44dc..3872168a4ed 100644
+--- a/gcc/tree-ssa-sccvn.c
++++ b/gcc/tree-ssa-sccvn.c
+@@ -5459,8 +5459,13 @@ eliminate_dom_walker::eliminate_stmt (basic_block b, gimple_stmt_iterator *gsi)
+ 
+ 	  /* If this is an assignment from our leader (which
+ 	     happens in the case the value-number is a constant)
+-	     then there is nothing to do.  */
+-	  if (gimple_assign_single_p (stmt)
++	     then there is nothing to do.  Likewise if we run into
++	     inserted code that needed a conversion because of
++	     our type-agnostic value-numbering of loads.  */
++	  if ((gimple_assign_single_p (stmt)
++	       || (is_gimple_assign (stmt)
++		   && (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
++		       || gimple_assign_rhs_code (stmt) == VIEW_CONVERT_EXPR)))
+ 	      && sprime == gimple_assign_rhs1 (stmt))
+ 	    return;
+ 
diff --git a/fix-ICE-in-vect.patch b/fix-ICE-in-vect.patch
new file mode 100644
index 0000000..3b59dd2
--- /dev/null
+++ b/fix-ICE-in-vect.patch
@@ -0,0 +1,1037 @@
+This backport contains 5 patches from the gcc mainstream tree.
+The commit ids of these patches are listed below in chronological order.
+
+0001-Improve-tree-vect-patterns.c-handling-of-boolean-com.patch
+ce19a4822794992097deab96bf15bf78ff481ea1
+0002-Make-vectorizable_operation-punt-early-on-codes-it-d.patch
+4177e933b309408e69eb5561fee7a3cc5e6f8899
+0003-Make-vect_get_mask_type_for_stmt-take-a-group-size.patch
+1c5d68a677b076262c5508e6d4fbdb765cba2d2f
+0004-Record-the-vector-mask-precision-in-stmt_vec_info.patch
+0c3ea6b3424ee4d32d97ca5d7453891b587b3132
+0005-Don-t-defer-choice-of-vector-type-for-bools-PR-92596.patch
+02d895504cc59be06fc3f7ec0cfd4eb160561211
+
+diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-43.c b/gcc/testsuite/gcc.dg/vect/bb-slp-43.c
+--- a/gcc/testsuite/gcc.dg/vect/bb-slp-43.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-43.c	2021-02-08 09:21:04.487633230 +0800
+@@ -0,0 +1,17 @@
++/* { dg-do compile } */
++
++void
++f (int *restrict x, short *restrict y)
++{
++  x[0] = x[0] == 1 & y[0] == 2;
++  x[1] = x[1] == 1 & y[1] == 2;
++  x[2] = x[2] == 1 & y[2] == 2;
++  x[3] = x[3] == 1 & y[3] == 2;
++  x[4] = x[4] == 1 & y[4] == 2;
++  x[5] = x[5] == 1 & y[5] == 2;
++  x[6] = x[6] == 1 & y[6] == 2;
++  x[7] = x[7] == 1 & y[7] == 2;
++}
++
++/* { dg-final { scan-tree-dump-not "mixed mask and nonmask" "slp2" } } */
++/* { dg-final { scan-tree-dump-not "vector operands from scalars" "slp2" { target { { vect_int && vect_bool_cmp } && { vect_unpack && vect_hw_misalign } } xfail vect_variable_length } } } */
+diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-pr92596.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr92596.c
+--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr92596.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr92596.c	2021-02-08 09:21:04.487633230 +0800
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-O3" } */
++
++typedef struct {
++  long n[5];
++} secp256k1_fe;
++
++secp256k1_fe a;
++
++void fn1(int p1) { a.n[0] = a.n[1] = a.n[2] = p1; }
++void fn2() {
++  int b;
++  fn1(!b);
++}
+diff -Nurp a/gcc/testsuite/gcc.dg/vect/vect-bool-cmp-2.c b/gcc/testsuite/gcc.dg/vect/vect-bool-cmp-2.c
+--- a/gcc/testsuite/gcc.dg/vect/vect-bool-cmp-2.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/vect/vect-bool-cmp-2.c	2021-02-08 09:21:04.487633230 +0800
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++
++void
++f (_Bool *restrict x, _Bool *restrict y)
++{
++  for (int i = 0; i < 128; ++i)
++    x[i] = x[i] == y[i];
++}
++
++/* { dg-final { scan-tree-dump "loop vectorized" "vect" { target vect_bool_cmp } } } */
+diff -Nurp a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
+--- a/gcc/testsuite/lib/target-supports.exp	2021-02-08 09:24:27.611633230 +0800
++++ b/gcc/testsuite/lib/target-supports.exp	2021-02-08 09:21:03.859633230 +0800
+@@ -5668,6 +5668,16 @@ proc check_effective_target_vect_bswap {
+ 	     || [istarget amdgcn-*-*] }}]
+ }
+ 
++# Return 1 if the target supports comparison of bool vectors for at
++# least one vector length.
++
++proc check_effective_target_vect_bool_cmp { } {
++    return [check_cached_effective_target_indexed vect_bool_cmp {
++      expr { [istarget i?86-*-*] || [istarget x86_64-*-*]
++	     || [istarget aarch64*-*-*]
++	     || [is-effective-target arm_neon] }}]
++}
++
+ # Return 1 if the target supports hardware vector shift operation for char.
+ 
+ proc check_effective_target_vect_shift_char { } {
+diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
+--- a/gcc/tree-vect-loop.c	2021-02-08 09:24:26.471633230 +0800
++++ b/gcc/tree-vect-loop.c	2021-02-08 09:21:02.719633230 +0800
+@@ -164,8 +164,7 @@ static stmt_vec_info vect_is_simple_redu
+ static opt_result
+ vect_determine_vf_for_stmt_1 (stmt_vec_info stmt_info,
+ 			      bool vectype_maybe_set_p,
+-			      poly_uint64 *vf,
+-			      vec<stmt_vec_info> *mask_producers)
++			      poly_uint64 *vf)
+ {
+   gimple *stmt = stmt_info->stmt;
+ 
+@@ -193,8 +192,6 @@ vect_determine_vf_for_stmt_1 (stmt_vec_i
+     gcc_assert ((STMT_VINFO_DATA_REF (stmt_info)
+ 		 || vectype_maybe_set_p)
+ 		&& STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype);
+-  else if (stmt_vectype == boolean_type_node)
+-    mask_producers->safe_push (stmt_info);
+   else
+     STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;
+ 
+@@ -207,21 +204,17 @@ vect_determine_vf_for_stmt_1 (stmt_vec_i
+ 
+ /* Subroutine of vect_determine_vectorization_factor.  Set the vector
+    types of STMT_INFO and all attached pattern statements and update
+-   the vectorization factor VF accordingly.  If some of the statements
+-   produce a mask result whose vector type can only be calculated later,
+-   add them to MASK_PRODUCERS.  Return true on success or false if
+-   something prevented vectorization.  */
++   the vectorization factor VF accordingly.  Return true on success
++   or false if something prevented vectorization.  */
+ 
+ static opt_result
+-vect_determine_vf_for_stmt (stmt_vec_info stmt_info, poly_uint64 *vf,
+-			    vec<stmt_vec_info> *mask_producers)
++vect_determine_vf_for_stmt (stmt_vec_info stmt_info, poly_uint64 *vf)
+ {
+   vec_info *vinfo = stmt_info->vinfo;
+   if (dump_enabled_p ())
+     dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
+ 		     stmt_info->stmt);
+-  opt_result res
+-    = vect_determine_vf_for_stmt_1 (stmt_info, false, vf, mask_producers);
++  opt_result res = vect_determine_vf_for_stmt_1 (stmt_info, false, vf);
+   if (!res)
+     return res;
+ 
+@@ -240,10 +233,7 @@ vect_determine_vf_for_stmt (stmt_vec_inf
+ 	    dump_printf_loc (MSG_NOTE, vect_location,
+ 			     "==> examining pattern def stmt: %G",
+ 			     def_stmt_info->stmt);
+-	  if (!vect_determine_vf_for_stmt_1 (def_stmt_info, true,
+-					     vf, mask_producers))
+-	    res = vect_determine_vf_for_stmt_1 (def_stmt_info, true,
+-						vf, mask_producers);
++	  res = vect_determine_vf_for_stmt_1 (def_stmt_info, true, vf);
+ 	  if (!res)
+ 	    return res;
+ 	}
+@@ -252,7 +242,7 @@ vect_determine_vf_for_stmt (stmt_vec_inf
+ 	dump_printf_loc (MSG_NOTE, vect_location,
+ 			 "==> examining pattern statement: %G",
+ 			 stmt_info->stmt);
+-      res = vect_determine_vf_for_stmt_1 (stmt_info, true, vf, mask_producers);
++      res = vect_determine_vf_for_stmt_1 (stmt_info, true, vf);
+       if (!res)
+ 	return res;
+     }
+@@ -297,7 +287,6 @@ vect_determine_vectorization_factor (loo
+   tree vectype;
+   stmt_vec_info stmt_info;
+   unsigned i;
+-  auto_vec<stmt_vec_info> mask_producers;
+ 
+   DUMP_VECT_SCOPE ("vect_determine_vectorization_factor");
+ 
+@@ -355,8 +344,7 @@ vect_determine_vectorization_factor (loo
+ 	{
+ 	  stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
+ 	  opt_result res
+-	    = vect_determine_vf_for_stmt (stmt_info, &vectorization_factor,
+-					  &mask_producers);
++	    = vect_determine_vf_for_stmt (stmt_info, &vectorization_factor);
+ 	  if (!res)
+ 	    return res;
+ 	}
+@@ -374,16 +362,6 @@ vect_determine_vectorization_factor (loo
+     return opt_result::failure_at (vect_location,
+ 				   "not vectorized: unsupported data-type\n");
+   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+-
+-  for (i = 0; i < mask_producers.length (); i++)
+-    {
+-      stmt_info = mask_producers[i];
+-      opt_tree mask_type = vect_get_mask_type_for_stmt (stmt_info);
+-      if (!mask_type)
+-	return opt_result::propagate_failure (mask_type);
+-      STMT_VINFO_VECTYPE (stmt_info) = mask_type;
+-    }
+-
+   return opt_result::success ();
+ }
+ 
+diff -Nurp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
+--- a/gcc/tree-vectorizer.h	2021-02-08 09:24:26.463633230 +0800
++++ b/gcc/tree-vectorizer.h	2021-02-08 09:21:02.619633230 +0800
+@@ -1080,6 +1080,23 @@ struct _stmt_vec_info {
+   unsigned int operation_precision;
+   signop operation_sign;
+ 
++  /* If the statement produces a boolean result, this value describes
++     how we should choose the associated vector type.  The possible
++     values are:
++
++     - an integer precision N if we should use the vector mask type
++       associated with N-bit integers.  This is only used if all relevant
++       input booleans also want the vector mask type for N-bit integers,
++       or if we can convert them into that form by pattern-matching.
++
++     - ~0U if we considered choosing a vector mask type but decided
++       to treat the boolean as a normal integer type instead.
++
++     - 0 otherwise.  This means either that the operation isn't one that
++       could have a vector mask type (and so should have a normal vector
++       type instead) or that we simply haven't made a choice either way.  */
++  unsigned int mask_precision;
++
+   /* True if this is only suitable for SLP vectorization.  */
+   bool slp_vect_only_p;
+ };
+@@ -1236,6 +1253,15 @@ nested_in_vect_loop_p (struct loop *loop
+ 	  && (loop->inner == (gimple_bb (stmt_info->stmt))->loop_father));
+ }
+ 
++/* Return true if STMT_INFO should produce a vector mask type rather than
++   a normal nonmask type.  */
++
++static inline bool
++vect_use_mask_type_p (stmt_vec_info stmt_info)
++{
++  return stmt_info->mask_precision && stmt_info->mask_precision != ~0U;
++}
++
+ /* Return TRUE if a statement represented by STMT_INFO is a part of a
+    pattern.  */
+ 
+@@ -1620,7 +1646,7 @@ extern tree get_related_vectype_for_scal
+ 						poly_uint64 = 0);
+ extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0);
+ extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree);
+-extern tree get_mask_type_for_scalar_type (vec_info *, tree, slp_tree = 0);
++extern tree get_mask_type_for_scalar_type (vec_info *, tree, unsigned int = 0);
+ extern tree get_same_sized_vectype (tree, tree);
+ extern bool vect_chooses_same_modes_p (vec_info *, machine_mode);
+ extern bool vect_get_loop_mask_type (loop_vec_info);
+@@ -1673,7 +1699,7 @@ extern gcall *vect_gen_while (tree, tree
+ extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree);
+ extern opt_result vect_get_vector_types_for_stmt (stmt_vec_info, tree *,
+ 						  tree *, unsigned int = 0);
+-extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, slp_tree = 0);
++extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, unsigned int = 0);
+ 
+ /* In tree-vect-data-refs.c.  */
+ extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64);
+diff -Nurp a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
+--- a/gcc/tree-vect-patterns.c	2021-02-08 09:24:26.467633230 +0800
++++ b/gcc/tree-vect-patterns.c	2021-02-08 09:21:02.543633230 +0800
+@@ -112,7 +112,12 @@ vect_init_pattern_stmt (gimple *pattern_
+   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
+     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
+   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
+-    STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
++    {
++      gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
++		  == vect_use_mask_type_p (orig_stmt_info));
++      STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
++      pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
++    }
+   return pattern_stmt_info;
+ }
+ 
+@@ -131,17 +136,25 @@ vect_set_pattern_stmt (gimple *pattern_s
+ 
+ /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
+    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
+-   be different from the vector type of the final pattern statement.  */
++   be different from the vector type of the final pattern statement.
++   If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
++   from which it was derived.  */
+ 
+ static inline void
+ append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
+-			tree vectype = NULL_TREE)
++			tree vectype = NULL_TREE,
++			tree scalar_type_for_mask = NULL_TREE)
+ {
++  gcc_assert (!scalar_type_for_mask
++	      == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
+   vec_info *vinfo = stmt_info->vinfo;
+   if (vectype)
+     {
+       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
+       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
++      if (scalar_type_for_mask)
++	new_stmt_info->mask_precision
++	  = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
+     }
+   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
+ 				      new_stmt);
+@@ -3875,107 +3888,22 @@ adjust_bool_stmts (hash_set <gimple *> &
+   return gimple_assign_lhs (pattern_stmt);
+ }
+ 
+-/* Helper for search_type_for_mask.  */
++/* Return the proper type for converting bool VAR into
++   an integer value or NULL_TREE if no such type exists.
++   The type is chosen so that the converted value has the
++   same number of elements as VAR's vector type.  */
+ 
+ static tree
+-search_type_for_mask_1 (tree var, vec_info *vinfo,
+-			hash_map<gimple *, tree> &cache)
++integer_type_for_mask (tree var, vec_info *vinfo)
+ {
+-  tree rhs1;
+-  enum tree_code rhs_code;
+-  tree res = NULL_TREE, res2;
+-
+   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
+     return NULL_TREE;
+ 
+   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
+-  if (!def_stmt_info)
++  if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
+     return NULL_TREE;
+ 
+-  gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
+-  if (!def_stmt)
+-    return NULL_TREE;
+-
+-  tree *c = cache.get (def_stmt);
+-  if (c)
+-    return *c;
+-
+-  rhs_code = gimple_assign_rhs_code (def_stmt);
+-  rhs1 = gimple_assign_rhs1 (def_stmt);
+-
+-  switch (rhs_code)
+-    {
+-    case SSA_NAME:
+-    case BIT_NOT_EXPR:
+-    CASE_CONVERT:
+-      res = search_type_for_mask_1 (rhs1, vinfo, cache);
+-      break;
+-
+-    case BIT_AND_EXPR:
+-    case BIT_IOR_EXPR:
+-    case BIT_XOR_EXPR:
+-      res = search_type_for_mask_1 (rhs1, vinfo, cache);
+-      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
+-				     cache);
+-      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
+-	res = res2;
+-      break;
+-
+-    default:
+-      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
+-	{
+-	  tree comp_vectype, mask_type;
+-
+-	  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
+-	    {
+-	      res = search_type_for_mask_1 (rhs1, vinfo, cache);
+-	      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
+-					     vinfo, cache);
+-	      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
+-		res = res2;
+-	      break;
+-	    }
+-
+-	  comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
+-	  if (comp_vectype == NULL_TREE)
+-	    {
+-	      res = NULL_TREE;
+-	      break;
+-	    }
+-
+-	  mask_type = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (rhs1));
+-	  if (!mask_type
+-	      || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
+-	    {
+-	      res = NULL_TREE;
+-	      break;
+-	    }
+-
+-	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
+-	      || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
+-	    {
+-	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
+-	      res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
+-	    }
+-	  else
+-	    res = TREE_TYPE (rhs1);
+-	}
+-    }
+-
+-  cache.put (def_stmt, res);
+-  return res;
+-}
+-
+-/* Return the proper type for converting bool VAR into
+-   an integer value or NULL_TREE if no such type exists.
+-   The type is chosen so that converted value has the
+-   same number of elements as VAR's vector type.  */
+-
+-static tree
+-search_type_for_mask (tree var, vec_info *vinfo)
+-{
+-  hash_map<gimple *, tree> cache;
+-  return search_type_for_mask_1 (var, vinfo, cache);
++  return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
+ }
+ 
+ /* Function vect_recog_bool_pattern
+@@ -4067,7 +3995,7 @@ vect_recog_bool_pattern (stmt_vec_info s
+     }
+   else
+     {
+-      tree type = search_type_for_mask (var, vinfo);
++      tree type = integer_type_for_mask (var, vinfo);
+       tree cst0, cst1, tmp;
+ 
+       if (!type)
+@@ -4152,7 +4080,7 @@ vect_recog_bool_pattern (stmt_vec_info s
+ 	rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
+       else
+ 	{
+-	  tree type = search_type_for_mask (var, vinfo);
++	  tree type = integer_type_for_mask (var, vinfo);
+ 	  tree cst0, cst1, new_vectype;
+ 
+ 	  if (!type)
+@@ -4207,7 +4135,7 @@ build_mask_conversion (tree mask, tree v
+   masktype = truth_type_for (vectype);
+   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
+   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
+-  append_pattern_def_seq (stmt_vinfo, stmt, masktype);
++  append_pattern_def_seq (stmt_vinfo, stmt, masktype, TREE_TYPE (vectype));
+ 
+   return tmp;
+ }
+@@ -4275,7 +4203,7 @@ vect_recog_mask_conversion_pattern (stmt
+ 	}
+ 
+       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
+-      tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
++      tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
+       if (!mask_arg_type)
+ 	return NULL;
+       vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
+@@ -4328,7 +4256,7 @@ vect_recog_mask_conversion_pattern (stmt
+ 
+       if (TREE_CODE (rhs1) == SSA_NAME)
+ 	{
+-	  rhs1_type = search_type_for_mask (rhs1, vinfo);
++	  rhs1_type = integer_type_for_mask (rhs1, vinfo);
+ 	  if (!rhs1_type)
+ 	    return NULL;
+ 	}
+@@ -4352,8 +4280,8 @@ vect_recog_mask_conversion_pattern (stmt
+ 	  rhs1_op1 = TREE_OPERAND (rhs1, 1);
+ 	  if (!rhs1_op0 || !rhs1_op1)
+ 	    return NULL;
+-	  rhs1_op0_type = search_type_for_mask (rhs1_op0, vinfo);
+-	  rhs1_op1_type = search_type_for_mask (rhs1_op1, vinfo);
++	  rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
++	  rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
+ 
+ 	  if (!rhs1_op0_type)
+ 	    rhs1_type = TREE_TYPE (rhs1_op0);
+@@ -4441,7 +4369,8 @@ vect_recog_mask_conversion_pattern (stmt
+ 	    pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
+ 						rhs1_op0, rhs1_op1);
+ 	  rhs1 = tmp;
+-	  append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
++	  append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2,
++				  rhs1_type);
+ 	}
+ 
+       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
+@@ -4474,8 +4403,8 @@ vect_recog_mask_conversion_pattern (stmt
+ 
+       rhs2 = gimple_assign_rhs2 (last_stmt);
+ 
+-      rhs1_type = search_type_for_mask (rhs1, vinfo);
+-      rhs2_type = search_type_for_mask (rhs2, vinfo);
++      rhs1_type = integer_type_for_mask (rhs1, vinfo);
++      rhs2_type = integer_type_for_mask (rhs2, vinfo);
+ 
+       if (!rhs1_type || !rhs2_type
+ 	  || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
+@@ -4558,7 +4487,7 @@ static tree
+ vect_convert_mask_for_vectype (tree mask, tree vectype,
+ 			       stmt_vec_info stmt_info, vec_info *vinfo)
+ {
+-  tree mask_type = search_type_for_mask (mask, vinfo);
++  tree mask_type = integer_type_for_mask (mask, vinfo);
+   if (mask_type)
+     {
+       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
+@@ -4997,6 +4926,148 @@ vect_determine_precisions_from_users (st
+   vect_set_min_input_precision (stmt_info, type, min_input_precision);
+ }
+ 
++/* Return true if the statement described by STMT_INFO sets a boolean
++   SSA_NAME
and if we know how to vectorize this kind of statement using ++ vector mask types. */ ++ ++static bool ++possible_vector_mask_operation_p (stmt_vec_info stmt_info) ++{ ++ tree lhs = gimple_get_lhs (stmt_info->stmt); ++ if (!lhs ++ || TREE_CODE (lhs) != SSA_NAME ++ || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs))) ++ return false; ++ ++ if (gassign *assign = dyn_cast (stmt_info->stmt)) ++ { ++ tree_code rhs_code = gimple_assign_rhs_code (assign); ++ switch (rhs_code) ++ { ++ CASE_CONVERT: ++ case SSA_NAME: ++ case BIT_NOT_EXPR: ++ case BIT_IOR_EXPR: ++ case BIT_XOR_EXPR: ++ case BIT_AND_EXPR: ++ return true; ++ ++ default: ++ return TREE_CODE_CLASS (rhs_code) == tcc_comparison; ++ } ++ } ++ return false; ++} ++ ++/* If STMT_INFO sets a boolean SSA_NAME, see whether we should use ++ a vector mask type instead of a normal vector type. Record the ++ result in STMT_INFO->mask_precision. */ ++ ++static void ++vect_determine_mask_precision (stmt_vec_info stmt_info) ++{ ++ vec_info *vinfo = stmt_info->vinfo; ++ ++ if (!possible_vector_mask_operation_p (stmt_info) ++ || stmt_info->mask_precision) ++ return; ++ ++ auto_vec worklist; ++ worklist.quick_push (stmt_info); ++ while (!worklist.is_empty ()) ++ { ++ stmt_info = worklist.last (); ++ unsigned int orig_length = worklist.length (); ++ ++ /* If at least one boolean input uses a vector mask type, ++ pick the mask type with the narrowest elements. ++ ++ ??? This is the traditional behavior. It should always produce ++ the smallest number of operations, but isn't necessarily the ++ optimal choice. For example, if we have: ++ ++ a = b & c ++ ++ where: ++ ++ - the user of a wants it to have a mask type for 16-bit elements (M16) ++ - b also uses M16 ++ - c uses a mask type for 8-bit elements (M8) ++ ++ then picking M8 gives: ++ ++ - 1 M16->M8 pack for b ++ - 1 M8 AND for a ++ - 2 M8->M16 unpacks for the user of a ++ ++ whereas picking M16 would have given: ++ ++ - 2 M8->M16 unpacks for c ++ - 2 M16 ANDs for a ++ ++ The number of operations are equal, but M16 would have given ++ a shorter dependency chain and allowed more ILP. */ ++ unsigned int precision = ~0U; ++ gassign *assign = as_a (stmt_info->stmt); ++ unsigned int nops = gimple_num_ops (assign); ++ for (unsigned int i = 1; i < nops; ++i) ++ { ++ tree rhs = gimple_op (assign, i); ++ if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs))) ++ continue; ++ ++ stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); ++ if (!def_stmt_info) ++ /* Don't let external or constant operands influence the choice. ++ We can convert them to whichever vector type we pick. */ ++ continue; ++ ++ if (def_stmt_info->mask_precision) ++ { ++ if (precision > def_stmt_info->mask_precision) ++ precision = def_stmt_info->mask_precision; ++ } ++ else if (possible_vector_mask_operation_p (def_stmt_info)) ++ worklist.safe_push (def_stmt_info); ++ } ++ ++ /* Defer the choice if we need to visit operands first. */ ++ if (orig_length != worklist.length ()) ++ continue; ++ ++ /* If the statement compares two values that shouldn't use vector masks, ++ try comparing the values as normal scalars instead. 
*/ ++ tree_code rhs_code = gimple_assign_rhs_code (assign); ++ if (precision == ~0U ++ && TREE_CODE_CLASS (rhs_code) == tcc_comparison) ++ { ++ tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign)); ++ scalar_mode mode; ++ tree vectype, mask_type; ++ if (is_a (TYPE_MODE (rhs1_type), &mode) ++ && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type)) ++ && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type)) ++ && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code)) ++ precision = GET_MODE_BITSIZE (mode); ++ } ++ ++ if (dump_enabled_p ()) ++ { ++ if (precision == ~0U) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "using normal nonmask vectors for %G", ++ stmt_info->stmt); ++ else ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "using boolean precision %d for %G", ++ precision, stmt_info->stmt); ++ } ++ ++ stmt_info->mask_precision = precision; ++ worklist.pop (); ++ } ++} ++ + /* Handle vect_determine_precisions for STMT_INFO, given that we + have already done so for the users of its result. */ + +@@ -5009,6 +5080,7 @@ vect_determine_stmt_precisions (stmt_vec + vect_determine_precisions_from_range (stmt_info, stmt); + vect_determine_precisions_from_users (stmt_info, stmt); + } ++ vect_determine_mask_precision (stmt_info); + } + + /* Walk backwards through the vectorizable region to determine the ++-search_type_for_mask (tree var, vec_info *vinfo) ++-{ ++- hash_map cache; ++- return search_type_for_mask_1 (var, vinfo, cache); +++ return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1); ++ } ++ ++ /* Function vect_recog_bool_pattern ++@@ -4371,7 +4298,7 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out) ++ ++ it is better for b1 and b2 to use the mask type associated ++ with int elements rather bool (byte) elements. */ ++- rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo); +++ rhs1_type = integer_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo); ++ if (!rhs1_type) ++ rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0)); ++ } ++@@ -4427,7 +4354,8 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out) ++ tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL); ++ pattern_stmt = gimple_build_assign (tmp, rhs1); ++ rhs1 = tmp; ++- append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2); +++ append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2, +++ rhs1_type); ++ } ++ ++ if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1), +diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c +--- a/gcc/tree-vect-slp.c 2021-02-08 09:24:26.471633230 +0800 ++++ b/gcc/tree-vect-slp.c 2021-02-08 09:21:02.719633230 +0800 +@@ -906,17 +906,6 @@ vect_build_slp_tree_1 (unsigned char *sw + || rhs_code == LROTATE_EXPR + || rhs_code == RROTATE_EXPR) + { +- if (vectype == boolean_type_node) +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "Build SLP failed: shift of a" +- " boolean.\n"); +- /* Fatal mismatch. */ +- matches[0] = false; +- return false; +- } +- + vec_mode = TYPE_MODE (vectype); + + /* First see if we have a vector/vector shift. 
*/ +@@ -1137,9 +1126,8 @@ vect_build_slp_tree_1 (unsigned char *sw + if (alt_stmt_code != ERROR_MARK + && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference) + { +- if (vectype == boolean_type_node +- || !vect_two_operations_perm_ok_p (stmts, group_size, +- vectype, alt_stmt_code)) ++ if (!vect_two_operations_perm_ok_p (stmts, group_size, ++ vectype, alt_stmt_code)) + { + for (i = 0; i < group_size; ++i) + if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code) +@@ -2746,24 +2734,6 @@ vect_slp_analyze_node_operations_1 (vec_ + stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; + gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect); + +- /* For BB vectorization vector types are assigned here. +- Memory accesses already got their vector type assigned +- in vect_analyze_data_refs. */ +- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); +- if (bb_vinfo && STMT_VINFO_VECTYPE (stmt_info) == boolean_type_node) +- { +- tree vectype = vect_get_mask_type_for_stmt (stmt_info, node); +- if (!vectype) +- /* vect_get_mask_type_for_stmt has already explained the +- failure. */ +- return false; +- +- stmt_vec_info sstmt_info; +- unsigned int i; +- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info) +- STMT_VINFO_VECTYPE (sstmt_info) = vectype; +- } +- + /* Calculate the number of vector statements to be created for the + scalar stmts in this node. For SLP reductions it is equal to the + number of vector statements in the children (which has already been +diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +--- a/gcc/tree-vect-stmts.c 2021-02-08 09:24:26.371633230 +0800 ++++ b/gcc/tree-vect-stmts.c 2021-02-08 09:21:02.543633230 +0800 +@@ -3334,6 +3334,15 @@ vectorizable_call (stmt_vec_info stmt_in + return false; + } + ++ if (VECTOR_BOOLEAN_TYPE_P (vectype_out) ++ != VECTOR_BOOLEAN_TYPE_P (vectype_in)) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "mixed mask and nonmask vector types\n"); ++ return false; ++ } ++ + /* FORNOW */ + nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); + nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); +@@ -5938,6 +5947,21 @@ vectorizable_operation (stmt_vec_info st + + orig_code = code = gimple_assign_rhs_code (stmt); + ++ /* Shifts are handled in vectorizable_shift. */ ++ if (code == LSHIFT_EXPR ++ || code == RSHIFT_EXPR ++ || code == LROTATE_EXPR ++ || code == RROTATE_EXPR) ++ return false; ++ ++ /* Comparisons are handled in vectorizable_comparison. */ ++ if (TREE_CODE_CLASS (code) == tcc_comparison) ++ return false; ++ ++ /* Conditions are handled in vectorizable_condition. */ ++ if (code == COND_EXPR) ++ return false; ++ + /* For pointer addition and subtraction, we should use the normal + plus and minus for the vector operation. */ + if (code == POINTER_PLUS_EXPR) +@@ -5961,7 +5985,8 @@ vectorizable_operation (stmt_vec_info st + + /* Most operations cannot handle bit-precision types without extra + truncations. */ +- if (!VECTOR_BOOLEAN_TYPE_P (vectype_out) ++ bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out); ++ if (!mask_op_p + && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)) + /* Exception are bitwise binary operations. 
*/ + && code != BIT_IOR_EXPR +@@ -6023,10 +6048,11 @@ vectorizable_operation (stmt_vec_info st + if (maybe_ne (nunits_out, nunits_in)) + return false; + ++ tree vectype2 = NULL_TREE, vectype3 = NULL_TREE; + if (op_type == binary_op || op_type == ternary_op) + { + op1 = gimple_assign_rhs2 (stmt); +- if (!vect_is_simple_use (op1, vinfo, &dt[1])) ++ if (!vect_is_simple_use (op1, vinfo, &dt[1], &vectype2)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +@@ -6037,7 +6063,7 @@ vectorizable_operation (stmt_vec_info st + if (op_type == ternary_op) + { + op2 = gimple_assign_rhs3 (stmt); +- if (!vect_is_simple_use (op2, vinfo, &dt[2])) ++ if (!vect_is_simple_use (op2, vinfo, &dt[2], &vectype3)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +@@ -6062,10 +6088,20 @@ vectorizable_operation (stmt_vec_info st + + gcc_assert (ncopies >= 1); + +- /* Shifts are handled in vectorizable_shift (). */ +- if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR +- || code == RROTATE_EXPR) +- return false; ++ /* Reject attempts to combine mask types with nonmask types, e.g. if ++ we have an AND between a (nonmask) boolean loaded from memory and ++ a (mask) boolean result of a comparison. ++ ++ TODO: We could easily fix these cases up using pattern statements. */ ++ if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p ++ || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p) ++ || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p)) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "mixed mask and nonmask vector types\n"); ++ return false; ++ } + + /* Supportable by target? */ + +@@ -10410,14 +10446,15 @@ get_vectype_for_scalar_type (vec_info *v + + Returns the mask type corresponding to a result of comparison + of vectors of specified SCALAR_TYPE as supported by target. +- NODE, if nonnull, is the SLP tree node that will use the returned +- vector type. */ ++ If GROUP_SIZE is nonzero and we're performing BB vectorization, ++ make sure that the number of elements in the vector is no bigger ++ than GROUP_SIZE. */ + + tree + get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type, +- slp_tree node) ++ unsigned int group_size) + { +- tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node); ++ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size); + + if (!vectype) + return NULL; +@@ -11112,9 +11149,6 @@ vect_gen_while_not (gimple_seq *seq, tre + + - Set *STMT_VECTYPE_OUT to: + - NULL_TREE if the statement doesn't need to be vectorized; +- - boolean_type_node if the statement is a boolean operation whose +- vector type can only be determined once all the other vector types +- are known; and + - the equivalent of STMT_VINFO_VECTYPE otherwise. 
+ + - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum +@@ -11171,11 +11205,22 @@ vect_get_vector_types_for_stmt (stmt_vec + tree scalar_type = NULL_TREE; + if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info)) + { +- *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info); ++ vectype = STMT_VINFO_VECTYPE (stmt_info); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "precomputed vectype: %T\n", vectype); + } ++ else if (vect_use_mask_type_p (stmt_info)) ++ { ++ unsigned int precision = stmt_info->mask_precision; ++ scalar_type = build_nonstandard_integer_type (precision, 1); ++ vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size); ++ if (!vectype) ++ return opt_result::failure_at (stmt, "not vectorized: unsupported" ++ " data-type %T\n", scalar_type); ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype); ++ } + else + { + if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info)) +@@ -11185,28 +11230,6 @@ vect_get_vector_types_for_stmt (stmt_vec + else + scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); + +- /* Pure bool ops don't participate in number-of-units computation. +- For comparisons use the types being compared. */ +- if (!STMT_VINFO_DATA_REF (stmt_info) +- && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type) +- && is_gimple_assign (stmt) +- && gimple_assign_rhs_code (stmt) != COND_EXPR) +- { +- *stmt_vectype_out = boolean_type_node; +- +- tree rhs1 = gimple_assign_rhs1 (stmt); +- if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison +- && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))) +- scalar_type = TREE_TYPE (rhs1); +- else +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_NOTE, vect_location, +- "pure bool operation.\n"); +- return opt_result::success (); +- } +- } +- + if (dump_enabled_p ()) + { + if (group_size) +@@ -11224,18 +11247,15 @@ vect_get_vector_types_for_stmt (stmt_vec + " unsupported data-type %T\n", + scalar_type); + +- if (!*stmt_vectype_out) +- *stmt_vectype_out = vectype; +- + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype); + } ++ *stmt_vectype_out = vectype; + + /* Don't try to compute scalar types if the stmt produces a boolean + vector; use the existing vector type instead. */ + tree nunits_vectype = vectype; +- if (!VECTOR_BOOLEAN_TYPE_P (vectype) +- && *stmt_vectype_out != boolean_type_node) ++ if (!VECTOR_BOOLEAN_TYPE_P (vectype)) + { + /* The number of units is set according to the smallest scalar + type (or the largest vector size, but we only support one +@@ -11260,9 +11280,8 @@ vect_get_vector_types_for_stmt (stmt_vec + } + } + +- gcc_assert (*stmt_vectype_out == boolean_type_node +- || multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype), +- TYPE_VECTOR_SUBPARTS (*stmt_vectype_out))); ++ gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype), ++ TYPE_VECTOR_SUBPARTS (*stmt_vectype_out))); + + if (dump_enabled_p ()) + { +@@ -11274,82 +11293,3 @@ vect_get_vector_types_for_stmt (stmt_vec + *nunits_vectype_out = nunits_vectype; + return opt_result::success (); + } +- +-/* Try to determine the correct vector type for STMT_INFO, which is a +- statement that produces a scalar boolean result. Return the vector +- type on success, otherwise return NULL_TREE. NODE, if nonnull, +- is the SLP tree node that will use the returned vector type. 
*/ +- +-opt_tree +-vect_get_mask_type_for_stmt (stmt_vec_info stmt_info, slp_tree node) +-{ +- vec_info *vinfo = stmt_info->vinfo; +- gimple *stmt = stmt_info->stmt; +- tree mask_type = NULL; +- tree vectype, scalar_type; +- +- if (is_gimple_assign (stmt) +- && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison +- && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))) +- { +- scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); +- mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type, node); +- +- if (!mask_type) +- return opt_tree::failure_at (stmt, +- "not vectorized: unsupported mask\n"); +- } +- else +- { +- tree rhs; +- ssa_op_iter iter; +- enum vect_def_type dt; +- +- FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE) +- { +- if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype)) +- return opt_tree::failure_at (stmt, +- "not vectorized:can't compute mask" +- " type for statement, %G", stmt); +- +- /* No vectype probably means external definition. +- Allow it in case there is another operand which +- allows to determine mask type. */ +- if (!vectype) +- continue; +- +- if (!mask_type) +- mask_type = vectype; +- else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type), +- TYPE_VECTOR_SUBPARTS (vectype))) +- return opt_tree::failure_at (stmt, +- "not vectorized: different sized mask" +- " types in statement, %T and %T\n", +- mask_type, vectype); +- else if (VECTOR_BOOLEAN_TYPE_P (mask_type) +- != VECTOR_BOOLEAN_TYPE_P (vectype)) +- return opt_tree::failure_at (stmt, +- "not vectorized: mixed mask and " +- "nonmask vector types in statement, " +- "%T and %T\n", +- mask_type, vectype); +- } +- +- /* We may compare boolean value loaded as vector of integers. +- Fix mask_type in such case. */ +- if (mask_type +- && !VECTOR_BOOLEAN_TYPE_P (mask_type) +- && gimple_code (stmt) == GIMPLE_ASSIGN +- && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison) +- mask_type = truth_type_for (mask_type); +- } +- +- /* No mask_type should mean loop invariant predicate. +- This is probably a subject for optimization in if-conversion. */ +- if (!mask_type) +- return opt_tree::failure_at (stmt, +- "not vectorized: can't compute mask type " +- "for statement: %G", stmt); +- +- return opt_tree::success (mask_type); +-} diff --git a/fix-range-set-by-vectorization-on-niter-IVs.patch b/fix-range-set-by-vectorization-on-niter-IVs.patch new file mode 100644 index 0000000..d64a4b9 --- /dev/null +++ b/fix-range-set-by-vectorization-on-niter-IVs.patch @@ -0,0 +1,74 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-tree-optimization-98117-fix-range-set-by-vectorizati.patch +cdcbef3c3310a14f2994982b44cb1f8e14c77232 + +diff --git a/gcc/testsuite/gcc.dg/torture/pr98117.c b/gcc/testsuite/gcc.dg/torture/pr98117.c +new file mode 100644 +index 00000000000..f2160257263 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr98117.c +@@ -0,0 +1,19 @@ ++/* { dg-do run } */ ++/* { dg-additional-options "-fno-tree-scev-cprop" } */ ++ ++unsigned char c; ++void __attribute__((noipa)) ++e() ++{ ++ do ++ { ++ } ++ while (++c); ++} ++int main() ++{ ++ e(); ++ if (c != 0) ++ __builtin_abort (); ++ return 0; ++} +diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c +index 36179188f6d..2370b879b21 100644 +--- a/gcc/tree-vect-loop-manip.c ++++ b/gcc/tree-vect-loop-manip.c +@@ -2034,13 +2034,29 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters, + niters_vector = force_gimple_operand (niters_vector, &stmts, true, var); + gsi_insert_seq_on_edge_immediate (pe, stmts); + /* Peeling algorithm guarantees that vector loop bound is at least ONE, +- we set range information to make niters analyzer's life easier. */ ++ we set range information to make niters analyzer's life easier. ++ Note the number of latch iteration value can be TYPE_MAX_VALUE so ++ we have to represent the vector niter TYPE_MAX_VALUE + 1 >> log_vf. */ + if (stmts != NULL && log_vf) +- set_range_info (niters_vector, VR_RANGE, +- wi::to_wide (build_int_cst (type, 1)), +- wi::to_wide (fold_build2 (RSHIFT_EXPR, type, +- TYPE_MAX_VALUE (type), +- log_vf))); ++ { ++ if (niters_no_overflow) ++ set_range_info (niters_vector, VR_RANGE, ++ wi::one (TYPE_PRECISION (type)), ++ wi::rshift (wi::max_value (TYPE_PRECISION (type), ++ TYPE_SIGN (type)), ++ exact_log2 (const_vf), ++ TYPE_SIGN (type))); ++ /* For VF == 1 the vector IV might also overflow so we cannot ++ assert a minimum value of 1. */ ++ else if (const_vf > 1) ++ set_range_info (niters_vector, VR_RANGE, ++ wi::one (TYPE_PRECISION (type)), ++ wi::rshift (wi::max_value (TYPE_PRECISION (type), ++ TYPE_SIGN (type)) ++ - (const_vf - 1), ++ exact_log2 (const_vf), TYPE_SIGN (type)) ++ + 1); ++ } + } + *niters_vector_ptr = niters_vector; + *step_vector_ptr = step_vector; +-- +2.19.1 + diff --git a/gcc.spec b/gcc.spec index 2822547..368e0e8 100644 --- a/gcc.spec +++ b/gcc.spec @@ -1,4 +1,4 @@ -%global DATE 20210204 +%global DATE 20210428 %global gcc_version 9.3.1 %global gcc_major 9.3.1 @@ -59,7 +59,7 @@ Summary: Various compilers (C, C++, Objective-C, ...) 
Name: gcc Version: %{gcc_version} -Release: %{DATE}.16 +Release: %{DATE}.19 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD URL: https://gcc.gnu.org @@ -221,10 +221,21 @@ Patch104: fix-avx512vl-vcvttpd2dq-2-fail.patch Patch105: fix-issue604-ldist-dependency-fixup.patch Patch106: Apply-maximum-nunits-for-BB-SLP.patch Patch107: Fix-interaction-between-aka-changes-and-DR1558.patch -Patch108: Handle-POLY_INT_CSTs-in-declare_return_value.patch -Patch109: Handle-POLY_INT_CST-in-copy_reference_ops_from_ref.patch -Patch110: fix-strncpy-inline-warning.patch - +Patch108: fix-range-set-by-vectorization-on-niter-IVs.patch +Patch109: optabs-Dont-use-scalar-conversions-for-vectors.patch +Patch110: add-fp-model-options.patch +Patch111: fix-CTOR-vectorization.patch +Patch112: PR92429-do-not-fold-when-updating.patch +Patch113: Handle-POLY_INT_CSTs-in-declare_return_value.patch +Patch114: Handle-POLY_INT_CST-in-copy_reference_ops_from_ref.patch +Patch115: fix-strncpy-inline-warning.patch +Patch116: fix-ICE-in-vect.patch +Patch118: Fix-type-mismatch-in-SLPed-constructors.patch +Patch119: add-check-for-pressure-in-sche1.patch +Patch120: revert-moutline-atomics.patch +Patch121: fix-ICE-in-eliminate-stmt.patch +Patch122: revise-type-before-build-MULT.patch +Patch123: Simplify-X-C1-C2.patch %global gcc_target_platform %{_arch}-linux-gnu @@ -777,6 +788,18 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch108 -p1 %patch109 -p1 %patch110 -p1 +%patch111 -p1 +%patch112 -p1 +%patch113 -p1 +%patch114 -p1 +%patch115 -p1 +%patch116 -p1 +%patch118 -p1 +%patch119 -p1 +%patch120 -p1 +%patch121 -p1 +%patch122 -p1 +%patch123 -p1 %build @@ -785,8 +808,7 @@ export CONFIG_SITE=NONE CC=gcc CXX=g++ -OPT_FLAGS=`echo %{optflags}|sed -e 's/\(-Wp,\)\?-D_FORTIFY_SOURCE=[12]//g'` -OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/-m64//g;s/-m32//g;s/-m31//g'` +OPT_FLAGS=`echo %{optflags}|sed -e 's/-m64//g;s/-m32//g;s/-m31//g'` OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/-mfpmath=sse/-mfpmath=sse -msse2/g'` OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/ -pipe / /g'` OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/-Werror=format-security/ /g'` @@ -827,7 +849,7 @@ enablelgo=,go %if %{build_d} enableld=,d %endif -OPT_FLAGS="$OPT_FLAGS -fPIE -Wl,-z,relro,-z,now" +OPT_FLAGS="$OPT_FLAGS -O2 -Wp,-D_FORTIFY_SOURCE=2 -fstack-protector-strong -fPIE -Wl,-z,relro,-z,now" OPT_LDFLAGS="$OPT_LDFLAGS -Wl,-z,relro,-z,now" export extra_ldflags_libobjc="-Wl,-z,relro,-z,now" export FCFLAGS="$OPT_FLAGS" @@ -1804,6 +1826,7 @@ end %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/stdnoreturn.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/stdatomic.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/gcov.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/simdmath.h %ifarch %{ix86} x86_64 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/mmintrin.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/xmmintrin.h @@ -2231,6 +2254,7 @@ end %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/ieee_arithmetic.mod %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/ieee_exceptions.mod %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/ieee_features.mod +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/simdmath_f.h %{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major}/f951 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/libgfortran.spec 
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/libcaf_single.a @@ -2708,6 +2732,30 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Wed Apr 28 2021 eastb233 - 9.3.1-20210428.19 +- add-fp-model-options.patch: New file +- enable-simd-math.patch: Enable simd math library in C and Fortran +- fix-CTOR-vectorization.patch: New file +- fix-range-set-by-vectorization-on-niter-IVs.patch: New file +- medium-code-mode.patch: Fix bugs when used with fpic +- optabs-Dont-use-scalar-conversions-for-vectors.patch: New file +- PR92429-do-not-fold-when-updating.patch: New file +- redundant-loop-elimination.patch: Fix some programming specifications +- fix-ICE-in-vect.patch: New file +- Fix-type-mismatch-in-SLPed-constructors.patch: New file +- add-check-for-pressure-in-sche1.patch: New file +- revert-moutline-atomics.patch: New file +- fix-ICE-in-eliminate-stmt.patch: New file +- revise-type-before-build-MULT.patch: New file +- Simplify-X-C1-C2.patch: New file +- gcc.spec: Add new patches + +* Mon Mar 15 2021 tianwei - 9.3.1-20210204.17 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC:add SP and FS for x86 + * Thu Feb 04 2021 eastb233 - 9.3.1-20210204.16 - Handle-POLY_INT_CSTs-in-declare_return_value.patch: New file - Handle-POLY_INT_CST-in-copy_reference_ops_from_ref.patch: New file diff --git a/medium-code-mode.patch b/medium-code-mode.patch index cf629d2..f63e920 100644 --- a/medium-code-mode.patch +++ b/medium-code-mode.patch @@ -1,105 +1,98 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c ---- a/gcc/config/aarch64/aarch64.c 2020-07-16 14:54:30.588000000 +0800 -+++ b/gcc/config/aarch64/aarch64.c 2020-07-16 15:06:33.000000000 +0800 -@@ -2030,6 +2030,32 @@ aarch64_load_symref_appropriately (rtx d +--- a/gcc/config/aarch64/aarch64.c 2021-02-18 11:03:29.728000000 +0800 ++++ b/gcc/config/aarch64/aarch64.c 2021-02-18 14:59:54.432000000 +0800 +@@ -2417,6 +2417,29 @@ aarch64_load_symref_appropriately (rtx d emit_insn (gen_add_losym (dest, tmp_reg, imm)); return; } + case SYMBOL_MEDIUM_ABSOLUTE: -+ { -+ rtx tmp_reg = dest; -+ machine_mode mode = GET_MODE (dest); ++ { ++ rtx tmp_reg = dest; ++ machine_mode mode = GET_MODE (dest); + -+ gcc_assert (mode == Pmode || mode == ptr_mode); -+ if (can_create_pseudo_p ()) -+ tmp_reg = gen_reg_rtx (mode); ++ gcc_assert (mode == Pmode || mode == ptr_mode); ++ if (can_create_pseudo_p ()) ++ tmp_reg = gen_reg_rtx (mode); + -+ if (mode == DImode) -+ { -+ emit_insn ( -+ gen_load_symbol_medium_di (dest, tmp_reg, imm)); -+ } -+ else -+ { -+ emit_insn ( -+ gen_load_symbol_medium_si (dest, tmp_reg, imm)); -+ } -+ if (REG_P (dest)) -+ { -+ set_unique_reg_note ( -+ get_last_insn (), REG_EQUIV, copy_rtx (imm)); -+ } -+ return; -+ } ++ if (mode == DImode) ++ { ++ emit_insn (gen_load_symbol_medium_di (dest, tmp_reg, imm)); ++ } ++ else ++ { ++ emit_insn (gen_load_symbol_medium_si (dest, tmp_reg, imm)); ++ } ++ if (REG_P (dest)) ++ { ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (imm)); ++ } ++ return; ++ } case SYMBOL_TINY_ABSOLUTE: emit_insn (gen_rtx_SET (dest, imm)); -@@ -2152,6 +2178,64 @@ aarch64_load_symref_appropriately (rtx d +@@ -2539,6 +2562,60 @@ aarch64_load_symref_appropriately (rtx d return; } -+ case SYMBOL_MEDIUM_GOT_4G: -+ { -+ rtx tmp_reg = dest; -+ machine_mode mode = GET_MODE (dest); -+ if (can_create_pseudo_p ()) -+ { -+ tmp_reg = gen_reg_rtx (mode); -+ } -+ rtx insn; -+ rtx mem; -+ rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); ++ case SYMBOL_MEDIUM_GOT_4G: ++ { ++ rtx tmp_reg = dest; ++ 
machine_mode mode = GET_MODE (dest); ++ if (can_create_pseudo_p ()) ++ { ++ tmp_reg = gen_reg_rtx (mode); ++ } ++ rtx insn; ++ rtx mem; ++ rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); ++ ++ if (mode == DImode) ++ { ++ emit_insn (gen_load_symbol_medium_di (tmp_reg, dest, s)); ++ } ++ else ++ { ++ emit_insn (gen_load_symbol_medium_si (tmp_reg, dest, s)); ++ } ++ if (REG_P (dest)) ++ { ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (s)); ++ } + -+ if (mode == DImode) -+ { -+ emit_insn ( -+ gen_load_symbol_medium_di (tmp_reg, dest, s)); -+ } -+ else -+ { -+ emit_insn ( -+ gen_load_symbol_medium_si (tmp_reg, dest, s)); -+ } -+ if (REG_P (dest)) -+ { -+ set_unique_reg_note ( -+ get_last_insn (), REG_EQUIV, copy_rtx (s)); -+ } ++ if (mode == ptr_mode) ++ { ++ if (mode == DImode) ++ { ++ emit_insn (gen_get_gotoff_di (dest, imm)); ++ insn = gen_ldr_got_medium_di (dest, tmp_reg, dest); ++ } ++ else ++ { ++ emit_insn (gen_get_gotoff_si (dest, imm)); ++ insn = gen_ldr_got_medium_si (dest, tmp_reg, dest); ++ } ++ mem = XVECEXP (SET_SRC (insn), 0, 0); ++ } ++ else ++ { ++ gcc_assert (mode == Pmode); ++ emit_insn (gen_get_gotoff_di (dest, imm)); ++ insn = gen_ldr_got_medium_sidi (dest, tmp_reg, dest); ++ mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0); ++ } + -+ if (mode == ptr_mode) -+ { -+ if (mode == DImode) -+ { -+ emit_insn (gen_get_gotoff_di (dest, imm)); -+ insn = gen_ldr_got_medium_di ( -+ dest, tmp_reg, dest); -+ } -+ else -+ { -+ emit_insn (gen_get_gotoff_si (dest, imm)); -+ insn = gen_ldr_got_medium_si ( -+ dest, tmp_reg, dest); -+ } -+ mem = XVECEXP (SET_SRC (insn), 0, 0); -+ } -+ else -+ { -+ gcc_assert (mode == Pmode); -+ emit_insn (gen_get_gotoff_di (dest, imm)); -+ insn = gen_ldr_got_medium_sidi (dest, tmp_reg, dest); -+ mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0); -+ } ++ gcc_assert (GET_CODE (mem) == MEM); ++ MEM_READONLY_P (mem) = 1; ++ MEM_NOTRAP_P (mem) = 1; ++ emit_insn (insn); ++ return; ++ } + -+ gcc_assert (GET_CODE (mem) == MEM); -+ MEM_READONLY_P (mem) = 1; -+ MEM_NOTRAP_P (mem) = 1; -+ emit_insn (insn); -+ return; -+ } case SYMBOL_SMALL_TLSGD: { rtx_insn *insns; -@@ -3372,11 +3456,12 @@ aarch64_expand_mov_immediate (rtx dest, +@@ -4531,11 +4608,12 @@ aarch64_expand_mov_immediate (rtx dest, return; @@ -114,7 +107,7 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c case SYMBOL_TINY_GOT: case SYMBOL_TINY_TLSIE: if (const_offset != 0) -@@ -3395,6 +3480,7 @@ aarch64_expand_mov_immediate (rtx dest, +@@ -4554,6 +4632,7 @@ aarch64_expand_mov_immediate (rtx dest, case SYMBOL_TLSLE24: case SYMBOL_TLSLE32: case SYMBOL_TLSLE48: @@ -122,30 +115,61 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c aarch64_load_symref_appropriately (dest, imm, sty); return; -@@ -10334,6 +10420,13 @@ cost_plus: +@@ -8450,7 +8529,14 @@ aarch64_classify_address (struct aarch64 + split_const (info->offset, &sym, &offs); + if (GET_CODE (sym) == SYMBOL_REF + && (aarch64_classify_symbol (sym, INTVAL (offs)) +- == SYMBOL_SMALL_ABSOLUTE)) ++ == SYMBOL_SMALL_ABSOLUTE ++ /* Fix fail on dbl_mov_immediate_1.c. If end up here with ++ MEDIUM_ABSOLUTE, the symbol is a constant number that is ++ forced to memory in reload pass, which is ok to go on with ++ the original design that subtitude the mov to ++ 'adrp and ldr :losum'. */ ++ || aarch64_classify_symbol (sym, INTVAL (offs)) ++ == SYMBOL_MEDIUM_ABSOLUTE)) + { + /* The symbol and offset must be aligned to the access size. 
*/ + unsigned int align; +@@ -10365,7 +10451,13 @@ static inline bool + aarch64_can_use_per_function_literal_pools_p (void) + { + return (aarch64_pcrelative_literal_loads +- || aarch64_cmodel == AARCH64_CMODEL_LARGE); ++ || aarch64_cmodel == AARCH64_CMODEL_LARGE ++ /* Fix const9.C so that constants goes to function_literal_pools. ++ According to the orignal design of aarch64 mcmodel=medium, we ++ don't care where this symbol is put. For the benefit of code size ++ and behaviour consistent with other mcmodel, put it into ++ function_literal_pools. */ ++ || aarch64_cmodel == AARCH64_CMODEL_MEDIUM); + } + + static bool +@@ -11993,6 +12085,13 @@ cost_plus: if (speed) *cost += extra_cost->alu.arith; } -+ else if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM -+ || aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC) -+ { -+ /* 4 movs adr sub add 2movs ldr. */ -+ if (speed) -+ *cost += 7*extra_cost->alu.arith; -+ } ++ else if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM ++ || aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC) ++ { ++ /* 4 movs adr sub add 2movs ldr. */ ++ if (speed) ++ *cost += 7*extra_cost->alu.arith; ++ } if (flag_pic) { -@@ -10341,6 +10434,8 @@ cost_plus: +@@ -12000,6 +12099,8 @@ cost_plus: *cost += COSTS_N_INSNS (1); if (speed) *cost += extra_cost->ldst.load; + if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC) -+ *cost += 2*extra_cost->alu.arith; ++ *cost += 2*extra_cost->alu.arith; } return true; -@@ -11395,6 +11490,7 @@ initialize_aarch64_tls_size (struct gcc_ +@@ -13176,6 +13277,7 @@ initialize_aarch64_tls_size (struct gcc_ if (aarch64_tls_size > 32) aarch64_tls_size = 32; break; @@ -153,17 +177,17 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c case AARCH64_CMODEL_LARGE: /* The maximum TLS size allowed under large is 16E. FIXME: 16E should be 64bit, we only support 48bit offset now. */ -@@ -12187,6 +12283,9 @@ initialize_aarch64_code_model (struct gc +@@ -13968,6 +14070,9 @@ initialize_aarch64_code_model (struct gc aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC; #endif break; -+ case AARCH64_CMODEL_MEDIUM: -+ aarch64_cmodel = AARCH64_CMODEL_MEDIUM_PIC; ++ case AARCH64_CMODEL_MEDIUM: ++ aarch64_cmodel = AARCH64_CMODEL_MEDIUM_PIC; + break; case AARCH64_CMODEL_LARGE: sorry ("code model %qs with %<-f%s%>", "large", opts->x_flag_pic > 1 ? 
"PIC" : "pic"); -@@ -12205,6 +12304,7 @@ static void +@@ -13986,6 +14091,7 @@ static void aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts) { ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string; @@ -171,7 +195,7 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c ptr->x_aarch64_branch_protection_string = opts->x_aarch64_branch_protection_string; } -@@ -12220,6 +12320,7 @@ aarch64_option_restore (struct gcc_optio +@@ -14001,6 +14107,7 @@ aarch64_option_restore (struct gcc_optio opts->x_explicit_arch = ptr->x_explicit_arch; selected_arch = aarch64_get_arch (ptr->x_explicit_arch); opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string; @@ -179,7 +203,7 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c opts->x_aarch64_branch_protection_string = ptr->x_aarch64_branch_protection_string; if (opts->x_aarch64_branch_protection_string) -@@ -13067,6 +13168,8 @@ aarch64_classify_symbol (rtx x, HOST_WID +@@ -14868,6 +14975,8 @@ aarch64_classify_symbol (rtx x, HOST_WID case AARCH64_CMODEL_SMALL_SPIC: case AARCH64_CMODEL_SMALL_PIC: @@ -188,7 +212,7 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c case AARCH64_CMODEL_SMALL: return SYMBOL_SMALL_ABSOLUTE; -@@ -13100,6 +13203,7 @@ aarch64_classify_symbol (rtx x, HOST_WID +@@ -14904,6 +15013,7 @@ aarch64_classify_symbol (rtx x, HOST_WID return SYMBOL_TINY_ABSOLUTE; case AARCH64_CMODEL_SMALL: @@ -196,75 +220,83 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c /* Same reasoning as the tiny code model, but the offset cap here is 1MB, allowing +/-3.9GB for the offset to the symbol. */ -@@ -13121,7 +13225,48 @@ aarch64_classify_symbol (rtx x, HOST_WID +@@ -14927,7 +15037,50 @@ aarch64_classify_symbol (rtx x, HOST_WID ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G); return SYMBOL_SMALL_ABSOLUTE; + case AARCH64_CMODEL_MEDIUM: -+ { -+ tree decl_local = SYMBOL_REF_DECL (x); -+ if (decl_local != NULL -+ && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local))) -+ { -+ HOST_WIDE_INT size = tree_to_uhwi ( -+ DECL_SIZE_UNIT (decl_local)); -+ /* If the data is smaller than the threshold, goto -+ the small code model. Else goto the large code -+ model. */ -+ if (size >= HOST_WIDE_INT (aarch64_data_threshold)) -+ goto AARCH64_LARGE_ROUTINE; -+ } -+ goto AARCH64_SMALL_ROUTINE; -+ } ++ { ++ tree decl_local = SYMBOL_REF_DECL (x); ++ if (decl_local != NULL ++ && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local))) ++ { ++ HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE_UNIT (decl_local)); ++ /* If the data is smaller than the threshold, goto ++ the small code model. Else goto the large code ++ model. 
*/ ++ if (size >= HOST_WIDE_INT (aarch64_data_threshold)) ++ goto AARCH64_LARGE_ROUTINE; ++ } ++ goto AARCH64_SMALL_ROUTINE; ++ } + + case AARCH64_CMODEL_MEDIUM_PIC: -+ { -+ tree decl_local = SYMBOL_REF_DECL (x); -+ if (decl_local != NULL -+ && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local))) -+ { -+ HOST_WIDE_INT size = tree_to_uhwi ( -+ DECL_SIZE_UNIT (decl_local)); -+ if (size < HOST_WIDE_INT (aarch64_data_threshold)) -+ { -+ if (!aarch64_symbol_binds_local_p (x)) -+ { -+ return SYMBOL_SMALL_GOT_4G; -+ } -+ return SYMBOL_SMALL_ABSOLUTE; -+ } -+ } -+ if (!aarch64_symbol_binds_local_p (x)) -+ { -+ return SYMBOL_MEDIUM_GOT_4G; -+ } -+ return SYMBOL_MEDIUM_ABSOLUTE; -+ } ++ { ++ tree decl_local = SYMBOL_REF_DECL (x); ++ if (decl_local != NULL ++ && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local))) ++ { ++ HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE_UNIT (decl_local)); ++ if (size < HOST_WIDE_INT (aarch64_data_threshold)) ++ { ++ if (!aarch64_symbol_binds_local_p (x)) ++ { ++ /* flag_pic is 2 only when -fPIC is on, when we should ++ use 4G GOT. */ ++ return flag_pic == 2 ? SYMBOL_SMALL_GOT_4G ++ : SYMBOL_SMALL_GOT_28K ; ++ } ++ return SYMBOL_SMALL_ABSOLUTE; ++ } ++ } ++ if (!aarch64_symbol_binds_local_p (x)) ++ { ++ return SYMBOL_MEDIUM_GOT_4G; ++ } ++ return SYMBOL_MEDIUM_ABSOLUTE; ++ } ++ case AARCH64_CMODEL_LARGE: + AARCH64_LARGE_ROUTINE: /* This is alright even in PIC code as the constant pool reference is always PC relative and within the same translation unit. */ -@@ -15364,6 +15509,8 @@ aarch64_asm_preferred_eh_data_format (in +@@ -17789,6 +17942,8 @@ aarch64_asm_preferred_eh_data_format (in case AARCH64_CMODEL_SMALL: case AARCH64_CMODEL_SMALL_PIC: case AARCH64_CMODEL_SMALL_SPIC: -+ case AARCH64_CMODEL_MEDIUM: -+ case AARCH64_CMODEL_MEDIUM_PIC: ++ case AARCH64_CMODEL_MEDIUM: ++ case AARCH64_CMODEL_MEDIUM_PIC: /* text+got+data < 4Gb. 4-byte signed relocs are sufficient for everything. */ type = DW_EH_PE_sdata4; -@@ -18454,7 +18601,8 @@ aarch64_empty_mask_is_expensive (unsigne +@@ -21014,7 +21169,14 @@ aarch64_empty_mask_is_expensive (unsigne bool aarch64_use_pseudo_pic_reg (void) { - return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC; ++ /* flag_pic is 2 when -fPIC is on, where we do not need the pseudo ++ pic reg. In medium code mode, when combine with -fpie/-fpic, there are ++ possibility that some symbol size smaller than the -mlarge-data-threshold ++ will still use SMALL_SPIC relocation, which need the pseudo pic reg. ++ Fix spill_1.c fail. */ + return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC -+ || aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC ; ++ || (aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC ++ && flag_pic != 2); } /* Implement TARGET_UNSPEC_MAY_TRAP_P. 
*/ -@@ -18464,6 +18612,7 @@ aarch64_unspec_may_trap_p (const_rtx x, +@@ -21024,6 +21186,7 @@ aarch64_unspec_may_trap_p (const_rtx x, { switch (XINT (x, 1)) { @@ -273,8 +305,8 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c case UNSPEC_GOTSMALLPIC28K: case UNSPEC_GOTTINYPIC: diff -Nurp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h ---- a/gcc/config/aarch64/aarch64.h 2020-07-16 14:54:30.592000000 +0800 -+++ b/gcc/config/aarch64/aarch64.h 2020-07-16 14:55:05.672000000 +0800 +--- a/gcc/config/aarch64/aarch64.h 2021-02-18 11:03:28.336000000 +0800 ++++ b/gcc/config/aarch64/aarch64.h 2021-02-18 10:57:45.488000000 +0800 @@ -33,6 +33,10 @@ #define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas () @@ -287,9 +319,9 @@ diff -Nurp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h #define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ diff -Nurp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md ---- a/gcc/config/aarch64/aarch64.md 2020-07-16 14:54:30.588000000 +0800 -+++ b/gcc/config/aarch64/aarch64.md 2020-07-16 14:55:05.676000000 +0800 -@@ -209,6 +209,11 @@ +--- a/gcc/config/aarch64/aarch64.md 2021-02-18 11:03:28.340000000 +0800 ++++ b/gcc/config/aarch64/aarch64.md 2021-02-18 10:57:45.488000000 +0800 +@@ -224,6 +224,11 @@ UNSPEC_RSQRTS UNSPEC_NZCV UNSPEC_XPACLRI @@ -301,7 +333,7 @@ diff -Nurp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md UNSPEC_LD1_SVE UNSPEC_ST1_SVE UNSPEC_LDNT1_SVE -@@ -6548,6 +6553,39 @@ +@@ -6689,6 +6694,39 @@ [(set_attr "type" "load_4")] ) @@ -341,7 +373,7 @@ diff -Nurp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md (define_insn "ldr_got_small_28k_" [(set (match_operand:PTR 0 "register_operand" "=r") (unspec:PTR [(mem:PTR (lo_sum:PTR -@@ -6709,6 +6747,23 @@ +@@ -6852,6 +6890,23 @@ (set_attr "length" "12")] ) @@ -366,8 +398,8 @@ diff -Nurp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md [(unspec:PTR [(match_operand 0 "aarch64_valid_symref")] UNSPEC_TLSDESC)] "TARGET_TLS_DESC" diff -Nurp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt ---- a/gcc/config/aarch64/aarch64.opt 2020-07-16 14:54:30.580000000 +0800 -+++ b/gcc/config/aarch64/aarch64.opt 2020-07-16 14:55:05.676000000 +0800 +--- a/gcc/config/aarch64/aarch64.opt 2021-02-18 11:03:28.340000000 +0800 ++++ b/gcc/config/aarch64/aarch64.opt 2021-02-18 10:57:45.488000000 +0800 @@ -27,6 +27,10 @@ enum aarch64_processor explicit_tune_cor TargetVariable enum aarch64_arch explicit_arch = aarch64_no_arch @@ -396,8 +428,8 @@ diff -Nurp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt Target Report RejectNegative Mask(BIG_END) Assume target CPU is configured as big endian. diff -Nurp a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h ---- a/gcc/config/aarch64/aarch64-opts.h 2020-07-16 14:54:30.584000000 +0800 -+++ b/gcc/config/aarch64/aarch64-opts.h 2020-07-16 14:55:05.676000000 +0800 +--- a/gcc/config/aarch64/aarch64-opts.h 2020-03-12 19:07:21.000000000 +0800 ++++ b/gcc/config/aarch64/aarch64-opts.h 2021-02-18 10:57:45.488000000 +0800 @@ -66,6 +66,10 @@ enum aarch64_code_model { /* -fpic for small memory model. GOT size to 28KiB (4K*8-4K) or 3580 entries. */ @@ -410,8 +442,8 @@ diff -Nurp a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts The PIC variant is not yet implemented. 
*/ AARCH64_CMODEL_LARGE diff -Nurp a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h ---- a/gcc/config/aarch64/aarch64-protos.h 2020-07-16 14:54:30.584000000 +0800 -+++ b/gcc/config/aarch64/aarch64-protos.h 2020-07-16 14:55:05.676000000 +0800 +--- a/gcc/config/aarch64/aarch64-protos.h 2021-02-18 11:03:29.432000000 +0800 ++++ b/gcc/config/aarch64/aarch64-protos.h 2021-02-18 10:57:45.488000000 +0800 @@ -95,9 +95,11 @@ */ enum aarch64_symbol_type diff --git a/optabs-Dont-use-scalar-conversions-for-vectors.patch b/optabs-Dont-use-scalar-conversions-for-vectors.patch new file mode 100644 index 0000000..91407d8 --- /dev/null +++ b/optabs-Dont-use-scalar-conversions-for-vectors.patch @@ -0,0 +1,69 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-optabs-Don-t-use-scalar-conversions-for-vectors-PR93.patch +b6268016bf46dd63227dcbb73d13c30a3b4b9d2a + +diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c +index 3d829c27826..badd30bfda8 100644 +--- a/gcc/optabs-tree.c ++++ b/gcc/optabs-tree.c +@@ -284,9 +284,14 @@ supportable_convert_operation (enum tree_code code, + machine_mode m1,m2; + bool truncp; + ++ gcc_assert (VECTOR_TYPE_P (vectype_out) && VECTOR_TYPE_P (vectype_in)); ++ + m1 = TYPE_MODE (vectype_out); + m2 = TYPE_MODE (vectype_in); + ++ if (!VECTOR_MODE_P (m1) || !VECTOR_MODE_P (m2)) ++ return false; ++ + /* First check if we can done conversion directly. */ + if ((code == FIX_TRUNC_EXPR + && can_fix_p (m1,m2,TYPE_UNSIGNED (vectype_out), &truncp) +diff --git a/gcc/testsuite/gcc.dg/vect/pr93843-1.c b/gcc/testsuite/gcc.dg/vect/pr93843-1.c +new file mode 100644 +index 00000000000..23a79ca4c96 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/pr93843-1.c +@@ -0,0 +1,21 @@ ++char a; ++struct S { short b, c; } d; ++ ++__attribute__((noipa)) void ++foo (int x) ++{ ++ if (x != 4) ++ __builtin_abort (); ++} ++ ++int ++main () ++{ ++ short *g = &d.c, *h = &d.b; ++ char e = 4 - a; ++ int f; ++ *h = *g = e; ++ for (f = 0; f < 2; f++) ++ foo (d.c); ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.dg/vect/pr93843-2.c b/gcc/testsuite/gcc.dg/vect/pr93843-2.c +new file mode 100644 +index 00000000000..5fae3e5be17 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/pr93843-2.c +@@ -0,0 +1,11 @@ ++char in[2] = {2, 2}; ++short out[2] = {}; ++ ++int ++main() ++{ ++ for (int i = 0; i < 2; ++i) ++ out[i] = in[i]; ++ asm("":::"memory"); ++ if (out[0] != 2) __builtin_abort(); ++} diff --git a/redundant-loop-elimination.patch b/redundant-loop-elimination.patch index fb33bec..53a5063 100644 --- a/redundant-loop-elimination.patch +++ b/redundant-loop-elimination.patch @@ -1,6 +1,6 @@ diff -Nurp a/gcc/common.opt b/gcc/common.opt ---- a/gcc/common.opt 2020-11-23 03:24:54.760000000 -0500 -+++ b/gcc/common.opt 2020-11-23 03:23:59.716000000 -0500 +--- a/gcc/common.opt 2021-02-18 21:32:50.724000000 -0500 ++++ b/gcc/common.opt 2021-02-18 21:33:36.920000000 -0500 @@ -1150,6 +1150,10 @@ fcompare-elim Common Report Var(flag_compare_elim_after_reload) Optimization Perform comparison elimination after register allocation has finished. @@ -13,8 +13,8 @@ diff -Nurp a/gcc/common.opt b/gcc/common.opt Common Var(flag_conserve_stack) Optimization Do not perform optimizations increasing noticeably stack usage. 
diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c ---- a/gcc/tree-ssa-phiopt.c 2020-11-23 03:24:54.760000000 -0500 -+++ b/gcc/tree-ssa-phiopt.c 2020-11-23 03:27:42.824000000 -0500 +--- a/gcc/tree-ssa-phiopt.c 2021-02-18 21:32:52.648000000 -0500 ++++ b/gcc/tree-ssa-phiopt.c 2021-02-19 01:55:10.128000000 -0500 @@ -71,6 +71,7 @@ static hash_set * get_non_trapping static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree); static void hoist_adjacent_loads (basic_block, basic_block, @@ -48,7 +48,7 @@ diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c + ... +*/ +static bool -+check_uses_cond (tree ssa_name, gimple *stmt, ++check_uses_cond (const_tree ssa_name, gimple *stmt, + hash_set *hset ATTRIBUTE_UNUSED) +{ + tree_code code = gimple_cond_code (stmt); @@ -76,7 +76,7 @@ diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c + _tmp = SSA_NAME | _tmp2; +*/ +static bool -+check_uses_assign (tree ssa_name, gimple *stmt, hash_set *hset) ++check_uses_assign (const_tree ssa_name, gimple *stmt, hash_set *hset) +{ + tree_code code = gimple_assign_rhs_code (stmt); + tree lhs, rhs1, rhs2; @@ -113,7 +113,7 @@ diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c + # result = PHI +*/ +static bool -+check_uses_phi (tree ssa_name, gimple *stmt, hash_set *hset) ++check_uses_phi (const_tree ssa_name, gimple *stmt, hash_set *hset) +{ + for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++) + { @@ -223,7 +223,7 @@ diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +} + +static bool -+check_def_gimple (gimple *def1, gimple *def2, tree result) ++check_def_gimple (gimple *def1, gimple *def2, const_tree result) +{ + /* def1 and def2 should be POINTER_PLUS_EXPR. */ + if (!is_gimple_assign (def1) || !is_gimple_assign (def2) @@ -255,7 +255,7 @@ diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +} + +static bool -+check_loop_body (basic_block bb0, basic_block bb2, tree result) ++check_loop_body (basic_block bb0, basic_block bb2, const_tree result) +{ + gimple *g01 = first_stmt (bb0); + if (!g01 || !is_gimple_assign (g01) @@ -373,8 +373,8 @@ diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c + ... +*/ +static bool -+check_gimple_order (basic_block bb1, tree base, tree cst, tree result, -+ gimple *&output) ++check_gimple_order (basic_block bb1, const_tree base, const_tree cst, ++ const_tree result, gimple *&output) +{ + gimple *g1 = first_stmt (bb1); + if (!g1 || !is_gimple_assign (g1) diff --git a/revert-moutline-atomics.patch b/revert-moutline-atomics.patch new file mode 100644 index 0000000..59b5a94 --- /dev/null +++ b/revert-moutline-atomics.patch @@ -0,0 +1,418 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-aarch64-Implement-moutline-atomics.patch +3950b229a5ed6710f30241c2ddc3c74909bf4740 + +diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +--- a/gcc/config/aarch64/aarch64.c 2021-03-11 17:12:30.380000000 +0800 ++++ b/gcc/config/aarch64/aarch64.c 2021-03-11 17:13:29.992000000 +0800 +@@ -18150,82 +18150,6 @@ aarch64_emit_unlikely_jump (rtx insn) + add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); + } + +-/* We store the names of the various atomic helpers in a 5x4 array. +- Return the libcall function given MODE, MODEL and NAMES. 
*/ +- +-rtx +-aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx, +- const atomic_ool_names *names) +-{ +- memmodel model = memmodel_base (INTVAL (model_rtx)); +- int mode_idx, model_idx; +- +- switch (mode) +- { +- case E_QImode: +- mode_idx = 0; +- break; +- case E_HImode: +- mode_idx = 1; +- break; +- case E_SImode: +- mode_idx = 2; +- break; +- case E_DImode: +- mode_idx = 3; +- break; +- case E_TImode: +- mode_idx = 4; +- break; +- default: +- gcc_unreachable (); +- } +- +- switch (model) +- { +- case MEMMODEL_RELAXED: +- model_idx = 0; +- break; +- case MEMMODEL_CONSUME: +- case MEMMODEL_ACQUIRE: +- model_idx = 1; +- break; +- case MEMMODEL_RELEASE: +- model_idx = 2; +- break; +- case MEMMODEL_ACQ_REL: +- case MEMMODEL_SEQ_CST: +- model_idx = 3; +- break; +- default: +- gcc_unreachable (); +- } +- +- return init_one_libfunc_visibility (names->str[mode_idx][model_idx], +- VISIBILITY_HIDDEN); +-} +- +-#define DEF0(B, N) \ +- { "__aarch64_" #B #N "_relax", \ +- "__aarch64_" #B #N "_acq", \ +- "__aarch64_" #B #N "_rel", \ +- "__aarch64_" #B #N "_acq_rel" } +- +-#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \ +- { NULL, NULL, NULL, NULL } +-#define DEF5(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), DEF0(B, 16) +- +-static const atomic_ool_names aarch64_ool_cas_names = { { DEF5(cas) } }; +-const atomic_ool_names aarch64_ool_swp_names = { { DEF4(swp) } }; +-const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } }; +-const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } }; +-const atomic_ool_names aarch64_ool_ldclr_names = { { DEF4(ldclr) } }; +-const atomic_ool_names aarch64_ool_ldeor_names = { { DEF4(ldeor) } }; +- +-#undef DEF0 +-#undef DEF4 +-#undef DEF5 +- + /* Expand a compare and swap pattern. */ + + void +@@ -18272,17 +18196,6 @@ aarch64_expand_compare_and_swap (rtx ope + newval, mod_s)); + cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); + } +- else if (TARGET_OUTLINE_ATOMICS) +- { +- /* Oldval must satisfy compare afterward. */ +- if (!aarch64_plus_operand (oldval, mode)) +- oldval = force_reg (mode, oldval); +- rtx func = aarch64_atomic_ool_func (mode, mod_s, &aarch64_ool_cas_names); +- rval = emit_library_call_value (func, NULL_RTX, LCT_NORMAL, r_mode, +- oldval, mode, newval, mode, +- XEXP (mem, 0), Pmode); +- cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); +- } + else + { + /* The oldval predicate varies by mode. Test it and force to reg. */ +diff -Nurp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt +--- a/gcc/config/aarch64/aarch64.opt 2021-03-11 17:12:30.380000000 +0800 ++++ b/gcc/config/aarch64/aarch64.opt 2021-03-11 17:13:29.992000000 +0800 +@@ -272,6 +272,3 @@ user-land code. + TargetVariable + long aarch64_stack_protector_guard_offset = 0 + +-moutline-atomics +-Target Report Mask(OUTLINE_ATOMICS) Save +-Generate local calls to out-of-line atomic operations. +diff -Nurp a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md +--- a/gcc/config/aarch64/atomics.md 2021-03-11 17:12:30.380000000 +0800 ++++ b/gcc/config/aarch64/atomics.md 2021-03-11 17:13:29.992000000 +0800 +@@ -186,27 +186,16 @@ + (match_operand:SI 3 "const_int_operand")] + "" + { ++ rtx (*gen) (rtx, rtx, rtx, rtx); ++ + /* Use an atomic SWP when available. 
*/ + if (TARGET_LSE) +- { +- emit_insn (gen_aarch64_atomic_exchange_lse +- (operands[0], operands[1], operands[2], operands[3])); +- } +- else if (TARGET_OUTLINE_ATOMICS) +- { +- machine_mode mode = mode; +- rtx func = aarch64_atomic_ool_func (mode, operands[3], +- &aarch64_ool_swp_names); +- rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL, +- mode, operands[2], mode, +- XEXP (operands[1], 0), Pmode); +- emit_move_insn (operands[0], rval); +- } ++ gen = gen_aarch64_atomic_exchange_lse; + else +- { +- emit_insn (gen_aarch64_atomic_exchange +- (operands[0], operands[1], operands[2], operands[3])); +- } ++ gen = gen_aarch64_atomic_exchange; ++ ++ emit_insn (gen (operands[0], operands[1], operands[2], operands[3])); ++ + DONE; + } + ) +@@ -291,39 +280,6 @@ + } + operands[1] = force_reg (mode, operands[1]); + } +- else if (TARGET_OUTLINE_ATOMICS) +- { +- const atomic_ool_names *names; +- switch () +- { +- case MINUS: +- operands[1] = expand_simple_unop (mode, NEG, operands[1], +- NULL, 1); +- /* fallthru */ +- case PLUS: +- names = &aarch64_ool_ldadd_names; +- break; +- case IOR: +- names = &aarch64_ool_ldset_names; +- break; +- case XOR: +- names = &aarch64_ool_ldeor_names; +- break; +- case AND: +- operands[1] = expand_simple_unop (mode, NOT, operands[1], +- NULL, 1); +- names = &aarch64_ool_ldclr_names; +- break; +- default: +- gcc_unreachable (); +- } +- machine_mode mode = mode; +- rtx func = aarch64_atomic_ool_func (mode, operands[2], names); +- emit_library_call_value (func, NULL_RTX, LCT_NORMAL, mode, +- operands[1], mode, +- XEXP (operands[0], 0), Pmode); +- DONE; +- } + else + gen = gen_aarch64_atomic_; + +@@ -449,40 +405,6 @@ + } + operands[2] = force_reg (mode, operands[2]); + } +- else if (TARGET_OUTLINE_ATOMICS) +- { +- const atomic_ool_names *names; +- switch () +- { +- case MINUS: +- operands[2] = expand_simple_unop (mode, NEG, operands[2], +- NULL, 1); +- /* fallthru */ +- case PLUS: +- names = &aarch64_ool_ldadd_names; +- break; +- case IOR: +- names = &aarch64_ool_ldset_names; +- break; +- case XOR: +- names = &aarch64_ool_ldeor_names; +- break; +- case AND: +- operands[2] = expand_simple_unop (mode, NOT, operands[2], +- NULL, 1); +- names = &aarch64_ool_ldclr_names; +- break; +- default: +- gcc_unreachable (); +- } +- machine_mode mode = mode; +- rtx func = aarch64_atomic_ool_func (mode, operands[3], names); +- rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL, mode, +- operands[2], mode, +- XEXP (operands[1], 0), Pmode); +- emit_move_insn (operands[0], rval); +- DONE; +- } + else + gen = gen_aarch64_atomic_fetch_; + +@@ -572,7 +494,7 @@ + { + /* Use an atomic load-operate instruction when possible. In this case + we will re-compute the result from the original mem value. 
+-    if (TARGET_LSE || TARGET_OUTLINE_ATOMICS)
++    if (TARGET_LSE)
+       {
+         rtx tmp = gen_reg_rtx (<MODE>mode);
+         operands[2] = force_reg (<MODE>mode, operands[2]);
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c	2021-03-11 17:12:34.168000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c	2021-03-11 17:13:30.656000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -march=armv8-a+nolse -mno-outline-atomics" } */
++/* { dg-options "-O2 -march=armv8-a+nolse" } */
+ /* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */
+ 
+ int
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c	2021-03-11 17:12:34.168000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c	2021-03-11 17:13:30.656000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -march=armv8-a+nolse -mno-outline-atomics" } */
++/* { dg-options "-O2 -march=armv8-a+nolse" } */
+ /* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */
+ 
+ int
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c	2021-03-11 17:12:33.988000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf" } */
+ 
+ #include "atomic-comp-swap-release-acquire.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c	2021-03-11 17:12:33.988000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-acq_rel.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c	2021-03-11 17:12:33.988000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-acquire.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c	2021-03-11 17:12:33.992000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-char.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c	2021-03-11 17:12:33.992000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-consume.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c	2021-03-11 17:12:33.992000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ int v = 0;
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c	2021-03-11 17:12:33.992000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-int.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c	2021-03-11 17:12:33.992000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ long v = 0;
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c	2021-03-11 17:12:33.992000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-relaxed.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c	2021-03-11 17:12:34.012000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-release.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c	2021-03-11 17:12:34.012000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-seq_cst.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c	2021-03-11 17:12:34.168000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c	2021-03-11 17:13:30.652000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-short.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c
+--- a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c	2021-03-11 17:12:34.168000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c	2021-03-11 17:13:30.656000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf" } */
+ 
+ #include "sync-comp-swap.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c
+--- a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c	2021-03-11 17:12:34.168000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c	2021-03-11 17:13:30.656000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "sync-op-acquire.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/sync-op-full.c b/gcc/testsuite/gcc.target/aarch64/sync-op-full.c
+--- a/gcc/testsuite/gcc.target/aarch64/sync-op-full.c	2021-03-11 17:12:34.168000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/sync-op-full.c	2021-03-11 17:13:30.656000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "sync-op-full.x"
+ 
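Background on the file reverted above, illustrative rather than part of the patch: with -moutline-atomics, GCC expands the common __atomic_* operations as calls into libgcc helpers named __aarch64_<op><size>_<model> -- exactly the names produced by the DEF0/DEF4/DEF5 macros deleted in the aarch64.c hunk -- and each helper selects an LSE or LL/SC implementation at run time. A minimal sketch of the user-visible effect (this example is the editor's, not from the patch):

    /* With -moutline-atomics this fetch-add compiles to a call to the
       libgcc helper __aarch64_ldadd4_relax; with the option reverted it
       expands inline, as an LDXR/STXR loop, or as a single LDADD when
       the -march level enables LSE.  */
    #include <stdatomic.h>

    int
    fetch_add_relaxed (atomic_int *p)
    {
      return atomic_fetch_add_explicit (p, 1, memory_order_relaxed);
    }
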
diff --git a/revise-type-before-build-MULT.patch b/revise-type-before-build-MULT.patch
new file mode 100644
index 0000000..ddcb05e
--- /dev/null
+++ b/revise-type-before-build-MULT.patch
@@ -0,0 +1,80 @@
+diff -uprN a/gcc/testsuite/gcc.dg/affine-add-1.c b/gcc/testsuite/gcc.dg/affine-add-1.c
+--- a/gcc/testsuite/gcc.dg/affine-add-1.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/affine-add-1.c	2021-03-18 19:41:21.308000000 +0800
+@@ -0,0 +1,16 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++extern unsigned char a[][13][23][15][11];
++short b;
++int c, d;
++void e(int f, int g[][3][4][3]) {
++  for (char h = 0;; h = 2)
++    for (; f;)
++      for (short i;; i = d)
++        for (char j; j; j = c)
++          for (char k = 0; k < 4; k = g[h][b][i][j])
++            a[h][b][i][j][k] = 0;
++}
++unsigned char a[3][13][23][15][11];
++int main() {}
+diff -uprN a/gcc/testsuite/g++.dg/affine-add-1.C b/gcc/testsuite/g++.dg/affine-add-1.C
+--- a/gcc/testsuite/g++.dg/affine-add-1.C	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/g++.dg/affine-add-1.C	2021-03-18 19:40:28.432000000 +0800
+@@ -0,0 +1,33 @@
++/* { dg-do compile } */
++/* { dg-options "-O3" } */
++
++#include <algorithm>
++
++extern int a[];
++extern unsigned char b[][151800];
++extern long long c[][20][23][22][11];
++char d, e;
++int f;
++unsigned g;
++long h;
++void i(unsigned long long s, unsigned short j) {
++  for (char k = 0; k < 12; k += 3)
++    for (short l = 0; l < 9; l = std::min(j, (unsigned short)4050683)) {
++      for (bool m(h); m < bool(~0); m = 1)
++        for (int t = 0; t < 4; t = std::min(s, (unsigned long long)40808803))
++          for (int n = 0; n < 9; n += e)
++            a[n] = 0;
++      for (char o = 0; o < g; o = 4)
++        for (bool p; p < f; p = d) {
++          for (long q(s); q < 4ULL; q += 1ULL)
++            b[k][o + q] = 0;
++          for (int r = 0; r < 11; r += ~0 || 0)
++            c[k][l][o][d][r] = 0;
++        }
++    }
++}
++int a[0];
++unsigned char b[3][151800];
++long long c[3][20][23][22][11];
++int main() {}
++
+diff -uprN a/gcc/tree-affine.c b/gcc/tree-affine.c
+--- a/gcc/tree-affine.c	2021-03-15 18:55:31.928000000 +0800
++++ b/gcc/tree-affine.c	2021-03-18 16:34:05.932000000 +0800
+@@ -184,9 +184,16 @@ aff_combination_add_elt (aff_tree *comb,
+   if (scale == 1)
+     elt = fold_convert (type, elt);
+   else
+-    elt = fold_build2 (MULT_EXPR, type,
+-                       fold_convert (type, elt),
+-                       wide_int_to_tree (type, scale));
++    {
++      if (POINTER_TYPE_P (TREE_TYPE (elt)))
++        {
++          elt = copy_node (elt);
++          TREE_TYPE (elt) = sizetype;
++        }
++      elt = fold_build2 (MULT_EXPR, type,
++                         fold_convert (type, elt),
++                         wide_int_to_tree (type, scale));
++    }
+ 
+   if (comb->rest)
+     comb->rest = fold_build2 (PLUS_EXPR, type, comb->rest,
--
Gitee
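A note on the tree-affine.c hunk, as an illustrative sketch rather than an authoritative statement of the bug: aff_combination_add_elt scales ELT by SCALE with a MULT_EXPR, and when ELT carries a pointer type that tree is not valid GIMPLE (pointer arithmetic is restricted to POINTER_PLUS_EXPR), which the reduced loop nests above appear to provoke; the fix therefore reinterprets the node as sizetype before multiplying. The same rule at the C level (editor's example, hypothetical function name):

    /* Multiplying a pointer directly is ill-formed in C, just as a
       pointer-typed MULT_EXPR operand is invalid in GIMPLE; address
       scaling is done in a pointer-sized integer type instead.  */
    #include <stdint.h>

    unsigned char *
    scale_address (unsigned char *base, uintptr_t n)
    {
      return (unsigned char *) ((uintptr_t) base * n);
    }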