From fb345632b69d8ae56eac4aec99303556270f25c8 Mon Sep 17 00:00:00 2001
From: eastb233
Date: Fri, 28 May 2021 21:05:39 +0800
Subject: [PATCH] [Sync] Sync from master branch

- Sync patch from master branch
---
 Fix-type-mismatch-in-SLPed-constructors.patch |  118 ++
 PR92429-do-not-fold-when-updating.patch       |   70 ++
 Simplify-X-C1-C2.patch                        |  197 ++++
 add-check-for-pressure-in-sche1.patch         |   52 +
 add-fp-model-options.patch                    |  376 ++++++
 enable-simd-math.patch                        |  236 +++-
 fix-CTOR-vectorization.patch                  |   18 +
 fix-ICE-in-eliminate-stmt.patch               |   79 ++
 fix-ICE-in-vect.patch                         | 1037 +++++++++++++++++
 ...ge-set-by-vectorization-on-niter-IVs.patch |   74 ++
 gcc.spec                                      |   66 +-
 medium-code-mode.patch                        |  350 +++---
 ...t-use-scalar-conversions-for-vectors.patch |   69 ++
 redundant-loop-elimination.patch              |   22 +-
 revert-moutline-atomics.patch                 |  418 +++++++
 revise-type-before-build-MULT.patch           |   80 ++
 16 files changed, 3077 insertions(+), 185 deletions(-)
 create mode 100644 Fix-type-mismatch-in-SLPed-constructors.patch
 create mode 100644 PR92429-do-not-fold-when-updating.patch
 create mode 100644 Simplify-X-C1-C2.patch
 create mode 100644 add-check-for-pressure-in-sche1.patch
 create mode 100644 add-fp-model-options.patch
 create mode 100644 fix-CTOR-vectorization.patch
 create mode 100644 fix-ICE-in-eliminate-stmt.patch
 create mode 100644 fix-ICE-in-vect.patch
 create mode 100644 fix-range-set-by-vectorization-on-niter-IVs.patch
 create mode 100644 optabs-Dont-use-scalar-conversions-for-vectors.patch
 create mode 100644 revert-moutline-atomics.patch
 create mode 100644 revise-type-before-build-MULT.patch

diff --git a/Fix-type-mismatch-in-SLPed-constructors.patch b/Fix-type-mismatch-in-SLPed-constructors.patch
new file mode 100644
index 0000000..b073299
--- /dev/null
+++ b/Fix-type-mismatch-in-SLPed-constructors.patch
@@ -0,0 +1,118 @@
+This backport contains 2 patches from the gcc mainstream tree.
+The commit ids of these patches are listed below in chronological order.
+
+0001-Fix-type-mismatch-in-SLPed-constructors.patch
+86c3a7d891f9f175d09d61f5ce163c6dc5ce681f
+0001-re-PR-fortran-91003-ICE-when-compiling-LAPACK-CGEGV-.patch
+d005f61e7a0dbb2c991f13b4b61b1a27ca2d8b73
+
+diff -urpN a/gcc/testsuite/gfortran.dg/pr91003.f90 b/gcc/testsuite/gfortran.dg/pr91003.f90
+--- a/gcc/testsuite/gfortran.dg/pr91003.f90	1969-12-31 19:00:00.000000000 -0500
++++ b/gcc/testsuite/gfortran.dg/pr91003.f90	2021-02-22 03:02:39.484000000 -0500
+@@ -0,0 +1,33 @@
++! { dg-do compile }
++! { dg-options "-Ofast" }
++      SUBROUTINE FOO(N, A, B, C, D, E, F, G)
++      COMPLEX A(*)
++      LOGICAL H
++      INTEGER G
++      REAL I, C, J, F, F1, F2, K, E, L, M, B, D
++      DO JC = 1, N
++         K = F*REAL(A(JC))
++         Z = F*AIMAG(A(JC))
++         H = .FALSE.
++         L = G
++         IF(ABS(Z).LT.D .AND. I.GE. MAX(D, B*C, B*J)) THEN
++            H = .TRUE.
++            L = (D / F1) / MAX(D, F2*I)
++         END IF
++         IF(ABS(K).LT.D .AND. C.GE. MAX(D, B*I, B*J)) THEN
++            L = MAX(L, (D / F1) / MAX(D, F2*C))
++         END IF
++         IF(ABS(E).LT.D .AND. J.GE. MAX(D, B*C, B*I)) THEN
++            H = .TRUE.
++            L = MAX(L, (D / BNRM1) / MAX(D, BNRM2*J))
++         END IF
++         IF(H) THEN
++            M = (L*D)*MAX(ABS(K), ABS(Z), ABS(E))
++         END IF
++         IF(H) THEN
++            K = (L*REAL(A(JC)))*F
++            Z = (L*AIMAG(A(JC)))*F
++         END IF
++         A(JC) = CMPLX(K, Z)
++      END DO
++      END
+diff -urpN a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
+--- a/gcc/tree-vect-slp.c	2021-02-22 02:56:51.328000000 -0500
++++ b/gcc/tree-vect-slp.c	2021-02-22 03:03:22.676000000 -0500
+@@ -3442,7 +3442,7 @@ vect_slp_bb (basic_block bb)
+ /* Return 1 if vector type STMT_VINFO is a boolean vector.  */
+ 
+ static bool
+-vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo)
++vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, unsigned op_num)
+ {
+   enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
+   tree op, vectype;
+@@ -3467,9 +3467,17 @@ vect_mask_constant_operand_p (stmt_vec_i
+       tree cond = gimple_assign_rhs1 (stmt);
+ 
+       if (TREE_CODE (cond) == SSA_NAME)
+-	op = cond;
++	{
++	  if (op_num > 0)
++	    return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
++	  op = cond;
++	}
+       else
+-	op = TREE_OPERAND (cond, 0);
++	{
++	  if (op_num > 1)
++	    return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
++	  op = TREE_OPERAND (cond, 0);
++	}
+ 
+       if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
+ 	gcc_unreachable ();
+@@ -3600,9 +3608,10 @@ duplicate_and_interleave (vec_info *vinf
+    operands.  */
+ 
+ static void
+-vect_get_constant_vectors (slp_tree op_node, slp_tree slp_node,
++vect_get_constant_vectors (slp_tree slp_node, unsigned op_num,
+ 			   vec<tree> *vec_oprnds)
+ {
++  slp_tree op_node = SLP_TREE_CHILDREN (slp_node)[op_num];
+   stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0];
+   vec_info *vinfo = stmt_vinfo->vinfo;
+   unsigned HOST_WIDE_INT nunits;
+@@ -3624,7 +3633,7 @@ vect_get_constant_vectors (slp_tree op_n
+   /* Check if vector type is a boolean vector.  */
+   tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo);
+   if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
+-      && vect_mask_constant_operand_p (stmt_vinfo))
++      && vect_mask_constant_operand_p (stmt_vinfo, op_num))
+     vector_type = truth_type_for (stmt_vectype);
+   else
+     vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op), op_node);
+@@ -3848,7 +3857,7 @@ vect_get_slp_defs (slp_tree slp_node, ve
+ 	  vect_get_slp_vect_defs (child, &vec_defs);
+ 	}
+       else
+-	vect_get_constant_vectors (child, slp_node, &vec_defs);
++	vect_get_constant_vectors (slp_node, i, &vec_defs);
+ 
+       vec_oprnds->quick_push (vec_defs);
+     }
+@@ -4269,6 +4278,10 @@ vectorize_slp_instance_root_stmt (slp_tr
+ 	{
+ 	  tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt);
+ 	  tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt);
++	  if (!useless_type_conversion_p (TREE_TYPE (root_lhs),
++					  TREE_TYPE (vect_lhs)))
++	    vect_lhs = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (root_lhs),
++			       vect_lhs);
+ 	  rstmt = gimple_build_assign (root_lhs, vect_lhs);
+ 	  break;
+ 	}
diff --git a/PR92429-do-not-fold-when-updating.patch b/PR92429-do-not-fold-when-updating.patch
new file mode 100644
index 0000000..65749ee
--- /dev/null
+++ b/PR92429-do-not-fold-when-updating.patch
@@ -0,0 +1,70 @@
+This backport contains 1 patch from the gcc mainstream tree.
+The commit id of this patch is listed below.
+
+0001-PR-tree-optimization-92429-do-not-fold-when-updating.patch
+f7dff7699fd70d3b8c3e637818e18c86f93ccfec
+
+diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
+index 4d5e0494511..6e6df0bfdb8 100644
+--- a/gcc/tree-ssa-loop-niter.c
++++ b/gcc/tree-ssa-loop-niter.c
+@@ -1934,7 +1934,8 @@ number_of_iterations_cond (class loop *loop,
+ 
+ tree
+ simplify_replace_tree (tree expr, tree old, tree new_tree,
+-		       tree (*valueize) (tree, void*), void *context)
++		       tree (*valueize) (tree, void*), void *context,
++		       bool do_fold)
+ {
+   unsigned i, n;
+   tree ret = NULL_TREE, e, se;
+@@ -1966,7 +1967,7 @@ simplify_replace_tree (tree expr, tree old, tree new_tree,
+   for (i = 0; i < n; i++)
+     {
+       e = TREE_OPERAND (expr, i);
+-      se = simplify_replace_tree (e, old, new_tree, valueize, context);
++      se = simplify_replace_tree (e, old, new_tree, valueize, context, do_fold);
+       if (e == se)
+ 	continue;
+ 
+@@ -1976,7 +1977,7 @@ simplify_replace_tree (tree expr, tree old, tree new_tree,
+       TREE_OPERAND (ret, i) = se;
+     }
+ 
+-  return (ret ? fold (ret) : expr);
++  return (ret ? (do_fold ? fold (ret) : ret) : expr);
+ }
+ 
+ /* Expand definitions of ssa names in EXPR as long as they are simple
+diff --git a/gcc/tree-ssa-loop-niter.h b/gcc/tree-ssa-loop-niter.h
+index 621e2c2e28d..eb8d1579479 100644
+--- a/gcc/tree-ssa-loop-niter.h
++++ b/gcc/tree-ssa-loop-niter.h
+@@ -58,7 +58,7 @@ extern void free_numbers_of_iterations_estimates (class loop *);
+ extern void free_numbers_of_iterations_estimates (function *);
+ extern tree simplify_replace_tree (tree, tree,
+ 				   tree, tree (*)(tree, void *) = NULL,
+-				   void * = NULL);
++				   void * = NULL, bool do_fold = true);
+ extern void substitute_in_loop_info (struct loop *, tree, tree);
+ 
+ #endif /* GCC_TREE_SSA_LOOP_NITER_H */
+diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
+index 8e318a037a7..e5fb434bd4e 100644
+--- a/gcc/tree-vect-loop.c
++++ b/gcc/tree-vect-loop.c
+@@ -8434,8 +8434,13 @@ update_epilogue_loop_vinfo (class loop *epilogue, tree advance)
+ 	    gimple_set_op (stmt, j, *new_op);
+ 	  else
+ 	    {
++	      /* PR92429: The last argument of simplify_replace_tree disables
++		 folding when replacing arguments.  This is required as
++		 otherwise you might end up with different statements than the
++		 ones analyzed in vect_loop_analyze, leading to different
++		 vectorization.  */
+ 	      op = simplify_replace_tree (op, NULL_TREE, NULL_TREE,
+-					  &find_in_mapping, &mapping);
++					  &find_in_mapping, &mapping, false);
+ 	      gimple_set_op (stmt, j, op);
+ 	    }
+ 	}
diff --git a/Simplify-X-C1-C2.patch b/Simplify-X-C1-C2.patch
new file mode 100644
index 0000000..0997a00
--- /dev/null
+++ b/Simplify-X-C1-C2.patch
@@ -0,0 +1,197 @@
+This backport contains 2 patches from the gcc mainstream tree.
+The commit ids of these patches are listed below in chronological order.
+
+0001-Simplify-X-C1-C2-with-undefined-overflow.patch
+ca2b8c082c4f16919071c9f8de8db0b33b54c405
+
+0002-Simplify-X-C1-C2-with-wrapping-overflow.patch
+287522613d661b4c5ba8403b051eb470c1674cba
+
+diff -Nurp a/gcc/expr.c b/gcc/expr.c
+--- a/gcc/expr.c	2021-03-17 16:34:24.700000000 +0800
++++ b/gcc/expr.c	2021-03-17 10:30:11.500000000 +0800
+@@ -11706,38 +11706,6 @@ string_constant (tree arg, tree *ptr_off
+   return init;
+ }
+ 
+-/* Compute the modular multiplicative inverse of A modulo M
+-   using extended Euclid's algorithm.  Assumes A and M are coprime.  */
+-static wide_int
+-mod_inv (const wide_int &a, const wide_int &b)
+-{
+-  /* Verify the assumption.  */
+-  gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1));
+-
+-  unsigned int p = a.get_precision () + 1;
+-  gcc_checking_assert (b.get_precision () + 1 == p);
+-  wide_int c = wide_int::from (a, p, UNSIGNED);
+-  wide_int d = wide_int::from (b, p, UNSIGNED);
+-  wide_int x0 = wide_int::from (0, p, UNSIGNED);
+-  wide_int x1 = wide_int::from (1, p, UNSIGNED);
+-
+-  if (wi::eq_p (b, 1))
+-    return wide_int::from (1, p, UNSIGNED);
+-
+-  while (wi::gt_p (c, 1, UNSIGNED))
+-    {
+-      wide_int t = d;
+-      wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d);
+-      c = t;
+-      wide_int s = x0;
+-      x0 = wi::sub (x1, wi::mul (q, x0));
+-      x1 = s;
+-    }
+-  if (wi::lt_p (x1, 0, SIGNED))
+-    x1 += d;
+-  return x1;
+-}
+-
+ /* Optimize x % C1 == C2 for signed modulo if C1 is a power of two and C2
+    is non-zero and C3 ((1<<(prec-1)) | (C1 - 1)):
+    for C2 > 0 to x & C3 == C2
+@@ -11948,7 +11916,7 @@ maybe_optimize_mod_cmp (enum tree_code c
+       w = wi::lrshift (w, shift);
+       wide_int a = wide_int::from (w, prec + 1, UNSIGNED);
+       wide_int b = wi::shifted_mask (prec, 1, false, prec + 1);
+-      wide_int m = wide_int::from (mod_inv (a, b), prec, UNSIGNED);
++      wide_int m = wide_int::from (wi::mod_inv (a, b), prec, UNSIGNED);
+       tree c3 = wide_int_to_tree (type, m);
+       tree c5 = NULL_TREE;
+       wide_int d, e;
+diff -Nurp a/gcc/match.pd b/gcc/match.pd
+--- a/gcc/match.pd	2021-03-17 16:34:19.320000000 +0800
++++ b/gcc/match.pd	2021-03-17 10:30:11.500000000 +0800
+@@ -3290,6 +3290,35 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+     (scmp @0 @2)
+     (cmp @0 @2))))))
+ 
++/* For integral types with undefined overflow fold
++   x * C1 == C2 into x == C2 / C1 or false.
++   If overflow wraps and C1 is odd, simplify to x == C2 / C1 in the ring
++   Z / 2^n Z.  */
++(for cmp (eq ne)
++ (simplify
++  (cmp (mult @0 INTEGER_CST@1) INTEGER_CST@2)
++  (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
++       && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
++       && wi::to_wide (@1) != 0)
++   (with { widest_int quot; }
++    (if (wi::multiple_of_p (wi::to_widest (@2), wi::to_widest (@1),
++			    TYPE_SIGN (TREE_TYPE (@0)), &quot))
++     (cmp @0 { wide_int_to_tree (TREE_TYPE (@0), quot); })
++     { constant_boolean_node (cmp == NE_EXPR, type); }))
++   (if (INTEGRAL_TYPE_P (TREE_TYPE (@0))
++	&& TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
++	&& (wi::bit_and (wi::to_wide (@1), 1) == 1))
++    (cmp @0
++	 {
++	   tree itype = TREE_TYPE (@0);
++	   int p = TYPE_PRECISION (itype);
++	   wide_int m = wi::one (p + 1) << p;
++	   wide_int a = wide_int::from (wi::to_wide (@1), p + 1, UNSIGNED);
++	   wide_int i = wide_int::from (wi::mod_inv (a, m),
++					p, TYPE_SIGN (itype));
++	   wide_int_to_tree (itype, wi::mul (i, wi::to_wide (@2)));
++	 })))))
++
+ /* Simplify comparison of something with itself.  For IEEE
+    floating-point, we can only do some of these simplifications.  */
+ (for cmp (eq ge le)
+diff -Nurp a/gcc/testsuite/gcc.c-torture/execute/pr23135.c b/gcc/testsuite/gcc.c-torture/execute/pr23135.c
+--- a/gcc/testsuite/gcc.c-torture/execute/pr23135.c	2021-03-17 16:34:24.016000000 +0800
++++ b/gcc/testsuite/gcc.c-torture/execute/pr23135.c	2021-03-17 10:30:13.572000000 +0800
+@@ -1,7 +1,7 @@
+ /* Based on execute/simd-1.c, modified by joern.rennecke@st.com to
+    trigger a reload bug.  Verified for gcc mainline from 20050722 13:00 UTC
+    for sh-elf -m4 -O2.  */
+-/* { dg-options "-Wno-psabi" } */
++/* { dg-options "-Wno-psabi -fwrapv" } */
+ /* { dg-add-options stack_size } */
+ 
+ #ifndef STACK_SIZE
+diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95433-2.c	2021-03-17 10:30:13.276000000 +0800
+@@ -0,0 +1,15 @@
++/* { dg-do compile } */
++/* { dg-options "-O -fwrapv -fdump-tree-gimple" } */
++
++typedef __INT32_TYPE__ int32_t;
++typedef unsigned __INT32_TYPE__ uint32_t;
++
++int e(int32_t x){return 3*x==5;}
++int f(int32_t x){return 3*x==-5;}
++int g(int32_t x){return -3*x==5;}
++int h(int32_t x){return 7*x==3;}
++int i(uint32_t x){return 7*x==3;}
++
++/* { dg-final { scan-tree-dump-times "== 1431655767" 1 "gimple" } } */
++/* { dg-final { scan-tree-dump-times "== -1431655767" 2 "gimple" } } */
++/* { dg-final { scan-tree-dump-times "== 613566757" 2 "gimple" } } */
+diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr95433.c b/gcc/testsuite/gcc.dg/tree-ssa/pr95433.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/pr95433.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr95433.c	2021-03-17 10:30:13.276000000 +0800
+@@ -0,0 +1,8 @@
++/* { dg-do compile } */
++/* { dg-options "-O -fdump-tree-optimized" } */
++
++int f(int x){return x*7==17;}
++int g(int x){return x*3==15;}
++
++/* { dg-final { scan-tree-dump "return 0;" "optimized" } } */
++/* { dg-final { scan-tree-dump "== 5;" "optimized" } } */
+diff -Nurp a/gcc/wide-int.cc b/gcc/wide-int.cc
+--- a/gcc/wide-int.cc	2021-03-17 16:34:24.488000000 +0800
++++ b/gcc/wide-int.cc	2021-03-17 10:30:11.500000000 +0800
+@@ -2223,6 +2223,39 @@ wi::round_up_for_mask (const wide_int &v
+   return (val | tmp) & -tmp;
+ }
+ 
++/* Compute the modular multiplicative inverse of A modulo B
++   using extended Euclid's algorithm.  Assumes A and B are coprime,
++   and that A and B have the same precision.  */
++wide_int
++wi::mod_inv (const wide_int &a, const wide_int &b)
++{
++  /* Verify the assumption.  */
++  gcc_checking_assert (wi::eq_p (wi::gcd (a, b), 1));
++
++  unsigned int p = a.get_precision () + 1;
++  gcc_checking_assert (b.get_precision () + 1 == p);
++  wide_int c = wide_int::from (a, p, UNSIGNED);
++  wide_int d = wide_int::from (b, p, UNSIGNED);
++  wide_int x0 = wide_int::from (0, p, UNSIGNED);
++  wide_int x1 = wide_int::from (1, p, UNSIGNED);
++
++  if (wi::eq_p (b, 1))
++    return wide_int::from (1, p, UNSIGNED);
++
++  while (wi::gt_p (c, 1, UNSIGNED))
++    {
++      wide_int t = d;
++      wide_int q = wi::divmod_trunc (c, d, UNSIGNED, &d);
++      c = t;
++      wide_int s = x0;
++      x0 = wi::sub (x1, wi::mul (q, x0));
++      x1 = s;
++    }
++  if (wi::lt_p (x1, 0, SIGNED))
++    x1 += d;
++  return x1;
++}
++
+ /*
+  * Private utilities.
+  */
+ */ +diff -Nurp a/gcc/wide-int.h b/gcc/wide-int.h +--- a/gcc/wide-int.h 2021-03-17 16:34:14.792000000 +0800 ++++ b/gcc/wide-int.h 2021-03-17 10:30:11.500000000 +0800 +@@ -3368,6 +3368,8 @@ namespace wi + wide_int round_down_for_mask (const wide_int &, const wide_int &); + wide_int round_up_for_mask (const wide_int &, const wide_int &); + ++ wide_int mod_inv (const wide_int &a, const wide_int &b); ++ + template + T mask (unsigned int, bool); + diff --git a/add-check-for-pressure-in-sche1.patch b/add-check-for-pressure-in-sche1.patch new file mode 100644 index 0000000..b57a685 --- /dev/null +++ b/add-check-for-pressure-in-sche1.patch @@ -0,0 +1,52 @@ +--- a/gcc/haifa-sched.c 2021-03-08 14:46:59.204000000 +0800 ++++ b/gcc/haifa-sched.c 2021-03-09 13:32:40.656000000 +0800 +@@ -2036,8 +2036,10 @@ model_start_update_pressure (struct mode + /* The instruction wasn't part of the model schedule; it was moved + from a different block. Update the pressure for the end of + the model schedule. */ +- MODEL_REF_PRESSURE (group, point, pci) += delta; +- MODEL_MAX_PRESSURE (group, point, pci) += delta; ++ if (MODEL_REF_PRESSURE (group, point, pci) != -1 || delta > 0) ++ MODEL_REF_PRESSURE (group, point, pci) += delta; ++ if (MODEL_MAX_PRESSURE (group, point, pci) != -1 || delta > 0) ++ MODEL_MAX_PRESSURE (group, point, pci) += delta; + } + else + { +diff -uprN a/gcc/testsuite/gcc.dg/sche1-pressure-check.c b/gcc/testsuite/gcc.dg/sche1-pressure-check.c +--- a/gcc/testsuite/gcc.dg/sche1-pressure-check.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/sche1-pressure-check.c 2021-03-09 13:40:34.036000000 +0800 +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3" } */ ++ ++int a, g, h; ++char b, c; ++short d; ++static int e; ++int *volatile f; ++void i() { ++ int j = 0; ++ int *k = &a; ++ for (; c; c--) { ++ g && (d = 0); ++ j ^= 10; ++ { ++ int l[2]; ++ l; ++ h = l[1]; ++ } ++ e = 1; ++ for (; e <= 7; e++) { ++ *k = 6; ++ *f = b = 0; ++ for (; b <= 7; b++) { ++ int m = 5; ++ if (g) ++ *k &= m ^= j; ++ } ++ } ++ } ++} ++int main() {} ++ diff --git a/add-fp-model-options.patch b/add-fp-model-options.patch new file mode 100644 index 0000000..8d23b99 --- /dev/null +++ b/add-fp-model-options.patch @@ -0,0 +1,376 @@ +diff -Nurp a/gcc/common.opt b/gcc/common.opt +--- a/gcc/common.opt 2021-02-18 21:22:07.216000000 +0800 ++++ b/gcc/common.opt 2021-02-19 16:04:17.876000000 +0800 +@@ -1506,6 +1506,32 @@ ffp-int-builtin-inexact + Common Report Var(flag_fp_int_builtin_inexact) Init(1) Optimization + Allow built-in functions ceil, floor, round, trunc to raise \"inexact\" exceptions. + ++fftz ++Common Report Var(flag_ftz) Optimization ++Control fpcr register for flush to zero. ++ ++fp-model= ++Common Joined RejectNegative Enum(fp_model) Var(flag_fp_model) Init(FP_MODEL_NORMAL) Optimization ++-fp-model=[normal|fast|precise|except|strict] Perform floating-point precision control. ++ ++Enum ++Name(fp_model) Type(enum fp_model) UnknownError(unknown floating point precision model %qs) ++ ++EnumValue ++Enum(fp_model) String(normal) Value(FP_MODEL_NORMAL) ++ ++EnumValue ++Enum(fp_model) String(fast) Value(FP_MODEL_FAST) ++ ++EnumValue ++Enum(fp_model) String(precise) Value(FP_MODEL_PRECISE) ++ ++EnumValue ++Enum(fp_model) String(except) Value(FP_MODEL_EXCEPT) ++ ++EnumValue ++Enum(fp_model) String(strict) Value(FP_MODEL_STRICT) ++ + ; Nonzero means don't put addresses of constant functions in registers. 
+ ; Used for compiling the Unix kernel, where strange substitutions are + ; done on the assembly output. +diff -Nurp a/gcc/config/aarch64/aarch64-linux.h b/gcc/config/aarch64/aarch64-linux.h +--- a/gcc/config/aarch64/aarch64-linux.h 2021-02-18 21:22:07.220000000 +0800 ++++ b/gcc/config/aarch64/aarch64-linux.h 2021-02-18 21:23:55.932000000 +0800 +@@ -50,7 +50,8 @@ + #define LINK_SPEC LINUX_TARGET_LINK_SPEC AARCH64_ERRATA_LINK_SPEC + + #define GNU_USER_TARGET_MATHFILE_SPEC \ +- "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}" ++ "%{Ofast|ffast-math|funsafe-math-optimizations|fp-model=fast|fftz:\ ++ %{!fno-ftz:crtfastmath.o%s}}" + + #undef ENDFILE_SPEC + #define ENDFILE_SPEC \ +diff -Nurp a/gcc/flag-types.h b/gcc/flag-types.h +--- a/gcc/flag-types.h 2020-03-12 19:07:21.000000000 +0800 ++++ b/gcc/flag-types.h 2021-02-18 21:23:55.932000000 +0800 +@@ -207,6 +207,15 @@ enum fp_contract_mode { + FP_CONTRACT_FAST = 2 + }; + ++/* Floating-point precision mode. */ ++enum fp_model { ++ FP_MODEL_NORMAL = 0, ++ FP_MODEL_FAST = 1, ++ FP_MODEL_PRECISE = 2, ++ FP_MODEL_EXCEPT = 3, ++ FP_MODEL_STRICT = 4 ++}; ++ + /* Scalar storage order kind. */ + enum scalar_storage_order_kind { + SSO_NATIVE = 0, +diff -Nurp a/gcc/fortran/options.c b/gcc/fortran/options.c +--- a/gcc/fortran/options.c 2020-03-12 19:07:21.000000000 +0800 ++++ b/gcc/fortran/options.c 2021-02-18 21:23:55.932000000 +0800 +@@ -247,6 +247,7 @@ form_from_filename (const char *filename + return f_form; + } + ++static void gfc_handle_fpe_option (const char *arg, bool trap); + + /* Finalize commandline options. */ + +@@ -274,6 +275,13 @@ gfc_post_options (const char **pfilename + if (flag_protect_parens == -1) + flag_protect_parens = !optimize_fast; + ++ /* If fp-model=precise/strict, turn on all ffpe-trap and ffpe-summary. */ ++ if (flag_fp_model == FP_MODEL_EXCEPT || flag_fp_model == FP_MODEL_STRICT) ++ { ++ gfc_handle_fpe_option ("all", false); ++ gfc_handle_fpe_option ("invalid,zero,overflow,underflow", true); ++ } ++ + /* -Ofast sets implies -fstack-arrays unless an explicit size is set for + stack arrays. */ + if (flag_stack_arrays == -1 && flag_max_stack_var_size == -2) +diff -Nurp a/gcc/opts.c b/gcc/opts.c +--- a/gcc/opts.c 2021-02-18 21:22:07.424000000 +0800 ++++ b/gcc/opts.c 2021-02-19 16:00:08.628000000 +0800 +@@ -196,6 +196,7 @@ static void set_debug_level (enum debug_ + struct gcc_options *opts_set, + location_t loc); + static void set_fast_math_flags (struct gcc_options *opts, int set); ++static void set_fp_model_flags (struct gcc_options *opts, int set); + static void decode_d_option (const char *arg, struct gcc_options *opts, + location_t loc, diagnostic_context *dc); + static void set_unsafe_math_optimizations_flags (struct gcc_options *opts, +@@ -2433,6 +2434,10 @@ common_handle_option (struct gcc_options + set_fast_math_flags (opts, value); + break; + ++ case OPT_fp_model_: ++ set_fp_model_flags (opts, value); ++ break; ++ + case OPT_funsafe_math_optimizations: + set_unsafe_math_optimizations_flags (opts, value); + break; +@@ -2905,6 +2910,69 @@ set_fast_math_flags (struct gcc_options + } + } + ++/* Handle fp-model options. */ ++static void ++set_fp_model_flags (struct gcc_options *opts, int set) ++{ ++ enum fp_model model = (enum fp_model) set; ++ switch (model) ++ { ++ case FP_MODEL_FAST: ++ /* Equivalent to open ffast-math. */ ++ set_fast_math_flags (opts, 1); ++ break; ++ ++ case FP_MODEL_PRECISE: ++ /* Equivalent to close ffast-math. 
*/ ++ set_fast_math_flags (opts, 0); ++ /* Turn on -frounding-math -fsignaling-nans. */ ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_rounding_math) ++ opts->x_flag_rounding_math = 1; ++ opts->x_flag_expensive_optimizations = 0; ++ opts->x_flag_code_hoisting = 0; ++ opts->x_flag_predictive_commoning = 0; ++ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF; ++ break; ++ ++ case FP_MODEL_EXCEPT: ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_errno_math) ++ opts->x_flag_errno_math = 1; ++ if (!opts->frontend_set_flag_trapping_math) ++ opts->x_flag_trapping_math = 1; ++ opts->x_flag_fp_int_builtin_inexact = 1; ++ /* Also turn on ffpe-trap in fortran. */ ++ break; ++ ++ case FP_MODEL_STRICT: ++ /* Turn on both precise and except. */ ++ if (!opts->frontend_set_flag_signaling_nans) ++ opts->x_flag_signaling_nans = 1; ++ if (!opts->frontend_set_flag_rounding_math) ++ opts->x_flag_rounding_math = 1; ++ opts->x_flag_expensive_optimizations = 0; ++ opts->x_flag_code_hoisting = 0; ++ opts->x_flag_predictive_commoning = 0; ++ if (!opts->frontend_set_flag_errno_math) ++ opts->x_flag_errno_math = 1; ++ if (!opts->frontend_set_flag_trapping_math) ++ opts->x_flag_trapping_math = 1; ++ opts->x_flag_fp_int_builtin_inexact = 1; ++ opts->x_flag_fp_contract_mode = FP_CONTRACT_OFF; ++ break; ++ ++ case FP_MODEL_NORMAL: ++ /* Do nothing. */ ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ + /* When -funsafe-math-optimizations is set the following + flags are set as well. */ + static void +diff -Nurp a/gcc/opts-common.c b/gcc/opts-common.c +--- a/gcc/opts-common.c 2020-03-12 19:07:21.000000000 +0800 ++++ b/gcc/opts-common.c 2021-02-19 09:49:18.880000000 +0800 +@@ -26,7 +26,8 @@ along with GCC; see the file COPYING3. + #include "diagnostic.h" + #include "spellcheck.h" + +-static void prune_options (struct cl_decoded_option **, unsigned int *); ++static void prune_options (struct cl_decoded_option **, unsigned int *, ++ unsigned int); + + /* An option that is undocumented, that takes a joined argument, and + that doesn't fit any of the classes of uses (language/common, +@@ -968,7 +969,7 @@ decode_cmdline_options_to_array (unsigne + + *decoded_options = opt_array; + *decoded_options_count = num_decoded_options; +- prune_options (decoded_options, decoded_options_count); ++ prune_options (decoded_options, decoded_options_count, lang_mask); + } + + /* Return true if NEXT_OPT_IDX cancels OPT_IDX. Return false if the +@@ -989,11 +990,108 @@ cancel_option (int opt_idx, int next_opt + return false; + } + ++/* Check whether opt_idx exists in decoded_options array bewteen index ++ start and end. If found, return its index in decoded_options, ++ else return end. */ ++static unsigned int ++find_opt_idx (struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int start, unsigned int end, unsigned int opt_idx) ++{ ++ gcc_assert (end <= decoded_options_count); ++ gcc_assert (opt_idx < cl_options_count); ++ unsigned int k; ++ for (k = start; k < end; k++) ++ { ++ if (decoded_options[k].opt_index == opt_idx) ++ { ++ return k; ++ } ++ } ++ return k; ++} ++ ++/* remove the opt_index element from decoded_options array. 
*/ ++static unsigned int ++remove_option (struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int opt_index) ++{ ++ gcc_assert (opt_index < decoded_options_count); ++ unsigned int i; ++ for (i = opt_index; i < decoded_options_count - 1; i++) ++ { ++ decoded_options[i] = decoded_options[i + 1]; ++ } ++ return decoded_options_count - 1; ++} ++ ++/* Handle the priority between fp-model, Ofast, and ++ ffast-math. */ ++static unsigned int ++handle_fp_model_driver (struct cl_decoded_option *decoded_options, ++ unsigned int decoded_options_count, ++ unsigned int fp_model_index, ++ unsigned int lang_mask) ++{ ++ struct cl_decoded_option fp_model_opt = decoded_options[fp_model_index]; ++ enum fp_model model = (enum fp_model) fp_model_opt.value; ++ if (model == FP_MODEL_PRECISE || model == FP_MODEL_STRICT) ++ { ++ /* If found Ofast, override Ofast with O3. */ ++ unsigned int Ofast_index; ++ Ofast_index = find_opt_idx (decoded_options, decoded_options_count, ++ 0, decoded_options_count, OPT_Ofast); ++ while (Ofast_index != decoded_options_count) ++ { ++ const char *tmp_argv = "-O3"; ++ decode_cmdline_option (&tmp_argv, lang_mask, ++ &decoded_options[Ofast_index]); ++ warning (0, "'-Ofast' is degraded to '-O3' due to %qs", ++ fp_model_opt.orig_option_with_args_text); ++ Ofast_index = find_opt_idx (decoded_options, decoded_options_count, ++ 0, decoded_options_count, OPT_Ofast); ++ } ++ /* If found ffast-math before fp-model=precise/strict ++ it, cancel it. */ ++ unsigned int ffast_math_index; ++ ffast_math_index ++ = find_opt_idx (decoded_options, decoded_options_count, 0, ++ fp_model_index, OPT_ffast_math); ++ if (ffast_math_index != fp_model_index) ++ { ++ decoded_options_count ++ = remove_option (decoded_options, decoded_options_count, ++ ffast_math_index); ++ warning (0, "'-ffast-math' before %qs is canceled", ++ fp_model_opt.orig_option_with_args_text); ++ } ++ } ++ if (model == FP_MODEL_FAST) ++ { ++ /* If found -fno-fast-math after fp-model=fast, cancel this one. */ ++ unsigned int fno_fast_math_index; ++ fno_fast_math_index ++ = find_opt_idx (decoded_options, decoded_options_count, fp_model_index, ++ decoded_options_count, OPT_ffast_math); ++ if (fno_fast_math_index != decoded_options_count ++ && decoded_options[fno_fast_math_index].value == 0) ++ { ++ decoded_options_count ++ = remove_option (decoded_options, decoded_options_count, ++ fp_model_index); ++ warning (0, "'-fp-model=fast' before '-fno-fast-math' is canceled"); ++ } ++ } ++ return decoded_options_count; ++} ++ + /* Filter out options canceled by the ones after them. */ + + static void + prune_options (struct cl_decoded_option **decoded_options, +- unsigned int *decoded_options_count) ++ unsigned int *decoded_options_count, ++ unsigned int lang_mask) + { + unsigned int old_decoded_options_count = *decoded_options_count; + struct cl_decoded_option *old_decoded_options = *decoded_options; +@@ -1005,6 +1103,8 @@ prune_options (struct cl_decoded_option + unsigned int fdiagnostics_color_idx = 0; + + /* Remove arguments which are negated by others after them. */ ++ ++ unsigned int fp_model_index = old_decoded_options_count; + new_decoded_options_count = 0; + for (i = 0; i < old_decoded_options_count; i++) + { +@@ -1028,6 +1128,34 @@ prune_options (struct cl_decoded_option + fdiagnostics_color_idx = i; + continue; + ++ case OPT_fp_model_: ++ /* Only the last fp-model option will take effect. 
*/ ++ unsigned int next_fp_model_idx; ++ next_fp_model_idx = find_opt_idx (old_decoded_options, ++ old_decoded_options_count, ++ i + 1, ++ old_decoded_options_count, ++ OPT_fp_model_); ++ if (next_fp_model_idx != old_decoded_options_count) ++ { ++ /* Found more than one fp-model, cancel this one. */ ++ if (old_decoded_options[i].value ++ != old_decoded_options[next_fp_model_idx].value) ++ { ++ warning (0, "%qs is overrided by %qs", ++ old_decoded_options[i]. ++ orig_option_with_args_text, ++ old_decoded_options[next_fp_model_idx]. ++ orig_option_with_args_text); ++ } ++ break; ++ } ++ else ++ { ++ /* Found the last fp-model option. */ ++ fp_model_index = new_decoded_options_count; ++ } ++ /* FALLTHRU. */ + default: + gcc_assert (opt_idx < cl_options_count); + option = &cl_options[opt_idx]; +@@ -1067,6 +1195,14 @@ keep: + break; + } + } ++ if (fp_model_index < new_decoded_options_count) ++ { ++ new_decoded_options_count ++ = handle_fp_model_driver (new_decoded_options, ++ new_decoded_options_count, ++ fp_model_index, ++ lang_mask); ++ } + + if (fdiagnostics_color_idx >= 1) + { diff --git a/enable-simd-math.patch b/enable-simd-math.patch index 46f7d3d..7658fb8 100644 --- a/enable-simd-math.patch +++ b/enable-simd-math.patch @@ -1,7 +1,35 @@ +diff -Nurp a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c +--- a/gcc/c-family/c-opts.c 2021-01-07 17:32:31.856000000 +0800 ++++ b/gcc/c-family/c-opts.c 2021-01-07 17:05:02.524000000 +0800 +@@ -783,6 +783,10 @@ c_common_post_options (const char **pfil + if (cpp_opts->deps.style == DEPS_NONE) + check_deps_environment_vars (); + ++ if (flag_simdmath) ++ { ++ defer_opt (OPT_include, "simdmath.h"); ++ } + handle_deferred_opts (); + + sanitize_cpp_opts (); +diff -Nurp a/gcc/common.opt b/gcc/common.opt +--- a/gcc/common.opt 2021-01-07 17:30:43.912000000 +0800 ++++ b/gcc/common.opt 2021-01-07 17:38:38.612000000 +0800 +@@ -1935,6 +1935,10 @@ fmath-errno + Common Report Var(flag_errno_math) Init(1) Optimization SetByCombined + Set errno after built-in math functions. + ++fsimdmath ++Common Report Var(flag_simdmath) Init(0) Optimization ++Enable auto-vectorize math functions for mathlib. This option will turn on -fno-math-errno and -fopenmp-simd. ++ + fmax-errors= + Common Joined RejectNegative UInteger Var(flag_max_errors) + -fmax-errors= Maximum number of errors to report. 
 diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
---- a/gcc/config/aarch64/aarch64.c	2020-07-06 17:20:30.368000000 +0800
-+++ b/gcc/config/aarch64/aarch64.c	2020-07-06 20:02:39.480000000 +0800
-@@ -18860,8 +18860,12 @@ aarch64_simd_clone_compute_vecsize_and_s
+--- a/gcc/config/aarch64/aarch64.c	2021-01-07 17:30:43.912000000 +0800
++++ b/gcc/config/aarch64/aarch64.c	2021-01-05 15:17:21.580000000 +0800
+@@ -21588,8 +21588,12 @@ aarch64_simd_clone_compute_vecsize_and_s
   elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type));
   if (clonei->simdlen == 0)
     {
 diff -Nurp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
---- a/gcc/config/aarch64/aarch64.opt	2020-07-06 17:20:30.364000000 +0800
-+++ b/gcc/config/aarch64/aarch64.opt	2020-07-06 20:02:39.480000000 +0800
-@@ -186,6 +186,12 @@ precision of square root results to abou
+--- a/gcc/config/aarch64/aarch64.opt	2021-01-07 17:30:43.912000000 +0800
++++ b/gcc/config/aarch64/aarch64.opt	2021-01-05 15:17:21.448000000 +0800
+@@ -197,6 +197,12 @@ precision of square root results to abou
  single precision and to 32 bits for double precision. If enabled, it implies
  -mlow-precision-recip-sqrt.
 
+mlow-precision-div
+Target Var(flag_mlow_precision_div) Optimization
+Enable the division approximation. Enabling this reduces
+diff -Nurp a/gcc/fortran/scanner.c b/gcc/fortran/scanner.c
+--- a/gcc/fortran/scanner.c	2021-01-07 17:31:59.264000000 +0800
++++ b/gcc/fortran/scanner.c	2021-01-07 17:05:28.776000000 +0800
+@@ -2702,6 +2702,10 @@ gfc_new_file (void)
+       && !load_file (flag_pre_include, NULL, false))
+     exit (FATAL_EXIT_CODE);
+ 
++  if (flag_simdmath
++      && !load_file ("simdmath_f.h", NULL, false))
++    exit (FATAL_EXIT_CODE);
++
+   if (gfc_cpp_enabled ())
+     {
+       result = gfc_cpp_preprocess (gfc_source_file);
+diff -Nurp a/gcc/opts.c b/gcc/opts.c
+--- a/gcc/opts.c	2021-01-07 17:30:57.740000000 +0800
++++ b/gcc/opts.c	2021-01-05 15:17:21.068000000 +0800
+@@ -190,6 +190,7 @@ typedef char *char_p; /* For DEF_VEC_P.
+ static void handle_param (struct gcc_options *opts,
+ 			  struct gcc_options *opts_set, location_t loc,
+ 			  const char *carg);
++static void set_simdmath_flags (struct gcc_options *opts, int set);
+ static void set_debug_level (enum debug_info_type type, int extended,
+ 			     const char *arg, struct gcc_options *opts,
+ 			     struct gcc_options *opts_set,
+@@ -2420,6 +2421,10 @@ common_handle_option (struct gcc_options
+       dc->min_margin_width = value;
+       break;
+ 
++    case OPT_fsimdmath:
++      set_simdmath_flags (opts, value);
++      break;
++
+     case OPT_fdump_:
+       /* Deferred.  */
+       break;
+@@ -2843,6 +2848,18 @@ handle_param (struct gcc_options *opts,
+   free (arg);
+ }
+ 
++/* The following routines are used to set -fno-math-errno and -fopenmp-simd
++   to enable vector mathlib.  */
++static void
++set_simdmath_flags (struct gcc_options *opts, int set)
++{
++  if (set)
++    {
++      opts->x_flag_errno_math = 0;
++      opts->x_flag_openmp_simd = 1;
++    }
++}
++
+ /* Used to set the level of strict aliasing warnings in OPTS,
+    when no level is specified (i.e., when -Wstrict-aliasing, and not
+    -Wstrict-aliasing=level was given).  */
+diff -Nurp a/libgomp/configure b/libgomp/configure
+--- a/libgomp/configure	2021-01-07 17:40:08.216000000 +0800
++++ b/libgomp/configure	2021-01-07 16:29:45.628000000 +0800
+@@ -17258,7 +17258,7 @@ fi
+ 
+ 
+ 
+-ac_config_files="$ac_config_files omp.h omp_lib.h omp_lib.f90 libgomp_f.h"
++ac_config_files="$ac_config_files omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h"
+ 
+ ac_config_files="$ac_config_files Makefile testsuite/Makefile libgomp.spec"
+ 
+@@ -18426,6 +18426,8 @@ do
+     "gstdint.h") CONFIG_COMMANDS="$CONFIG_COMMANDS gstdint.h" ;;
+     "omp.h") CONFIG_FILES="$CONFIG_FILES omp.h" ;;
+     "omp_lib.h") CONFIG_FILES="$CONFIG_FILES omp_lib.h" ;;
++    "simdmath.h") CONFIG_FILES="$CONFIG_FILES simdmath.h" ;;
++    "simdmath_f.h") CONFIG_FILES="$CONFIG_FILES simdmath_f.h" ;;
+     "omp_lib.f90") CONFIG_FILES="$CONFIG_FILES omp_lib.f90" ;;
+     "libgomp_f.h") CONFIG_FILES="$CONFIG_FILES libgomp_f.h" ;;
+     "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;;
+diff -Nurp a/libgomp/configure.ac b/libgomp/configure.ac
+--- a/libgomp/configure.ac	2021-01-07 17:40:08.216000000 +0800
++++ b/libgomp/configure.ac	2021-01-07 16:26:26.560000000 +0800
+@@ -422,7 +422,7 @@ CFLAGS="$save_CFLAGS"
+ # Determine what GCC version number to use in filesystem paths.
+ GCC_BASE_VER
+ 
+-AC_CONFIG_FILES(omp.h omp_lib.h omp_lib.f90 libgomp_f.h)
++AC_CONFIG_FILES(omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h)
+ AC_CONFIG_FILES(Makefile testsuite/Makefile libgomp.spec)
+ AC_CONFIG_FILES([testsuite/libgomp-test-support.pt.exp:testsuite/libgomp-test-support.exp.in])
+ AC_OUTPUT
+diff -Nurp a/libgomp/Makefile.am b/libgomp/Makefile.am
+--- a/libgomp/Makefile.am	2021-01-07 17:40:08.168000000 +0800
++++ b/libgomp/Makefile.am	2021-01-07 16:27:39.776000000 +0800
+@@ -74,9 +74,9 @@ libgomp_la_SOURCES += openacc.f90
+ endif
+ 
+ nodist_noinst_HEADERS = libgomp_f.h
+-nodist_libsubinclude_HEADERS = omp.h openacc.h
++nodist_libsubinclude_HEADERS = omp.h openacc.h simdmath.h
+ if USE_FORTRAN
+-nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
++nodist_finclude_HEADERS = omp_lib.h simdmath_f.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
+ 	openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod
+ endif
+ 
+diff -Nurp a/libgomp/Makefile.in b/libgomp/Makefile.in
+--- a/libgomp/Makefile.in	2021-01-07 17:40:08.208000000 +0800
++++ b/libgomp/Makefile.in	2021-01-07 16:50:28.820000000 +0800
+@@ -145,7 +145,7 @@ am__CONFIG_DISTCLEAN_FILES = config.stat
+ 	configure.lineno config.status.lineno
+ mkinstalldirs = $(SHELL) $(top_srcdir)/../mkinstalldirs
+ CONFIG_HEADER = config.h
+-CONFIG_CLEAN_FILES = omp.h omp_lib.h omp_lib.f90 libgomp_f.h \
++CONFIG_CLEAN_FILES = omp.h omp_lib.h simdmath.h simdmath_f.h omp_lib.f90 libgomp_f.h \
+ 	libgomp.spec
+ CONFIG_CLEAN_VPATH_FILES =
+ am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+@@ -575,8 +575,8 @@ libgomp_la_SOURCES = alloc.c atomic.c ba
+ @PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LIBADD = libgomp.la $(PLUGIN_HSA_LIBS)
+ @PLUGIN_HSA_TRUE@libgomp_plugin_hsa_la_LIBTOOLFLAGS = --tag=disable-static
+ nodist_noinst_HEADERS = libgomp_f.h
+-nodist_libsubinclude_HEADERS = omp.h openacc.h
++nodist_libsubinclude_HEADERS = omp.h openacc.h simdmath.h
+-@USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
++@USE_FORTRAN_TRUE@nodist_finclude_HEADERS = omp_lib.h simdmath_f.h omp_lib.f90 omp_lib.mod omp_lib_kinds.mod \
+ @USE_FORTRAN_TRUE@	openacc_lib.h openacc.f90 openacc.mod openacc_kinds.mod
+ 
+ LTLDFLAGS = $(shell $(SHELL) $(top_srcdir)/../libtool-ldflags $(LDFLAGS))
+@@ -668,6 +668,10 @@ omp.h: $(top_builddir)/config.status $(s
+ 	cd $(top_builddir) && $(SHELL) ./config.status $@
+ omp_lib.h: $(top_builddir)/config.status $(srcdir)/omp_lib.h.in
+ 	cd $(top_builddir) && $(SHELL) ./config.status $@
++simdmath_f.h: $(top_builddir)/config.status $(srcdir)/simdmath_f.h.in
++	cd $(top_builddir) && $(SHELL) ./config.status $@
++simdmath.h: $(top_builddir)/config.status $(srcdir)/simdmath.h.in
++	cd $(top_builddir) && $(SHELL) ./config.status $@
+ omp_lib.f90: $(top_builddir)/config.status $(srcdir)/omp_lib.f90.in
+ 	cd $(top_builddir) && $(SHELL) ./config.status $@
+ libgomp_f.h: $(top_builddir)/config.status $(srcdir)/libgomp_f.h.in
+diff -Nurp a/libgomp/simdmath_f.h.in b/libgomp/simdmath_f.h.in
+--- a/libgomp/simdmath_f.h.in	1970-01-01 08:00:00.000000000 +0800
++++ b/libgomp/simdmath_f.h.in	2021-01-07 16:13:23.196000000 +0800
+@@ -0,0 +1,11 @@
++!GCC$ builtin (cos) attributes simd (notinbranch)
++!GCC$ builtin (cosf) attributes simd (notinbranch)
++!GCC$ builtin (sin) attributes simd (notinbranch)
++!GCC$ builtin (sinf) attributes simd (notinbranch)
++!GCC$ builtin (exp) attributes simd (notinbranch)
++!GCC$ builtin (expf) attributes simd (notinbranch)
++!GCC$ builtin (exp2f) attributes simd (notinbranch)
++!GCC$ builtin (log) attributes simd (notinbranch)
++!GCC$ builtin (logf) attributes simd (notinbranch)
++!GCC$ builtin (pow) attributes simd (notinbranch)
++!GCC$ builtin (powf) attributes simd (notinbranch)
+diff -Nurp a/libgomp/simdmath.h.in b/libgomp/simdmath.h.in
+--- a/libgomp/simdmath.h.in	1970-01-01 08:00:00.000000000 +0800
++++ b/libgomp/simdmath.h.in	2021-01-07 16:13:56.144000000 +0800
+@@ -0,0 +1,40 @@
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++#pragma omp declare simd simdlen(2) notinbranch
++double cos (double x);
++
++#pragma omp declare simd simdlen(4) notinbranch
++float cosf (float x);
++
++#pragma omp declare simd simdlen(2) notinbranch
++double sin (double x);
++
++#pragma omp declare simd simdlen(4) notinbranch
++float sinf (float x);
++
++#pragma omp declare simd simdlen(2) notinbranch
++double exp (double x);
++
++#pragma omp declare simd simdlen(4) notinbranch
++float expf (float x);
++
++#pragma omp declare simd simdlen(2) notinbranch
++double log (double x);
++
++#pragma omp declare simd simdlen(4) notinbranch
++float logf (float x);
++
++#pragma omp declare simd simdlen(2) notinbranch
++double pow (double x, double y);
++
++#pragma omp declare simd simdlen(4) notinbranch
++float powf (float x, float y);
++
++#pragma omp declare simd simdlen(4) notinbranch
++float exp2f (float x);
++
++#ifdef __cplusplus
++} // extern "C"
++#endif
diff --git a/fix-CTOR-vectorization.patch b/fix-CTOR-vectorization.patch
new file mode 100644
index 0000000..3cb30ad
--- /dev/null
+++ b/fix-CTOR-vectorization.patch
@@ -0,0 +1,18 @@
+This backport contains 1 patch from the gcc mainstream tree.
+The commit id of this patch is listed below.
+
+0001-fix-CTOR-vectorization.patch
+3d42842c07f4143042f3dcc39a050b262bcf1b55
+
+diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
+index 9d17e3386fa..fb13af7965e 100644
+--- a/gcc/tree-vect-slp.c
++++ b/gcc/tree-vect-slp.c
+@@ -2257,6 +2257,7 @@ vect_analyze_slp_instance (vec_info *vinfo,
+ 	  /* Value is defined in another basic block.  */
+ 	  if (!def_info)
+ 	    return false;
++	  def_info = vect_stmt_to_vectorize (def_info);
+ 	  scalar_stmts.safe_push (def_info);
+ 	}
+       else
diff --git a/fix-ICE-in-eliminate-stmt.patch b/fix-ICE-in-eliminate-stmt.patch
new file mode 100644
index 0000000..7c5ee13
--- /dev/null
+++ b/fix-ICE-in-eliminate-stmt.patch
@@ -0,0 +1,79 @@
+commit ee80f0c6ba50ebf0300fb0cfe1079a1321295749
+Author: Richard Biener
+Date:   Thu Oct 24 11:23:54 2019 +0000
+
+    re PR tree-optimization/92203 (ICE in eliminate_stmt, at tree-ssa-sccvn.c:5492)
+
+    2019-10-24  Richard Biener
+
+    	PR tree-optimization/92203
+    	* tree-ssa-sccvn.c (eliminate_dom_walker::eliminate_stmt):
+    	Skip eliminating conversion stmts inserted by insertion.
+
+    	* gcc.dg/torture/pr92203.c: New testcase.
+
+    From-SVN: r277374
+
+diff --git a/gcc/testsuite/gcc.dg/torture/pr92203.c b/gcc/testsuite/gcc.dg/torture/pr92203.c
+new file mode 100644
+index 00000000000..c752969d5e5
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/torture/pr92203.c
+@@ -0,0 +1,37 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-Wno-div-by-zero" } */
++
++unsigned long int rr;
++
++void
++cw (int z9)
++{
++  int m5;
++  unsigned long int vz = 0;
++  long int *na;
++
++  if (z9 == 0)
++    rr = 0;
++  else
++    {
++      na = (long int *) &m5;
++      for (*na = 0; *na < 1; ++*na)
++	{
++	  na = (long int *) &vz;
++	  rr /= 0;
++	}
++    }
++
++  m5 = rr / 5;
++  ++vz;
++  if (vz != 0)
++    while (z9 < 1)
++      {
++	if (m5 >= 0)
++	  rr += m5;
++
++	na = (long int *) &rr;
++	if (*na >= 0)
++	  rr = 0;
++      }
++}
+diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
+index 57331ab44dc..3872168a4ed 100644
+--- a/gcc/tree-ssa-sccvn.c
++++ b/gcc/tree-ssa-sccvn.c
+@@ -5459,8 +5459,13 @@ eliminate_dom_walker::eliminate_stmt (basic_block b, gimple_stmt_iterator *gsi)
+ 
+ 	  /* If this is an assignment from our leader (which
+ 	     happens in the case the value-number is a constant)
+-	     then there is nothing to do.  */
+-	  if (gimple_assign_single_p (stmt)
++	     then there is nothing to do.  Likewise if we run into
++	     inserted code that needed a conversion because of
++	     our type-agnostic value-numbering of loads.  */
++	  if ((gimple_assign_single_p (stmt)
++	       || (is_gimple_assign (stmt)
++		   && (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (stmt))
++		       || gimple_assign_rhs_code (stmt) == VIEW_CONVERT_EXPR)))
+ 	      && sprime == gimple_assign_rhs1 (stmt))
+ 	    return;
+ 
diff --git a/fix-ICE-in-vect.patch b/fix-ICE-in-vect.patch
new file mode 100644
index 0000000..3b59dd2
--- /dev/null
+++ b/fix-ICE-in-vect.patch
@@ -0,0 +1,1037 @@
+This backport contains 5 patches from the gcc mainstream tree.
+The commit ids of these patches are listed below in chronological order.
+
+0001-Improve-tree-vect-patterns.c-handling-of-boolean-com.patch
+ce19a4822794992097deab96bf15bf78ff481ea1
+0002-Make-vectorizable_operation-punt-early-on-codes-it-d.patch
+4177e933b309408e69eb5561fee7a3cc5e6f8899
+0003-Make-vect_get_mask_type_for_stmt-take-a-group-size.patch
+1c5d68a677b076262c5508e6d4fbdb765cba2d2f
+0004-Record-the-vector-mask-precision-in-stmt_vec_info.patch
+0c3ea6b3424ee4d32d97ca5d7453891b587b3132
+0005-Don-t-defer-choice-of-vector-type-for-bools-PR-92596.patch
+02d895504cc59be06fc3f7ec0cfd4eb160561211
+
+diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-43.c b/gcc/testsuite/gcc.dg/vect/bb-slp-43.c
+--- a/gcc/testsuite/gcc.dg/vect/bb-slp-43.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-43.c	2021-02-08 09:21:04.487633230 +0800
+@@ -0,0 +1,17 @@
++/* { dg-do compile } */
++
++void
++f (int *restrict x, short *restrict y)
++{
++  x[0] = x[0] == 1 & y[0] == 2;
++  x[1] = x[1] == 1 & y[1] == 2;
++  x[2] = x[2] == 1 & y[2] == 2;
++  x[3] = x[3] == 1 & y[3] == 2;
++  x[4] = x[4] == 1 & y[4] == 2;
++  x[5] = x[5] == 1 & y[5] == 2;
++  x[6] = x[6] == 1 & y[6] == 2;
++  x[7] = x[7] == 1 & y[7] == 2;
++}
++
++/* { dg-final { scan-tree-dump-not "mixed mask and nonmask" "slp2" } } */
++/* { dg-final { scan-tree-dump-not "vector operands from scalars" "slp2" { target { { vect_int && vect_bool_cmp } && { vect_unpack && vect_hw_misalign } } xfail vect_variable_length } } } */
+diff -Nurp a/gcc/testsuite/gcc.dg/vect/bb-slp-pr92596.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr92596.c
+--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr92596.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr92596.c	2021-02-08 09:21:04.487633230 +0800
+@@ -0,0 +1,14 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-O3" } */
++
++typedef struct {
++  long n[5];
++} secp256k1_fe;
++
++secp256k1_fe a;
++
++void fn1(int p1) { a.n[0] = a.n[1] = a.n[2] = p1; }
++void fn2() {
++  int b;
++  fn1(!b);
++}
+diff -Nurp a/gcc/testsuite/gcc.dg/vect/vect-bool-cmp-2.c b/gcc/testsuite/gcc.dg/vect/vect-bool-cmp-2.c
+--- a/gcc/testsuite/gcc.dg/vect/vect-bool-cmp-2.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/vect/vect-bool-cmp-2.c	2021-02-08 09:21:04.487633230 +0800
+@@ -0,0 +1,10 @@
++/* { dg-do compile } */
++
++void
++f (_Bool *restrict x, _Bool *restrict y)
++{
++  for (int i = 0; i < 128; ++i)
++    x[i] = x[i] == y[i];
++}
++
++/* { dg-final { scan-tree-dump "loop vectorized" "vect" { target vect_bool_cmp } } } */
+diff -Nurp a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
+--- a/gcc/testsuite/lib/target-supports.exp	2021-02-08 09:24:27.611633230 +0800
++++ b/gcc/testsuite/lib/target-supports.exp	2021-02-08 09:21:03.859633230 +0800
+@@ -5668,6 +5668,16 @@ proc check_effective_target_vect_bswap {
+ 	     || [istarget amdgcn-*-*] }}]
+ }
+ 
++# Return 1 if the target supports comparison of bool vectors for at
++# least one vector length.
++
++proc check_effective_target_vect_bool_cmp { } {
++    return [check_cached_effective_target_indexed vect_bool_cmp {
++      expr { [istarget i?86-*-*] || [istarget x86_64-*-*]
++	     || [istarget aarch64*-*-*]
++	     || [is-effective-target arm_neon] }}]
++}
++
+ # Return 1 if the target supports hardware vector shift operation for char.
+ 
+ proc check_effective_target_vect_shift_char { } {
+diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
+--- a/gcc/tree-vect-loop.c	2021-02-08 09:24:26.471633230 +0800
++++ b/gcc/tree-vect-loop.c	2021-02-08 09:21:02.719633230 +0800
+@@ -164,8 +164,7 @@ static stmt_vec_info vect_is_simple_redu
+ static opt_result
+ vect_determine_vf_for_stmt_1 (stmt_vec_info stmt_info,
+ 			      bool vectype_maybe_set_p,
+-			      poly_uint64 *vf,
+-			      vec<stmt_vec_info> *mask_producers)
++			      poly_uint64 *vf)
+ {
+   gimple *stmt = stmt_info->stmt;
+ 
+@@ -193,8 +192,6 @@ vect_determine_vf_for_stmt_1 (stmt_vec_i
+     gcc_assert ((STMT_VINFO_DATA_REF (stmt_info)
+ 		 || vectype_maybe_set_p)
+ 		&& STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype);
+-  else if (stmt_vectype == boolean_type_node)
+-    mask_producers->safe_push (stmt_info);
+   else
+     STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;
+ 
+@@ -207,21 +204,17 @@ vect_determine_vf_for_stmt_1 (stmt_vec_i
+ 
+ /* Subroutine of vect_determine_vectorization_factor.  Set the vector
+    types of STMT_INFO and all attached pattern statements and update
+-   the vectorization factor VF accordingly.  If some of the statements
+-   produce a mask result whose vector type can only be calculated later,
+-   add them to MASK_PRODUCERS.  Return true on success or false if
+-   something prevented vectorization.  */
++   the vectorization factor VF accordingly.  Return true on success
++   or false if something prevented vectorization.  */
+ 
+ static opt_result
+-vect_determine_vf_for_stmt (stmt_vec_info stmt_info, poly_uint64 *vf,
+-			    vec<stmt_vec_info> *mask_producers)
++vect_determine_vf_for_stmt (stmt_vec_info stmt_info, poly_uint64 *vf)
+ {
+   vec_info *vinfo = stmt_info->vinfo;
+   if (dump_enabled_p ())
+     dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
+ 		     stmt_info->stmt);
+-  opt_result res
+-    = vect_determine_vf_for_stmt_1 (stmt_info, false, vf, mask_producers);
++  opt_result res = vect_determine_vf_for_stmt_1 (stmt_info, false, vf);
+   if (!res)
+     return res;
+ 
+@@ -240,10 +233,7 @@ vect_determine_vf_for_stmt (stmt_vec_inf
+ 	    dump_printf_loc (MSG_NOTE, vect_location,
+ 			     "==> examining pattern def stmt: %G",
+ 			     def_stmt_info->stmt);
+-	  if (!vect_determine_vf_for_stmt_1 (def_stmt_info, true,
+-					     vf, mask_producers))
+-	    res = vect_determine_vf_for_stmt_1 (def_stmt_info, true,
+-						vf, mask_producers);
++	  res = vect_determine_vf_for_stmt_1 (def_stmt_info, true, vf);
+ 	  if (!res)
+ 	    return res;
+ 	}
+@@ -252,7 +242,7 @@ vect_determine_vf_for_stmt (stmt_vec_inf
+ 	dump_printf_loc (MSG_NOTE, vect_location,
+ 			 "==> examining pattern statement: %G",
+ 			 stmt_info->stmt);
+-      res = vect_determine_vf_for_stmt_1 (stmt_info, true, vf, mask_producers);
++      res = vect_determine_vf_for_stmt_1 (stmt_info, true, vf);
+       if (!res)
+ 	return res;
+     }
+@@ -297,7 +287,6 @@ vect_determine_vectorization_factor (loo
+   tree vectype;
+   stmt_vec_info stmt_info;
+   unsigned i;
+-  auto_vec<stmt_vec_info> mask_producers;
+ 
+   DUMP_VECT_SCOPE ("vect_determine_vectorization_factor");
+ 
+@@ -355,8 +344,7 @@ vect_determine_vectorization_factor (loo
+ 	{
+ 	  stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
+ 	  opt_result res
+-	    = vect_determine_vf_for_stmt (stmt_info, &vectorization_factor,
+-					  &mask_producers);
++	    = vect_determine_vf_for_stmt (stmt_info, &vectorization_factor);
+ 	  if (!res)
+ 	    return res;
+ 	}
+@@ -374,16 +362,6 @@ vect_determine_vectorization_factor (loo
+     return opt_result::failure_at (vect_location,
+ 				   "not vectorized: unsupported data-type\n");
+   LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
+-
+-  for (i = 0; i < mask_producers.length (); i++)
+-    {
+-      stmt_info = mask_producers[i];
+-      opt_tree mask_type = vect_get_mask_type_for_stmt (stmt_info);
+-      if (!mask_type)
+-	return opt_result::propagate_failure (mask_type);
+-      STMT_VINFO_VECTYPE (stmt_info) = mask_type;
+-    }
+-
+   return opt_result::success ();
+ }
+ 
+diff -Nurp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
+--- a/gcc/tree-vectorizer.h	2021-02-08 09:24:26.463633230 +0800
++++ b/gcc/tree-vectorizer.h	2021-02-08 09:21:02.619633230 +0800
+@@ -1080,6 +1080,23 @@ struct _stmt_vec_info {
+   unsigned int operation_precision;
+   signop operation_sign;
+ 
++  /* If the statement produces a boolean result, this value describes
++     how we should choose the associated vector type.  The possible
++     values are:
++
++     - an integer precision N if we should use the vector mask type
++       associated with N-bit integers.  This is only used if all relevant
++       input booleans also want the vector mask type for N-bit integers,
++       or if we can convert them into that form by pattern-matching.
++
++     - ~0U if we considered choosing a vector mask type but decided
++       to treat the boolean as a normal integer type instead.
++
++     - 0 otherwise.  This means either that the operation isn't one that
++       could have a vector mask type (and so should have a normal vector
++       type instead) or that we simply haven't made a choice either way.  */
++  unsigned int mask_precision;
++
+   /* True if this is only suitable for SLP vectorization.  */
+   bool slp_vect_only_p;
+ };
+@@ -1236,6 +1253,15 @@ nested_in_vect_loop_p (struct loop *loop
+ 	  && (loop->inner == (gimple_bb (stmt_info->stmt))->loop_father));
+ }
+ 
++/* Return true if STMT_INFO should produce a vector mask type rather than
++   a normal nonmask type.  */
++
++static inline bool
++vect_use_mask_type_p (stmt_vec_info stmt_info)
++{
++  return stmt_info->mask_precision && stmt_info->mask_precision != ~0U;
++}
++
+ /* Return TRUE if a statement represented by STMT_INFO is a part of a
+    pattern.  */
+ 
+@@ -1620,7 +1646,7 @@ extern tree get_related_vectype_for_scal
+ 						poly_uint64 = 0);
+ extern tree get_vectype_for_scalar_type (vec_info *, tree, unsigned int = 0);
+ extern tree get_vectype_for_scalar_type (vec_info *, tree, slp_tree);
+-extern tree get_mask_type_for_scalar_type (vec_info *, tree, slp_tree = 0);
++extern tree get_mask_type_for_scalar_type (vec_info *, tree, unsigned int = 0);
+ extern tree get_same_sized_vectype (tree, tree);
+ extern bool vect_chooses_same_modes_p (vec_info *, machine_mode);
+ extern bool vect_get_loop_mask_type (loop_vec_info);
+@@ -1673,7 +1699,7 @@ extern gcall *vect_gen_while (tree, tree
+ extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree);
+ extern opt_result vect_get_vector_types_for_stmt (stmt_vec_info, tree *,
+ 						  tree *, unsigned int = 0);
+-extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, slp_tree = 0);
++extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info, unsigned int = 0);
+ 
+ /* In tree-vect-data-refs.c.  */
+ extern bool vect_can_force_dr_alignment_p (const_tree, poly_uint64);
+diff -Nurp a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c
+--- a/gcc/tree-vect-patterns.c	2021-02-08 09:24:26.467633230 +0800
++++ b/gcc/tree-vect-patterns.c	2021-02-08 09:21:02.543633230 +0800
+@@ -112,7 +112,12 @@ vect_init_pattern_stmt (gimple *pattern_
+   STMT_VINFO_DEF_TYPE (pattern_stmt_info)
+     = STMT_VINFO_DEF_TYPE (orig_stmt_info);
+   if (!STMT_VINFO_VECTYPE (pattern_stmt_info))
+-    STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
++    {
++      gcc_assert (VECTOR_BOOLEAN_TYPE_P (vectype)
++		  == vect_use_mask_type_p (orig_stmt_info));
++      STMT_VINFO_VECTYPE (pattern_stmt_info) = vectype;
++      pattern_stmt_info->mask_precision = orig_stmt_info->mask_precision;
++    }
+   return pattern_stmt_info;
+ }
+ 
+@@ -131,17 +136,25 @@ vect_set_pattern_stmt (gimple *pattern_s
+ 
+ /* Add NEW_STMT to STMT_INFO's pattern definition statements.  If VECTYPE
+    is nonnull, record that NEW_STMT's vector type is VECTYPE, which might
+-   be different from the vector type of the final pattern statement.  */
++   be different from the vector type of the final pattern statement.
++   If VECTYPE is a mask type, SCALAR_TYPE_FOR_MASK is the scalar type
++   from which it was derived.  */
+ 
+ static inline void
+ append_pattern_def_seq (stmt_vec_info stmt_info, gimple *new_stmt,
+-			tree vectype = NULL_TREE)
++			tree vectype = NULL_TREE,
++			tree scalar_type_for_mask = NULL_TREE)
+ {
++  gcc_assert (!scalar_type_for_mask
++	      == (!vectype || !VECTOR_BOOLEAN_TYPE_P (vectype)));
+   vec_info *vinfo = stmt_info->vinfo;
+   if (vectype)
+     {
+       stmt_vec_info new_stmt_info = vinfo->add_stmt (new_stmt);
+       STMT_VINFO_VECTYPE (new_stmt_info) = vectype;
++      if (scalar_type_for_mask)
++	new_stmt_info->mask_precision
++	  = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (scalar_type_for_mask));
+     }
+   gimple_seq_add_stmt_without_update (&STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
+ 				      new_stmt);
+@@ -3875,107 +3888,22 @@ adjust_bool_stmts (hash_set <gimple *> &
+   return gimple_assign_lhs (pattern_stmt);
+ }
+ 
+-/* Helper for search_type_for_mask.  */
++/* Return the proper type for converting bool VAR into
++   an integer value or NULL_TREE if no such type exists.
++   The type is chosen so that the converted value has the
++   same number of elements as VAR's vector type.  */
+ 
+ static tree
+-search_type_for_mask_1 (tree var, vec_info *vinfo,
+-			hash_map<gimple *, tree> &cache)
++integer_type_for_mask (tree var, vec_info *vinfo)
+ {
+-  tree rhs1;
+-  enum tree_code rhs_code;
+-  tree res = NULL_TREE, res2;
+-
+   if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (var)))
+     return NULL_TREE;
+ 
+   stmt_vec_info def_stmt_info = vect_get_internal_def (vinfo, var);
+-  if (!def_stmt_info)
++  if (!def_stmt_info || !vect_use_mask_type_p (def_stmt_info))
+     return NULL_TREE;
+ 
+-  gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt);
+-  if (!def_stmt)
+-    return NULL_TREE;
+-
+-  tree *c = cache.get (def_stmt);
+-  if (c)
+-    return *c;
+-
+-  rhs_code = gimple_assign_rhs_code (def_stmt);
+-  rhs1 = gimple_assign_rhs1 (def_stmt);
+-
+-  switch (rhs_code)
+-    {
+-    case SSA_NAME:
+-    case BIT_NOT_EXPR:
+-    CASE_CONVERT:
+-      res = search_type_for_mask_1 (rhs1, vinfo, cache);
+-      break;
+-
+-    case BIT_AND_EXPR:
+-    case BIT_IOR_EXPR:
+-    case BIT_XOR_EXPR:
+-      res = search_type_for_mask_1 (rhs1, vinfo, cache);
+-      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt), vinfo,
+-				     cache);
+-      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
+-	res = res2;
+-      break;
+-
+-    default:
+-      if (TREE_CODE_CLASS (rhs_code) == tcc_comparison)
+-	{
+-	  tree comp_vectype, mask_type;
+-
+-	  if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1)))
+-	    {
+-	      res = search_type_for_mask_1 (rhs1, vinfo, cache);
+-	      res2 = search_type_for_mask_1 (gimple_assign_rhs2 (def_stmt),
+-					     vinfo, cache);
+-	      if (!res || (res2 && TYPE_PRECISION (res) > TYPE_PRECISION (res2)))
+-		res = res2;
+-	      break;
+-	    }
+-
+-	  comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1));
+-	  if (comp_vectype == NULL_TREE)
+-	    {
+-	      res = NULL_TREE;
+-	      break;
+-	    }
+-
+-	  mask_type = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (rhs1));
+-	  if (!mask_type
+-	      || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code))
+-	    {
+-	      res = NULL_TREE;
+-	      break;
+-	    }
+-
+-	  if (TREE_CODE (TREE_TYPE (rhs1)) != INTEGER_TYPE
+-	      || !TYPE_UNSIGNED (TREE_TYPE (rhs1)))
+-	    {
+-	      scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1));
+-	      res = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1);
+-	    }
+-	  else
+-	    res = TREE_TYPE (rhs1);
+-	}
+-    }
+-
+-  cache.put (def_stmt, res);
+-  return res;
+-}
+-
+-/* Return the proper type for converting bool VAR into
+-   an integer value or NULL_TREE if no such type exists.
+-   The type is chosen so that converted value has the
+-   same number of elements as VAR's vector type.  */
+-
+-static tree
+-search_type_for_mask (tree var, vec_info *vinfo)
+-{
+-  hash_map<gimple *, tree> cache;
+-  return search_type_for_mask_1 (var, vinfo, cache);
++  return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1);
+ }
+ 
+ /* Function vect_recog_bool_pattern
+@@ -4067,7 +3995,7 @@ vect_recog_bool_pattern (stmt_vec_info s
+     }
+   else
+     {
+-      tree type = search_type_for_mask (var, vinfo);
++      tree type = integer_type_for_mask (var, vinfo);
+       tree cst0, cst1, tmp;
+ 
+       if (!type)
+@@ -4152,7 +4080,7 @@ vect_recog_bool_pattern (stmt_vec_info s
+ 	rhs = adjust_bool_stmts (bool_stmts, TREE_TYPE (vectype), stmt_vinfo);
+       else
+ 	{
+-	  tree type = search_type_for_mask (var, vinfo);
++	  tree type = integer_type_for_mask (var, vinfo);
+ 	  tree cst0, cst1, new_vectype;
+ 
+ 	  if (!type)
+@@ -4207,7 +4135,7 @@ build_mask_conversion (tree mask, tree v
+   masktype = truth_type_for (vectype);
+   tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL);
+   stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask);
+-  append_pattern_def_seq (stmt_vinfo, stmt, masktype);
++  append_pattern_def_seq (stmt_vinfo, stmt, masktype, TREE_TYPE (vectype));
+ 
+   return tmp;
+ }
+@@ -4275,7 +4203,7 @@ vect_recog_mask_conversion_pattern (stmt
+ 	}
+ 
+       tree mask_arg = gimple_call_arg (last_stmt, mask_argno);
+-      tree mask_arg_type = search_type_for_mask (mask_arg, vinfo);
++      tree mask_arg_type = integer_type_for_mask (mask_arg, vinfo);
+       if (!mask_arg_type)
+ 	return NULL;
+       vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type);
+@@ -4328,7 +4256,7 @@ vect_recog_mask_conversion_pattern (stmt
+ 
+       if (TREE_CODE (rhs1) == SSA_NAME)
+ 	{
+-	  rhs1_type = search_type_for_mask (rhs1, vinfo);
++	  rhs1_type = integer_type_for_mask (rhs1, vinfo);
+ 	  if (!rhs1_type)
+ 	    return NULL;
+ 	}
+@@ -4352,8 +4280,8 @@ vect_recog_mask_conversion_pattern (stmt
+ 	  rhs1_op1 = TREE_OPERAND (rhs1, 1);
+ 	  if (!rhs1_op0 || !rhs1_op1)
+ 	    return NULL;
+-	  rhs1_op0_type = search_type_for_mask (rhs1_op0, vinfo);
+-	  rhs1_op1_type = search_type_for_mask (rhs1_op1, vinfo);
++	  rhs1_op0_type = integer_type_for_mask (rhs1_op0, vinfo);
++	  rhs1_op1_type = integer_type_for_mask (rhs1_op1, vinfo);
+ 
+ 	  if (!rhs1_op0_type)
+ 	    rhs1_type = TREE_TYPE (rhs1_op0);
+@@ -4441,7 +4369,8 @@ vect_recog_mask_conversion_pattern (stmt
+ 	    pattern_stmt = gimple_build_assign (tmp, TREE_CODE (rhs1),
+ 						rhs1_op0, rhs1_op1);
+ 	  rhs1 = tmp;
+-	  append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2);
++	  append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2,
++				  rhs1_type);
+ 	}
+ 
+       if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1),
+@@ -4474,8 +4403,8 @@ vect_recog_mask_conversion_pattern (stmt
+ 
+       rhs2 = gimple_assign_rhs2 (last_stmt);
+ 
+-      rhs1_type = search_type_for_mask (rhs1, vinfo);
+-      rhs2_type = search_type_for_mask (rhs2, vinfo);
++      rhs1_type = integer_type_for_mask (rhs1, vinfo);
++      rhs2_type = integer_type_for_mask (rhs2, vinfo);
+ 
+       if (!rhs1_type || !rhs2_type
+ 	  || TYPE_PRECISION (rhs1_type) == TYPE_PRECISION (rhs2_type))
+@@ -4558,7 +4487,7 @@ static tree
+ vect_convert_mask_for_vectype (tree mask, tree vectype,
+ 			       stmt_vec_info stmt_info, vec_info *vinfo)
+ {
+-  tree mask_type = search_type_for_mask (mask, vinfo);
++  tree mask_type = integer_type_for_mask (mask, vinfo);
+   if (mask_type)
+     {
+       tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type);
+@@ -4997,6 +4926,148 @@ vect_determine_precisions_from_users (st
+   vect_set_min_input_precision (stmt_info, type, min_input_precision);
+ }
+ 
++/* Return true if the statement described by STMT_INFO sets a boolean
++   SSA_NAME
and if we know how to vectorize this kind of statement using ++ vector mask types. */ ++ ++static bool ++possible_vector_mask_operation_p (stmt_vec_info stmt_info) ++{ ++ tree lhs = gimple_get_lhs (stmt_info->stmt); ++ if (!lhs ++ || TREE_CODE (lhs) != SSA_NAME ++ || !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (lhs))) ++ return false; ++ ++ if (gassign *assign = dyn_cast (stmt_info->stmt)) ++ { ++ tree_code rhs_code = gimple_assign_rhs_code (assign); ++ switch (rhs_code) ++ { ++ CASE_CONVERT: ++ case SSA_NAME: ++ case BIT_NOT_EXPR: ++ case BIT_IOR_EXPR: ++ case BIT_XOR_EXPR: ++ case BIT_AND_EXPR: ++ return true; ++ ++ default: ++ return TREE_CODE_CLASS (rhs_code) == tcc_comparison; ++ } ++ } ++ return false; ++} ++ ++/* If STMT_INFO sets a boolean SSA_NAME, see whether we should use ++ a vector mask type instead of a normal vector type. Record the ++ result in STMT_INFO->mask_precision. */ ++ ++static void ++vect_determine_mask_precision (stmt_vec_info stmt_info) ++{ ++ vec_info *vinfo = stmt_info->vinfo; ++ ++ if (!possible_vector_mask_operation_p (stmt_info) ++ || stmt_info->mask_precision) ++ return; ++ ++ auto_vec worklist; ++ worklist.quick_push (stmt_info); ++ while (!worklist.is_empty ()) ++ { ++ stmt_info = worklist.last (); ++ unsigned int orig_length = worklist.length (); ++ ++ /* If at least one boolean input uses a vector mask type, ++ pick the mask type with the narrowest elements. ++ ++ ??? This is the traditional behavior. It should always produce ++ the smallest number of operations, but isn't necessarily the ++ optimal choice. For example, if we have: ++ ++ a = b & c ++ ++ where: ++ ++ - the user of a wants it to have a mask type for 16-bit elements (M16) ++ - b also uses M16 ++ - c uses a mask type for 8-bit elements (M8) ++ ++ then picking M8 gives: ++ ++ - 1 M16->M8 pack for b ++ - 1 M8 AND for a ++ - 2 M8->M16 unpacks for the user of a ++ ++ whereas picking M16 would have given: ++ ++ - 2 M8->M16 unpacks for c ++ - 2 M16 ANDs for a ++ ++ The number of operations are equal, but M16 would have given ++ a shorter dependency chain and allowed more ILP. */ ++ unsigned int precision = ~0U; ++ gassign *assign = as_a (stmt_info->stmt); ++ unsigned int nops = gimple_num_ops (assign); ++ for (unsigned int i = 1; i < nops; ++i) ++ { ++ tree rhs = gimple_op (assign, i); ++ if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs))) ++ continue; ++ ++ stmt_vec_info def_stmt_info = vinfo->lookup_def (rhs); ++ if (!def_stmt_info) ++ /* Don't let external or constant operands influence the choice. ++ We can convert them to whichever vector type we pick. */ ++ continue; ++ ++ if (def_stmt_info->mask_precision) ++ { ++ if (precision > def_stmt_info->mask_precision) ++ precision = def_stmt_info->mask_precision; ++ } ++ else if (possible_vector_mask_operation_p (def_stmt_info)) ++ worklist.safe_push (def_stmt_info); ++ } ++ ++ /* Defer the choice if we need to visit operands first. */ ++ if (orig_length != worklist.length ()) ++ continue; ++ ++ /* If the statement compares two values that shouldn't use vector masks, ++ try comparing the values as normal scalars instead. 
*/ ++ tree_code rhs_code = gimple_assign_rhs_code (assign); ++ if (precision == ~0U ++ && TREE_CODE_CLASS (rhs_code) == tcc_comparison) ++ { ++ tree rhs1_type = TREE_TYPE (gimple_assign_rhs1 (assign)); ++ scalar_mode mode; ++ tree vectype, mask_type; ++ if (is_a (TYPE_MODE (rhs1_type), &mode) ++ && (vectype = get_vectype_for_scalar_type (vinfo, rhs1_type)) ++ && (mask_type = get_mask_type_for_scalar_type (vinfo, rhs1_type)) ++ && expand_vec_cmp_expr_p (vectype, mask_type, rhs_code)) ++ precision = GET_MODE_BITSIZE (mode); ++ } ++ ++ if (dump_enabled_p ()) ++ { ++ if (precision == ~0U) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "using normal nonmask vectors for %G", ++ stmt_info->stmt); ++ else ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "using boolean precision %d for %G", ++ precision, stmt_info->stmt); ++ } ++ ++ stmt_info->mask_precision = precision; ++ worklist.pop (); ++ } ++} ++ + /* Handle vect_determine_precisions for STMT_INFO, given that we + have already done so for the users of its result. */ + +@@ -5009,6 +5080,7 @@ vect_determine_stmt_precisions (stmt_vec + vect_determine_precisions_from_range (stmt_info, stmt); + vect_determine_precisions_from_users (stmt_info, stmt); + } ++ vect_determine_mask_precision (stmt_info); + } + + /* Walk backwards through the vectorizable region to determine the ++-search_type_for_mask (tree var, vec_info *vinfo) ++-{ ++- hash_map cache; ++- return search_type_for_mask_1 (var, vinfo, cache); +++ return build_nonstandard_integer_type (def_stmt_info->mask_precision, 1); ++ } ++ ++ /* Function vect_recog_bool_pattern ++@@ -4371,7 +4298,7 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out) ++ ++ it is better for b1 and b2 to use the mask type associated ++ with int elements rather bool (byte) elements. */ ++- rhs1_type = search_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo); +++ rhs1_type = integer_type_for_mask (TREE_OPERAND (rhs1, 0), vinfo); ++ if (!rhs1_type) ++ rhs1_type = TREE_TYPE (TREE_OPERAND (rhs1, 0)); ++ } ++@@ -4427,7 +4354,8 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out) ++ tmp = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL); ++ pattern_stmt = gimple_build_assign (tmp, rhs1); ++ rhs1 = tmp; ++- append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2); +++ append_pattern_def_seq (stmt_vinfo, pattern_stmt, vectype2, +++ rhs1_type); ++ } ++ ++ if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype1), +diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c +--- a/gcc/tree-vect-slp.c 2021-02-08 09:24:26.471633230 +0800 ++++ b/gcc/tree-vect-slp.c 2021-02-08 09:21:02.719633230 +0800 +@@ -906,17 +906,6 @@ vect_build_slp_tree_1 (unsigned char *sw + || rhs_code == LROTATE_EXPR + || rhs_code == RROTATE_EXPR) + { +- if (vectype == boolean_type_node) +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "Build SLP failed: shift of a" +- " boolean.\n"); +- /* Fatal mismatch. */ +- matches[0] = false; +- return false; +- } +- + vec_mode = TYPE_MODE (vectype); + + /* First see if we have a vector/vector shift. 
*/ +@@ -1137,9 +1126,8 @@ vect_build_slp_tree_1 (unsigned char *sw + if (alt_stmt_code != ERROR_MARK + && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference) + { +- if (vectype == boolean_type_node +- || !vect_two_operations_perm_ok_p (stmts, group_size, +- vectype, alt_stmt_code)) ++ if (!vect_two_operations_perm_ok_p (stmts, group_size, ++ vectype, alt_stmt_code)) + { + for (i = 0; i < group_size; ++i) + if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code) +@@ -2746,24 +2734,6 @@ vect_slp_analyze_node_operations_1 (vec_ + stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; + gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect); + +- /* For BB vectorization vector types are assigned here. +- Memory accesses already got their vector type assigned +- in vect_analyze_data_refs. */ +- bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); +- if (bb_vinfo && STMT_VINFO_VECTYPE (stmt_info) == boolean_type_node) +- { +- tree vectype = vect_get_mask_type_for_stmt (stmt_info, node); +- if (!vectype) +- /* vect_get_mask_type_for_stmt has already explained the +- failure. */ +- return false; +- +- stmt_vec_info sstmt_info; +- unsigned int i; +- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info) +- STMT_VINFO_VECTYPE (sstmt_info) = vectype; +- } +- + /* Calculate the number of vector statements to be created for the + scalar stmts in this node. For SLP reductions it is equal to the + number of vector statements in the children (which has already been +diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +--- a/gcc/tree-vect-stmts.c 2021-02-08 09:24:26.371633230 +0800 ++++ b/gcc/tree-vect-stmts.c 2021-02-08 09:21:02.543633230 +0800 +@@ -3334,6 +3334,15 @@ vectorizable_call (stmt_vec_info stmt_in + return false; + } + ++ if (VECTOR_BOOLEAN_TYPE_P (vectype_out) ++ != VECTOR_BOOLEAN_TYPE_P (vectype_in)) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "mixed mask and nonmask vector types\n"); ++ return false; ++ } ++ + /* FORNOW */ + nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); + nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); +@@ -5938,6 +5947,21 @@ vectorizable_operation (stmt_vec_info st + + orig_code = code = gimple_assign_rhs_code (stmt); + ++ /* Shifts are handled in vectorizable_shift. */ ++ if (code == LSHIFT_EXPR ++ || code == RSHIFT_EXPR ++ || code == LROTATE_EXPR ++ || code == RROTATE_EXPR) ++ return false; ++ ++ /* Comparisons are handled in vectorizable_comparison. */ ++ if (TREE_CODE_CLASS (code) == tcc_comparison) ++ return false; ++ ++ /* Conditions are handled in vectorizable_condition. */ ++ if (code == COND_EXPR) ++ return false; ++ + /* For pointer addition and subtraction, we should use the normal + plus and minus for the vector operation. */ + if (code == POINTER_PLUS_EXPR) +@@ -5961,7 +5985,8 @@ vectorizable_operation (stmt_vec_info st + + /* Most operations cannot handle bit-precision types without extra + truncations. */ +- if (!VECTOR_BOOLEAN_TYPE_P (vectype_out) ++ bool mask_op_p = VECTOR_BOOLEAN_TYPE_P (vectype_out); ++ if (!mask_op_p + && !type_has_mode_precision_p (TREE_TYPE (scalar_dest)) + /* Exception are bitwise binary operations. 
*/ + && code != BIT_IOR_EXPR +@@ -6023,10 +6048,11 @@ vectorizable_operation (stmt_vec_info st + if (maybe_ne (nunits_out, nunits_in)) + return false; + ++ tree vectype2 = NULL_TREE, vectype3 = NULL_TREE; + if (op_type == binary_op || op_type == ternary_op) + { + op1 = gimple_assign_rhs2 (stmt); +- if (!vect_is_simple_use (op1, vinfo, &dt[1])) ++ if (!vect_is_simple_use (op1, vinfo, &dt[1], &vectype2)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +@@ -6037,7 +6063,7 @@ vectorizable_operation (stmt_vec_info st + if (op_type == ternary_op) + { + op2 = gimple_assign_rhs3 (stmt); +- if (!vect_is_simple_use (op2, vinfo, &dt[2])) ++ if (!vect_is_simple_use (op2, vinfo, &dt[2], &vectype3)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +@@ -6062,10 +6088,20 @@ vectorizable_operation (stmt_vec_info st + + gcc_assert (ncopies >= 1); + +- /* Shifts are handled in vectorizable_shift (). */ +- if (code == LSHIFT_EXPR || code == RSHIFT_EXPR || code == LROTATE_EXPR +- || code == RROTATE_EXPR) +- return false; ++ /* Reject attempts to combine mask types with nonmask types, e.g. if ++ we have an AND between a (nonmask) boolean loaded from memory and ++ a (mask) boolean result of a comparison. ++ ++ TODO: We could easily fix these cases up using pattern statements. */ ++ if (VECTOR_BOOLEAN_TYPE_P (vectype) != mask_op_p ++ || (vectype2 && VECTOR_BOOLEAN_TYPE_P (vectype2) != mask_op_p) ++ || (vectype3 && VECTOR_BOOLEAN_TYPE_P (vectype3) != mask_op_p)) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "mixed mask and nonmask vector types\n"); ++ return false; ++ } + + /* Supportable by target? */ + +@@ -10410,14 +10446,15 @@ get_vectype_for_scalar_type (vec_info *v + + Returns the mask type corresponding to a result of comparison + of vectors of specified SCALAR_TYPE as supported by target. +- NODE, if nonnull, is the SLP tree node that will use the returned +- vector type. */ ++ If GROUP_SIZE is nonzero and we're performing BB vectorization, ++ make sure that the number of elements in the vector is no bigger ++ than GROUP_SIZE. */ + + tree + get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type, +- slp_tree node) ++ unsigned int group_size) + { +- tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, node); ++ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type, group_size); + + if (!vectype) + return NULL; +@@ -11112,9 +11149,6 @@ vect_gen_while_not (gimple_seq *seq, tre + + - Set *STMT_VECTYPE_OUT to: + - NULL_TREE if the statement doesn't need to be vectorized; +- - boolean_type_node if the statement is a boolean operation whose +- vector type can only be determined once all the other vector types +- are known; and + - the equivalent of STMT_VINFO_VECTYPE otherwise. 
+ + - Set *NUNITS_VECTYPE_OUT to the vector type that contains the maximum +@@ -11171,11 +11205,22 @@ vect_get_vector_types_for_stmt (stmt_vec + tree scalar_type = NULL_TREE; + if (group_size == 0 && STMT_VINFO_VECTYPE (stmt_info)) + { +- *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info); ++ vectype = STMT_VINFO_VECTYPE (stmt_info); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "precomputed vectype: %T\n", vectype); + } ++ else if (vect_use_mask_type_p (stmt_info)) ++ { ++ unsigned int precision = stmt_info->mask_precision; ++ scalar_type = build_nonstandard_integer_type (precision, 1); ++ vectype = get_mask_type_for_scalar_type (vinfo, scalar_type, group_size); ++ if (!vectype) ++ return opt_result::failure_at (stmt, "not vectorized: unsupported" ++ " data-type %T\n", scalar_type); ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype); ++ } + else + { + if (data_reference *dr = STMT_VINFO_DATA_REF (stmt_info)) +@@ -11185,28 +11230,6 @@ vect_get_vector_types_for_stmt (stmt_vec + else + scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); + +- /* Pure bool ops don't participate in number-of-units computation. +- For comparisons use the types being compared. */ +- if (!STMT_VINFO_DATA_REF (stmt_info) +- && VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type) +- && is_gimple_assign (stmt) +- && gimple_assign_rhs_code (stmt) != COND_EXPR) +- { +- *stmt_vectype_out = boolean_type_node; +- +- tree rhs1 = gimple_assign_rhs1 (stmt); +- if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison +- && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (rhs1))) +- scalar_type = TREE_TYPE (rhs1); +- else +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_NOTE, vect_location, +- "pure bool operation.\n"); +- return opt_result::success (); +- } +- } +- + if (dump_enabled_p ()) + { + if (group_size) +@@ -11224,18 +11247,15 @@ vect_get_vector_types_for_stmt (stmt_vec + " unsupported data-type %T\n", + scalar_type); + +- if (!*stmt_vectype_out) +- *stmt_vectype_out = vectype; +- + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", vectype); + } ++ *stmt_vectype_out = vectype; + + /* Don't try to compute scalar types if the stmt produces a boolean + vector; use the existing vector type instead. */ + tree nunits_vectype = vectype; +- if (!VECTOR_BOOLEAN_TYPE_P (vectype) +- && *stmt_vectype_out != boolean_type_node) ++ if (!VECTOR_BOOLEAN_TYPE_P (vectype)) + { + /* The number of units is set according to the smallest scalar + type (or the largest vector size, but we only support one +@@ -11260,9 +11280,8 @@ vect_get_vector_types_for_stmt (stmt_vec + } + } + +- gcc_assert (*stmt_vectype_out == boolean_type_node +- || multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype), +- TYPE_VECTOR_SUBPARTS (*stmt_vectype_out))); ++ gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype), ++ TYPE_VECTOR_SUBPARTS (*stmt_vectype_out))); + + if (dump_enabled_p ()) + { +@@ -11274,82 +11293,3 @@ vect_get_vector_types_for_stmt (stmt_vec + *nunits_vectype_out = nunits_vectype; + return opt_result::success (); + } +- +-/* Try to determine the correct vector type for STMT_INFO, which is a +- statement that produces a scalar boolean result. Return the vector +- type on success, otherwise return NULL_TREE. NODE, if nonnull, +- is the SLP tree node that will use the returned vector type. 
*/ +- +-opt_tree +-vect_get_mask_type_for_stmt (stmt_vec_info stmt_info, slp_tree node) +-{ +- vec_info *vinfo = stmt_info->vinfo; +- gimple *stmt = stmt_info->stmt; +- tree mask_type = NULL; +- tree vectype, scalar_type; +- +- if (is_gimple_assign (stmt) +- && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison +- && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))) +- { +- scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); +- mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type, node); +- +- if (!mask_type) +- return opt_tree::failure_at (stmt, +- "not vectorized: unsupported mask\n"); +- } +- else +- { +- tree rhs; +- ssa_op_iter iter; +- enum vect_def_type dt; +- +- FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE) +- { +- if (!vect_is_simple_use (rhs, stmt_info->vinfo, &dt, &vectype)) +- return opt_tree::failure_at (stmt, +- "not vectorized:can't compute mask" +- " type for statement, %G", stmt); +- +- /* No vectype probably means external definition. +- Allow it in case there is another operand which +- allows to determine mask type. */ +- if (!vectype) +- continue; +- +- if (!mask_type) +- mask_type = vectype; +- else if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type), +- TYPE_VECTOR_SUBPARTS (vectype))) +- return opt_tree::failure_at (stmt, +- "not vectorized: different sized mask" +- " types in statement, %T and %T\n", +- mask_type, vectype); +- else if (VECTOR_BOOLEAN_TYPE_P (mask_type) +- != VECTOR_BOOLEAN_TYPE_P (vectype)) +- return opt_tree::failure_at (stmt, +- "not vectorized: mixed mask and " +- "nonmask vector types in statement, " +- "%T and %T\n", +- mask_type, vectype); +- } +- +- /* We may compare boolean value loaded as vector of integers. +- Fix mask_type in such case. */ +- if (mask_type +- && !VECTOR_BOOLEAN_TYPE_P (mask_type) +- && gimple_code (stmt) == GIMPLE_ASSIGN +- && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison) +- mask_type = truth_type_for (mask_type); +- } +- +- /* No mask_type should mean loop invariant predicate. +- This is probably a subject for optimization in if-conversion. */ +- if (!mask_type) +- return opt_tree::failure_at (stmt, +- "not vectorized: can't compute mask type " +- "for statement: %G", stmt); +- +- return opt_tree::success (mask_type); +-} diff --git a/fix-range-set-by-vectorization-on-niter-IVs.patch b/fix-range-set-by-vectorization-on-niter-IVs.patch new file mode 100644 index 0000000..d64a4b9 --- /dev/null +++ b/fix-range-set-by-vectorization-on-niter-IVs.patch @@ -0,0 +1,74 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-tree-optimization-98117-fix-range-set-by-vectorizati.patch +cdcbef3c3310a14f2994982b44cb1f8e14c77232 + +diff --git a/gcc/testsuite/gcc.dg/torture/pr98117.c b/gcc/testsuite/gcc.dg/torture/pr98117.c +new file mode 100644 +index 00000000000..f2160257263 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr98117.c +@@ -0,0 +1,19 @@ ++/* { dg-do run } */ ++/* { dg-additional-options "-fno-tree-scev-cprop" } */ ++ ++unsigned char c; ++void __attribute__((noipa)) ++e() ++{ ++ do ++ { ++ } ++ while (++c); ++} ++int main() ++{ ++ e(); ++ if (c != 0) ++ __builtin_abort (); ++ return 0; ++} +diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c +index 36179188f6d..2370b879b21 100644 +--- a/gcc/tree-vect-loop-manip.c ++++ b/gcc/tree-vect-loop-manip.c +@@ -2034,13 +2034,29 @@ vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters, + niters_vector = force_gimple_operand (niters_vector, &stmts, true, var); + gsi_insert_seq_on_edge_immediate (pe, stmts); + /* Peeling algorithm guarantees that vector loop bound is at least ONE, +- we set range information to make niters analyzer's life easier. */ ++ we set range information to make niters analyzer's life easier. ++ Note the number of latch iteration value can be TYPE_MAX_VALUE so ++ we have to represent the vector niter TYPE_MAX_VALUE + 1 >> log_vf. */ + if (stmts != NULL && log_vf) +- set_range_info (niters_vector, VR_RANGE, +- wi::to_wide (build_int_cst (type, 1)), +- wi::to_wide (fold_build2 (RSHIFT_EXPR, type, +- TYPE_MAX_VALUE (type), +- log_vf))); ++ { ++ if (niters_no_overflow) ++ set_range_info (niters_vector, VR_RANGE, ++ wi::one (TYPE_PRECISION (type)), ++ wi::rshift (wi::max_value (TYPE_PRECISION (type), ++ TYPE_SIGN (type)), ++ exact_log2 (const_vf), ++ TYPE_SIGN (type))); ++ /* For VF == 1 the vector IV might also overflow so we cannot ++ assert a minimum value of 1. */ ++ else if (const_vf > 1) ++ set_range_info (niters_vector, VR_RANGE, ++ wi::one (TYPE_PRECISION (type)), ++ wi::rshift (wi::max_value (TYPE_PRECISION (type), ++ TYPE_SIGN (type)) ++ - (const_vf - 1), ++ exact_log2 (const_vf), TYPE_SIGN (type)) ++ + 1); ++ } + } + *niters_vector_ptr = niters_vector; + *step_vector_ptr = step_vector; +-- +2.19.1 + diff --git a/gcc.spec b/gcc.spec index 2822547..368e0e8 100644 --- a/gcc.spec +++ b/gcc.spec @@ -1,4 +1,4 @@ -%global DATE 20210204 +%global DATE 20210428 %global gcc_version 9.3.1 %global gcc_major 9.3.1 @@ -59,7 +59,7 @@ Summary: Various compilers (C, C++, Objective-C, ...) 
Name: gcc Version: %{gcc_version} -Release: %{DATE}.16 +Release: %{DATE}.19 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD URL: https://gcc.gnu.org @@ -221,10 +221,21 @@ Patch104: fix-avx512vl-vcvttpd2dq-2-fail.patch Patch105: fix-issue604-ldist-dependency-fixup.patch Patch106: Apply-maximum-nunits-for-BB-SLP.patch Patch107: Fix-interaction-between-aka-changes-and-DR1558.patch -Patch108: Handle-POLY_INT_CSTs-in-declare_return_value.patch -Patch109: Handle-POLY_INT_CST-in-copy_reference_ops_from_ref.patch -Patch110: fix-strncpy-inline-warning.patch - +Patch108: fix-range-set-by-vectorization-on-niter-IVs.patch +Patch109: optabs-Dont-use-scalar-conversions-for-vectors.patch +Patch110: add-fp-model-options.patch +Patch111: fix-CTOR-vectorization.patch +Patch112: PR92429-do-not-fold-when-updating.patch +Patch113: Handle-POLY_INT_CSTs-in-declare_return_value.patch +Patch114: Handle-POLY_INT_CST-in-copy_reference_ops_from_ref.patch +Patch115: fix-strncpy-inline-warning.patch +Patch116: fix-ICE-in-vect.patch +Patch118: Fix-type-mismatch-in-SLPed-constructors.patch +Patch119: add-check-for-pressure-in-sche1.patch +Patch120: revert-moutline-atomics.patch +Patch121: fix-ICE-in-eliminate-stmt.patch +Patch122: revise-type-before-build-MULT.patch +Patch123: Simplify-X-C1-C2.patch %global gcc_target_platform %{_arch}-linux-gnu @@ -777,6 +788,18 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch108 -p1 %patch109 -p1 %patch110 -p1 +%patch111 -p1 +%patch112 -p1 +%patch113 -p1 +%patch114 -p1 +%patch115 -p1 +%patch116 -p1 +%patch118 -p1 +%patch119 -p1 +%patch120 -p1 +%patch121 -p1 +%patch122 -p1 +%patch123 -p1 %build @@ -785,8 +808,7 @@ export CONFIG_SITE=NONE CC=gcc CXX=g++ -OPT_FLAGS=`echo %{optflags}|sed -e 's/\(-Wp,\)\?-D_FORTIFY_SOURCE=[12]//g'` -OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/-m64//g;s/-m32//g;s/-m31//g'` +OPT_FLAGS=`echo %{optflags}|sed -e 's/-m64//g;s/-m32//g;s/-m31//g'` OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/-mfpmath=sse/-mfpmath=sse -msse2/g'` OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/ -pipe / /g'` OPT_FLAGS=`echo $OPT_FLAGS|sed -e 's/-Werror=format-security/ /g'` @@ -827,7 +849,7 @@ enablelgo=,go %if %{build_d} enableld=,d %endif -OPT_FLAGS="$OPT_FLAGS -fPIE -Wl,-z,relro,-z,now" +OPT_FLAGS="$OPT_FLAGS -O2 -Wp,-D_FORTIFY_SOURCE=2 -fstack-protector-strong -fPIE -Wl,-z,relro,-z,now" OPT_LDFLAGS="$OPT_LDFLAGS -Wl,-z,relro,-z,now" export extra_ldflags_libobjc="-Wl,-z,relro,-z,now" export FCFLAGS="$OPT_FLAGS" @@ -1804,6 +1826,7 @@ end %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/stdnoreturn.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/stdatomic.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/gcov.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/simdmath.h %ifarch %{ix86} x86_64 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/mmintrin.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/xmmintrin.h @@ -2231,6 +2254,7 @@ end %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/ieee_arithmetic.mod %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/ieee_exceptions.mod %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/ieee_features.mod +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/finclude/simdmath_f.h %{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major}/f951 %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/libgfortran.spec 
%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/libcaf_single.a @@ -2708,6 +2732,30 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Wed Apr 28 2021 eastb233 - 9.3.1-20210428.19 +- add-fp-model-options.patch: New file +- enable-simd-math.patch: Enable simd math library in C and Fortran +- fix-CTOR-vectorization.patch: New file +- fix-range-set-by-vectorization-on-niter-IVs.patch: New file +- medium-code-mode.patch: Fix bugs when used with fpic +- optabs-Dont-use-scalar-conversions-for-vectors.patch: New file +- PR92429-do-not-fold-when-updating.patch: New file +- redundant-loop-elimination.patch: Fix some programming specifications +- fix-ICE-in-vect.patch: New file +- Fix-type-mismatch-in-SLPed-constructors.patch: New file +- add-check-for-pressure-in-sche1.patch: New file +- revert-moutline-atomics.patch: New file +- fix-ICE-in-eliminate-stmt.patch: New file +- revise-type-before-build-MULT.patch: New file +- Simplify-X-C1-C2.patch: New file +- gcc.spec: Add new patches + +* Mon Mar 15 2021 tianwei - 9.3.1-20210204.17 +- Type:bugfix +- ID:NA +- SUG:NA +- DESC:add SP and FS for x86 + * Thu Feb 04 2021 eastb233 - 9.3.1-20210204.16 - Handle-POLY_INT_CSTs-in-declare_return_value.patch: New file - Handle-POLY_INT_CST-in-copy_reference_ops_from_ref.patch: New file diff --git a/medium-code-mode.patch b/medium-code-mode.patch index cf629d2..f63e920 100644 --- a/medium-code-mode.patch +++ b/medium-code-mode.patch @@ -1,105 +1,98 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c ---- a/gcc/config/aarch64/aarch64.c 2020-07-16 14:54:30.588000000 +0800 -+++ b/gcc/config/aarch64/aarch64.c 2020-07-16 15:06:33.000000000 +0800 -@@ -2030,6 +2030,32 @@ aarch64_load_symref_appropriately (rtx d +--- a/gcc/config/aarch64/aarch64.c 2021-02-18 11:03:29.728000000 +0800 ++++ b/gcc/config/aarch64/aarch64.c 2021-02-18 14:59:54.432000000 +0800 +@@ -2417,6 +2417,29 @@ aarch64_load_symref_appropriately (rtx d emit_insn (gen_add_losym (dest, tmp_reg, imm)); return; } + case SYMBOL_MEDIUM_ABSOLUTE: -+ { -+ rtx tmp_reg = dest; -+ machine_mode mode = GET_MODE (dest); ++ { ++ rtx tmp_reg = dest; ++ machine_mode mode = GET_MODE (dest); + -+ gcc_assert (mode == Pmode || mode == ptr_mode); -+ if (can_create_pseudo_p ()) -+ tmp_reg = gen_reg_rtx (mode); ++ gcc_assert (mode == Pmode || mode == ptr_mode); ++ if (can_create_pseudo_p ()) ++ tmp_reg = gen_reg_rtx (mode); + -+ if (mode == DImode) -+ { -+ emit_insn ( -+ gen_load_symbol_medium_di (dest, tmp_reg, imm)); -+ } -+ else -+ { -+ emit_insn ( -+ gen_load_symbol_medium_si (dest, tmp_reg, imm)); -+ } -+ if (REG_P (dest)) -+ { -+ set_unique_reg_note ( -+ get_last_insn (), REG_EQUIV, copy_rtx (imm)); -+ } -+ return; -+ } ++ if (mode == DImode) ++ { ++ emit_insn (gen_load_symbol_medium_di (dest, tmp_reg, imm)); ++ } ++ else ++ { ++ emit_insn (gen_load_symbol_medium_si (dest, tmp_reg, imm)); ++ } ++ if (REG_P (dest)) ++ { ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (imm)); ++ } ++ return; ++ } case SYMBOL_TINY_ABSOLUTE: emit_insn (gen_rtx_SET (dest, imm)); -@@ -2152,6 +2178,64 @@ aarch64_load_symref_appropriately (rtx d +@@ -2539,6 +2562,60 @@ aarch64_load_symref_appropriately (rtx d return; } -+ case SYMBOL_MEDIUM_GOT_4G: -+ { -+ rtx tmp_reg = dest; -+ machine_mode mode = GET_MODE (dest); -+ if (can_create_pseudo_p ()) -+ { -+ tmp_reg = gen_reg_rtx (mode); -+ } -+ rtx insn; -+ rtx mem; -+ rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); ++ case SYMBOL_MEDIUM_GOT_4G: ++ { ++ rtx tmp_reg = dest; ++ 
machine_mode mode = GET_MODE (dest); ++ if (can_create_pseudo_p ()) ++ { ++ tmp_reg = gen_reg_rtx (mode); ++ } ++ rtx insn; ++ rtx mem; ++ rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); ++ ++ if (mode == DImode) ++ { ++ emit_insn (gen_load_symbol_medium_di (tmp_reg, dest, s)); ++ } ++ else ++ { ++ emit_insn (gen_load_symbol_medium_si (tmp_reg, dest, s)); ++ } ++ if (REG_P (dest)) ++ { ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, copy_rtx (s)); ++ } + -+ if (mode == DImode) -+ { -+ emit_insn ( -+ gen_load_symbol_medium_di (tmp_reg, dest, s)); -+ } -+ else -+ { -+ emit_insn ( -+ gen_load_symbol_medium_si (tmp_reg, dest, s)); -+ } -+ if (REG_P (dest)) -+ { -+ set_unique_reg_note ( -+ get_last_insn (), REG_EQUIV, copy_rtx (s)); -+ } ++ if (mode == ptr_mode) ++ { ++ if (mode == DImode) ++ { ++ emit_insn (gen_get_gotoff_di (dest, imm)); ++ insn = gen_ldr_got_medium_di (dest, tmp_reg, dest); ++ } ++ else ++ { ++ emit_insn (gen_get_gotoff_si (dest, imm)); ++ insn = gen_ldr_got_medium_si (dest, tmp_reg, dest); ++ } ++ mem = XVECEXP (SET_SRC (insn), 0, 0); ++ } ++ else ++ { ++ gcc_assert (mode == Pmode); ++ emit_insn (gen_get_gotoff_di (dest, imm)); ++ insn = gen_ldr_got_medium_sidi (dest, tmp_reg, dest); ++ mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0); ++ } + -+ if (mode == ptr_mode) -+ { -+ if (mode == DImode) -+ { -+ emit_insn (gen_get_gotoff_di (dest, imm)); -+ insn = gen_ldr_got_medium_di ( -+ dest, tmp_reg, dest); -+ } -+ else -+ { -+ emit_insn (gen_get_gotoff_si (dest, imm)); -+ insn = gen_ldr_got_medium_si ( -+ dest, tmp_reg, dest); -+ } -+ mem = XVECEXP (SET_SRC (insn), 0, 0); -+ } -+ else -+ { -+ gcc_assert (mode == Pmode); -+ emit_insn (gen_get_gotoff_di (dest, imm)); -+ insn = gen_ldr_got_medium_sidi (dest, tmp_reg, dest); -+ mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0); -+ } ++ gcc_assert (GET_CODE (mem) == MEM); ++ MEM_READONLY_P (mem) = 1; ++ MEM_NOTRAP_P (mem) = 1; ++ emit_insn (insn); ++ return; ++ } + -+ gcc_assert (GET_CODE (mem) == MEM); -+ MEM_READONLY_P (mem) = 1; -+ MEM_NOTRAP_P (mem) = 1; -+ emit_insn (insn); -+ return; -+ } case SYMBOL_SMALL_TLSGD: { rtx_insn *insns; -@@ -3372,11 +3456,12 @@ aarch64_expand_mov_immediate (rtx dest, +@@ -4531,11 +4608,12 @@ aarch64_expand_mov_immediate (rtx dest, return; @@ -114,7 +107,7 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c case SYMBOL_TINY_GOT: case SYMBOL_TINY_TLSIE: if (const_offset != 0) -@@ -3395,6 +3480,7 @@ aarch64_expand_mov_immediate (rtx dest, +@@ -4554,6 +4632,7 @@ aarch64_expand_mov_immediate (rtx dest, case SYMBOL_TLSLE24: case SYMBOL_TLSLE32: case SYMBOL_TLSLE48: @@ -122,30 +115,61 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c aarch64_load_symref_appropriately (dest, imm, sty); return; -@@ -10334,6 +10420,13 @@ cost_plus: +@@ -8450,7 +8529,14 @@ aarch64_classify_address (struct aarch64 + split_const (info->offset, &sym, &offs); + if (GET_CODE (sym) == SYMBOL_REF + && (aarch64_classify_symbol (sym, INTVAL (offs)) +- == SYMBOL_SMALL_ABSOLUTE)) ++ == SYMBOL_SMALL_ABSOLUTE ++ /* Fix fail on dbl_mov_immediate_1.c. If end up here with ++ MEDIUM_ABSOLUTE, the symbol is a constant number that is ++ forced to memory in reload pass, which is ok to go on with ++ the original design that subtitude the mov to ++ 'adrp and ldr :losum'. */ ++ || aarch64_classify_symbol (sym, INTVAL (offs)) ++ == SYMBOL_MEDIUM_ABSOLUTE)) + { + /* The symbol and offset must be aligned to the access size. 
*/ + unsigned int align; +@@ -10365,7 +10451,13 @@ static inline bool + aarch64_can_use_per_function_literal_pools_p (void) + { + return (aarch64_pcrelative_literal_loads +- || aarch64_cmodel == AARCH64_CMODEL_LARGE); ++ || aarch64_cmodel == AARCH64_CMODEL_LARGE ++ /* Fix const9.C so that constants goes to function_literal_pools. ++ According to the orignal design of aarch64 mcmodel=medium, we ++ don't care where this symbol is put. For the benefit of code size ++ and behaviour consistent with other mcmodel, put it into ++ function_literal_pools. */ ++ || aarch64_cmodel == AARCH64_CMODEL_MEDIUM); + } + + static bool +@@ -11993,6 +12085,13 @@ cost_plus: if (speed) *cost += extra_cost->alu.arith; } -+ else if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM -+ || aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC) -+ { -+ /* 4 movs adr sub add 2movs ldr. */ -+ if (speed) -+ *cost += 7*extra_cost->alu.arith; -+ } ++ else if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM ++ || aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC) ++ { ++ /* 4 movs adr sub add 2movs ldr. */ ++ if (speed) ++ *cost += 7*extra_cost->alu.arith; ++ } if (flag_pic) { -@@ -10341,6 +10434,8 @@ cost_plus: +@@ -12000,6 +12099,8 @@ cost_plus: *cost += COSTS_N_INSNS (1); if (speed) *cost += extra_cost->ldst.load; + if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC) -+ *cost += 2*extra_cost->alu.arith; ++ *cost += 2*extra_cost->alu.arith; } return true; -@@ -11395,6 +11490,7 @@ initialize_aarch64_tls_size (struct gcc_ +@@ -13176,6 +13277,7 @@ initialize_aarch64_tls_size (struct gcc_ if (aarch64_tls_size > 32) aarch64_tls_size = 32; break; @@ -153,17 +177,17 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c case AARCH64_CMODEL_LARGE: /* The maximum TLS size allowed under large is 16E. FIXME: 16E should be 64bit, we only support 48bit offset now. */ -@@ -12187,6 +12283,9 @@ initialize_aarch64_code_model (struct gc +@@ -13968,6 +14070,9 @@ initialize_aarch64_code_model (struct gc aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC; #endif break; -+ case AARCH64_CMODEL_MEDIUM: -+ aarch64_cmodel = AARCH64_CMODEL_MEDIUM_PIC; ++ case AARCH64_CMODEL_MEDIUM: ++ aarch64_cmodel = AARCH64_CMODEL_MEDIUM_PIC; + break; case AARCH64_CMODEL_LARGE: sorry ("code model %qs with %<-f%s%>", "large", opts->x_flag_pic > 1 ? 
"PIC" : "pic"); -@@ -12205,6 +12304,7 @@ static void +@@ -13986,6 +14091,7 @@ static void aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts) { ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string; @@ -171,7 +195,7 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c ptr->x_aarch64_branch_protection_string = opts->x_aarch64_branch_protection_string; } -@@ -12220,6 +12320,7 @@ aarch64_option_restore (struct gcc_optio +@@ -14001,6 +14107,7 @@ aarch64_option_restore (struct gcc_optio opts->x_explicit_arch = ptr->x_explicit_arch; selected_arch = aarch64_get_arch (ptr->x_explicit_arch); opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string; @@ -179,7 +203,7 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c opts->x_aarch64_branch_protection_string = ptr->x_aarch64_branch_protection_string; if (opts->x_aarch64_branch_protection_string) -@@ -13067,6 +13168,8 @@ aarch64_classify_symbol (rtx x, HOST_WID +@@ -14868,6 +14975,8 @@ aarch64_classify_symbol (rtx x, HOST_WID case AARCH64_CMODEL_SMALL_SPIC: case AARCH64_CMODEL_SMALL_PIC: @@ -188,7 +212,7 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c case AARCH64_CMODEL_SMALL: return SYMBOL_SMALL_ABSOLUTE; -@@ -13100,6 +13203,7 @@ aarch64_classify_symbol (rtx x, HOST_WID +@@ -14904,6 +15013,7 @@ aarch64_classify_symbol (rtx x, HOST_WID return SYMBOL_TINY_ABSOLUTE; case AARCH64_CMODEL_SMALL: @@ -196,75 +220,83 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c /* Same reasoning as the tiny code model, but the offset cap here is 1MB, allowing +/-3.9GB for the offset to the symbol. */ -@@ -13121,7 +13225,48 @@ aarch64_classify_symbol (rtx x, HOST_WID +@@ -14927,7 +15037,50 @@ aarch64_classify_symbol (rtx x, HOST_WID ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G); return SYMBOL_SMALL_ABSOLUTE; + case AARCH64_CMODEL_MEDIUM: -+ { -+ tree decl_local = SYMBOL_REF_DECL (x); -+ if (decl_local != NULL -+ && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local))) -+ { -+ HOST_WIDE_INT size = tree_to_uhwi ( -+ DECL_SIZE_UNIT (decl_local)); -+ /* If the data is smaller than the threshold, goto -+ the small code model. Else goto the large code -+ model. */ -+ if (size >= HOST_WIDE_INT (aarch64_data_threshold)) -+ goto AARCH64_LARGE_ROUTINE; -+ } -+ goto AARCH64_SMALL_ROUTINE; -+ } ++ { ++ tree decl_local = SYMBOL_REF_DECL (x); ++ if (decl_local != NULL ++ && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local))) ++ { ++ HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE_UNIT (decl_local)); ++ /* If the data is smaller than the threshold, goto ++ the small code model. Else goto the large code ++ model. 
*/ ++ if (size >= HOST_WIDE_INT (aarch64_data_threshold)) ++ goto AARCH64_LARGE_ROUTINE; ++ } ++ goto AARCH64_SMALL_ROUTINE; ++ } + + case AARCH64_CMODEL_MEDIUM_PIC: -+ { -+ tree decl_local = SYMBOL_REF_DECL (x); -+ if (decl_local != NULL -+ && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local))) -+ { -+ HOST_WIDE_INT size = tree_to_uhwi ( -+ DECL_SIZE_UNIT (decl_local)); -+ if (size < HOST_WIDE_INT (aarch64_data_threshold)) -+ { -+ if (!aarch64_symbol_binds_local_p (x)) -+ { -+ return SYMBOL_SMALL_GOT_4G; -+ } -+ return SYMBOL_SMALL_ABSOLUTE; -+ } -+ } -+ if (!aarch64_symbol_binds_local_p (x)) -+ { -+ return SYMBOL_MEDIUM_GOT_4G; -+ } -+ return SYMBOL_MEDIUM_ABSOLUTE; -+ } ++ { ++ tree decl_local = SYMBOL_REF_DECL (x); ++ if (decl_local != NULL ++ && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local))) ++ { ++ HOST_WIDE_INT size = tree_to_uhwi (DECL_SIZE_UNIT (decl_local)); ++ if (size < HOST_WIDE_INT (aarch64_data_threshold)) ++ { ++ if (!aarch64_symbol_binds_local_p (x)) ++ { ++ /* flag_pic is 2 only when -fPIC is on, when we should ++ use 4G GOT. */ ++ return flag_pic == 2 ? SYMBOL_SMALL_GOT_4G ++ : SYMBOL_SMALL_GOT_28K ; ++ } ++ return SYMBOL_SMALL_ABSOLUTE; ++ } ++ } ++ if (!aarch64_symbol_binds_local_p (x)) ++ { ++ return SYMBOL_MEDIUM_GOT_4G; ++ } ++ return SYMBOL_MEDIUM_ABSOLUTE; ++ } ++ case AARCH64_CMODEL_LARGE: + AARCH64_LARGE_ROUTINE: /* This is alright even in PIC code as the constant pool reference is always PC relative and within the same translation unit. */ -@@ -15364,6 +15509,8 @@ aarch64_asm_preferred_eh_data_format (in +@@ -17789,6 +17942,8 @@ aarch64_asm_preferred_eh_data_format (in case AARCH64_CMODEL_SMALL: case AARCH64_CMODEL_SMALL_PIC: case AARCH64_CMODEL_SMALL_SPIC: -+ case AARCH64_CMODEL_MEDIUM: -+ case AARCH64_CMODEL_MEDIUM_PIC: ++ case AARCH64_CMODEL_MEDIUM: ++ case AARCH64_CMODEL_MEDIUM_PIC: /* text+got+data < 4Gb. 4-byte signed relocs are sufficient for everything. */ type = DW_EH_PE_sdata4; -@@ -18454,7 +18601,8 @@ aarch64_empty_mask_is_expensive (unsigne +@@ -21014,7 +21169,14 @@ aarch64_empty_mask_is_expensive (unsigne bool aarch64_use_pseudo_pic_reg (void) { - return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC; ++ /* flag_pic is 2 when -fPIC is on, where we do not need the pseudo ++ pic reg. In medium code mode, when combine with -fpie/-fpic, there are ++ possibility that some symbol size smaller than the -mlarge-data-threshold ++ will still use SMALL_SPIC relocation, which need the pseudo pic reg. ++ Fix spill_1.c fail. */ + return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC -+ || aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC ; ++ || (aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC ++ && flag_pic != 2); } /* Implement TARGET_UNSPEC_MAY_TRAP_P. 
*/ -@@ -18464,6 +18612,7 @@ aarch64_unspec_may_trap_p (const_rtx x, +@@ -21024,6 +21186,7 @@ aarch64_unspec_may_trap_p (const_rtx x, { switch (XINT (x, 1)) { @@ -273,8 +305,8 @@ diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c case UNSPEC_GOTSMALLPIC28K: case UNSPEC_GOTTINYPIC: diff -Nurp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h ---- a/gcc/config/aarch64/aarch64.h 2020-07-16 14:54:30.592000000 +0800 -+++ b/gcc/config/aarch64/aarch64.h 2020-07-16 14:55:05.672000000 +0800 +--- a/gcc/config/aarch64/aarch64.h 2021-02-18 11:03:28.336000000 +0800 ++++ b/gcc/config/aarch64/aarch64.h 2021-02-18 10:57:45.488000000 +0800 @@ -33,6 +33,10 @@ #define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas () @@ -287,9 +319,9 @@ diff -Nurp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h #define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ diff -Nurp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md ---- a/gcc/config/aarch64/aarch64.md 2020-07-16 14:54:30.588000000 +0800 -+++ b/gcc/config/aarch64/aarch64.md 2020-07-16 14:55:05.676000000 +0800 -@@ -209,6 +209,11 @@ +--- a/gcc/config/aarch64/aarch64.md 2021-02-18 11:03:28.340000000 +0800 ++++ b/gcc/config/aarch64/aarch64.md 2021-02-18 10:57:45.488000000 +0800 +@@ -224,6 +224,11 @@ UNSPEC_RSQRTS UNSPEC_NZCV UNSPEC_XPACLRI @@ -301,7 +333,7 @@ diff -Nurp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md UNSPEC_LD1_SVE UNSPEC_ST1_SVE UNSPEC_LDNT1_SVE -@@ -6548,6 +6553,39 @@ +@@ -6689,6 +6694,39 @@ [(set_attr "type" "load_4")] ) @@ -341,7 +373,7 @@ diff -Nurp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md (define_insn "ldr_got_small_28k_" [(set (match_operand:PTR 0 "register_operand" "=r") (unspec:PTR [(mem:PTR (lo_sum:PTR -@@ -6709,6 +6747,23 @@ +@@ -6852,6 +6890,23 @@ (set_attr "length" "12")] ) @@ -366,8 +398,8 @@ diff -Nurp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md [(unspec:PTR [(match_operand 0 "aarch64_valid_symref")] UNSPEC_TLSDESC)] "TARGET_TLS_DESC" diff -Nurp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt ---- a/gcc/config/aarch64/aarch64.opt 2020-07-16 14:54:30.580000000 +0800 -+++ b/gcc/config/aarch64/aarch64.opt 2020-07-16 14:55:05.676000000 +0800 +--- a/gcc/config/aarch64/aarch64.opt 2021-02-18 11:03:28.340000000 +0800 ++++ b/gcc/config/aarch64/aarch64.opt 2021-02-18 10:57:45.488000000 +0800 @@ -27,6 +27,10 @@ enum aarch64_processor explicit_tune_cor TargetVariable enum aarch64_arch explicit_arch = aarch64_no_arch @@ -396,8 +428,8 @@ diff -Nurp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt Target Report RejectNegative Mask(BIG_END) Assume target CPU is configured as big endian. diff -Nurp a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h ---- a/gcc/config/aarch64/aarch64-opts.h 2020-07-16 14:54:30.584000000 +0800 -+++ b/gcc/config/aarch64/aarch64-opts.h 2020-07-16 14:55:05.676000000 +0800 +--- a/gcc/config/aarch64/aarch64-opts.h 2020-03-12 19:07:21.000000000 +0800 ++++ b/gcc/config/aarch64/aarch64-opts.h 2021-02-18 10:57:45.488000000 +0800 @@ -66,6 +66,10 @@ enum aarch64_code_model { /* -fpic for small memory model. GOT size to 28KiB (4K*8-4K) or 3580 entries. */ @@ -410,8 +442,8 @@ diff -Nurp a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts The PIC variant is not yet implemented. 
*/ AARCH64_CMODEL_LARGE diff -Nurp a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h ---- a/gcc/config/aarch64/aarch64-protos.h 2020-07-16 14:54:30.584000000 +0800 -+++ b/gcc/config/aarch64/aarch64-protos.h 2020-07-16 14:55:05.676000000 +0800 +--- a/gcc/config/aarch64/aarch64-protos.h 2021-02-18 11:03:29.432000000 +0800 ++++ b/gcc/config/aarch64/aarch64-protos.h 2021-02-18 10:57:45.488000000 +0800 @@ -95,9 +95,11 @@ */ enum aarch64_symbol_type diff --git a/optabs-Dont-use-scalar-conversions-for-vectors.patch b/optabs-Dont-use-scalar-conversions-for-vectors.patch new file mode 100644 index 0000000..91407d8 --- /dev/null +++ b/optabs-Dont-use-scalar-conversions-for-vectors.patch @@ -0,0 +1,69 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-optabs-Don-t-use-scalar-conversions-for-vectors-PR93.patch +b6268016bf46dd63227dcbb73d13c30a3b4b9d2a + +diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c +index 3d829c27826..badd30bfda8 100644 +--- a/gcc/optabs-tree.c ++++ b/gcc/optabs-tree.c +@@ -284,9 +284,14 @@ supportable_convert_operation (enum tree_code code, + machine_mode m1,m2; + bool truncp; + ++ gcc_assert (VECTOR_TYPE_P (vectype_out) && VECTOR_TYPE_P (vectype_in)); ++ + m1 = TYPE_MODE (vectype_out); + m2 = TYPE_MODE (vectype_in); + ++ if (!VECTOR_MODE_P (m1) || !VECTOR_MODE_P (m2)) ++ return false; ++ + /* First check if we can done conversion directly. */ + if ((code == FIX_TRUNC_EXPR + && can_fix_p (m1,m2,TYPE_UNSIGNED (vectype_out), &truncp) +diff --git a/gcc/testsuite/gcc.dg/vect/pr93843-1.c b/gcc/testsuite/gcc.dg/vect/pr93843-1.c +new file mode 100644 +index 00000000000..23a79ca4c96 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/pr93843-1.c +@@ -0,0 +1,21 @@ ++char a; ++struct S { short b, c; } d; ++ ++__attribute__((noipa)) void ++foo (int x) ++{ ++ if (x != 4) ++ __builtin_abort (); ++} ++ ++int ++main () ++{ ++ short *g = &d.c, *h = &d.b; ++ char e = 4 - a; ++ int f; ++ *h = *g = e; ++ for (f = 0; f < 2; f++) ++ foo (d.c); ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.dg/vect/pr93843-2.c b/gcc/testsuite/gcc.dg/vect/pr93843-2.c +new file mode 100644 +index 00000000000..5fae3e5be17 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/pr93843-2.c +@@ -0,0 +1,11 @@ ++char in[2] = {2, 2}; ++short out[2] = {}; ++ ++int ++main() ++{ ++ for (int i = 0; i < 2; ++i) ++ out[i] = in[i]; ++ asm("":::"memory"); ++ if (out[0] != 2) __builtin_abort(); ++} diff --git a/redundant-loop-elimination.patch b/redundant-loop-elimination.patch index fb33bec..53a5063 100644 --- a/redundant-loop-elimination.patch +++ b/redundant-loop-elimination.patch @@ -1,6 +1,6 @@ diff -Nurp a/gcc/common.opt b/gcc/common.opt ---- a/gcc/common.opt 2020-11-23 03:24:54.760000000 -0500 -+++ b/gcc/common.opt 2020-11-23 03:23:59.716000000 -0500 +--- a/gcc/common.opt 2021-02-18 21:32:50.724000000 -0500 ++++ b/gcc/common.opt 2021-02-18 21:33:36.920000000 -0500 @@ -1150,6 +1150,10 @@ fcompare-elim Common Report Var(flag_compare_elim_after_reload) Optimization Perform comparison elimination after register allocation has finished. @@ -13,8 +13,8 @@ diff -Nurp a/gcc/common.opt b/gcc/common.opt Common Var(flag_conserve_stack) Optimization Do not perform optimizations increasing noticeably stack usage. 
diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c ---- a/gcc/tree-ssa-phiopt.c 2020-11-23 03:24:54.760000000 -0500 -+++ b/gcc/tree-ssa-phiopt.c 2020-11-23 03:27:42.824000000 -0500 +--- a/gcc/tree-ssa-phiopt.c 2021-02-18 21:32:52.648000000 -0500 ++++ b/gcc/tree-ssa-phiopt.c 2021-02-19 01:55:10.128000000 -0500 @@ -71,6 +71,7 @@ static hash_set * get_non_trapping static void replace_phi_edge_with_variable (basic_block, edge, gimple *, tree); static void hoist_adjacent_loads (basic_block, basic_block, @@ -48,7 +48,7 @@ diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c + ... +*/ +static bool -+check_uses_cond (tree ssa_name, gimple *stmt, ++check_uses_cond (const_tree ssa_name, gimple *stmt, + hash_set *hset ATTRIBUTE_UNUSED) +{ + tree_code code = gimple_cond_code (stmt); @@ -76,7 +76,7 @@ diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c + _tmp = SSA_NAME | _tmp2; +*/ +static bool -+check_uses_assign (tree ssa_name, gimple *stmt, hash_set *hset) ++check_uses_assign (const_tree ssa_name, gimple *stmt, hash_set *hset) +{ + tree_code code = gimple_assign_rhs_code (stmt); + tree lhs, rhs1, rhs2; @@ -113,7 +113,7 @@ diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c + # result = PHI +*/ +static bool -+check_uses_phi (tree ssa_name, gimple *stmt, hash_set *hset) ++check_uses_phi (const_tree ssa_name, gimple *stmt, hash_set *hset) +{ + for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++) + { @@ -223,7 +223,7 @@ diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +} + +static bool -+check_def_gimple (gimple *def1, gimple *def2, tree result) ++check_def_gimple (gimple *def1, gimple *def2, const_tree result) +{ + /* def1 and def2 should be POINTER_PLUS_EXPR. */ + if (!is_gimple_assign (def1) || !is_gimple_assign (def2) @@ -255,7 +255,7 @@ diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +} + +static bool -+check_loop_body (basic_block bb0, basic_block bb2, tree result) ++check_loop_body (basic_block bb0, basic_block bb2, const_tree result) +{ + gimple *g01 = first_stmt (bb0); + if (!g01 || !is_gimple_assign (g01) @@ -373,8 +373,8 @@ diff -Nurp a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c + ... +*/ +static bool -+check_gimple_order (basic_block bb1, tree base, tree cst, tree result, -+ gimple *&output) ++check_gimple_order (basic_block bb1, const_tree base, const_tree cst, ++ const_tree result, gimple *&output) +{ + gimple *g1 = first_stmt (bb1); + if (!g1 || !is_gimple_assign (g1) diff --git a/revert-moutline-atomics.patch b/revert-moutline-atomics.patch new file mode 100644 index 0000000..59b5a94 --- /dev/null +++ b/revert-moutline-atomics.patch @@ -0,0 +1,418 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-aarch64-Implement-moutline-atomics.patch +3950b229a5ed6710f30241c2ddc3c74909bf4740 + +diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +--- a/gcc/config/aarch64/aarch64.c 2021-03-11 17:12:30.380000000 +0800 ++++ b/gcc/config/aarch64/aarch64.c 2021-03-11 17:13:29.992000000 +0800 +@@ -18150,82 +18150,6 @@ aarch64_emit_unlikely_jump (rtx insn) + add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); + } + +-/* We store the names of the various atomic helpers in a 5x4 array. +- Return the libcall function given MODE, MODEL and NAMES. 
*/ +- +-rtx +-aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx, +- const atomic_ool_names *names) +-{ +- memmodel model = memmodel_base (INTVAL (model_rtx)); +- int mode_idx, model_idx; +- +- switch (mode) +- { +- case E_QImode: +- mode_idx = 0; +- break; +- case E_HImode: +- mode_idx = 1; +- break; +- case E_SImode: +- mode_idx = 2; +- break; +- case E_DImode: +- mode_idx = 3; +- break; +- case E_TImode: +- mode_idx = 4; +- break; +- default: +- gcc_unreachable (); +- } +- +- switch (model) +- { +- case MEMMODEL_RELAXED: +- model_idx = 0; +- break; +- case MEMMODEL_CONSUME: +- case MEMMODEL_ACQUIRE: +- model_idx = 1; +- break; +- case MEMMODEL_RELEASE: +- model_idx = 2; +- break; +- case MEMMODEL_ACQ_REL: +- case MEMMODEL_SEQ_CST: +- model_idx = 3; +- break; +- default: +- gcc_unreachable (); +- } +- +- return init_one_libfunc_visibility (names->str[mode_idx][model_idx], +- VISIBILITY_HIDDEN); +-} +- +-#define DEF0(B, N) \ +- { "__aarch64_" #B #N "_relax", \ +- "__aarch64_" #B #N "_acq", \ +- "__aarch64_" #B #N "_rel", \ +- "__aarch64_" #B #N "_acq_rel" } +- +-#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \ +- { NULL, NULL, NULL, NULL } +-#define DEF5(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), DEF0(B, 16) +- +-static const atomic_ool_names aarch64_ool_cas_names = { { DEF5(cas) } }; +-const atomic_ool_names aarch64_ool_swp_names = { { DEF4(swp) } }; +-const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } }; +-const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } }; +-const atomic_ool_names aarch64_ool_ldclr_names = { { DEF4(ldclr) } }; +-const atomic_ool_names aarch64_ool_ldeor_names = { { DEF4(ldeor) } }; +- +-#undef DEF0 +-#undef DEF4 +-#undef DEF5 +- + /* Expand a compare and swap pattern. */ + + void +@@ -18272,17 +18196,6 @@ aarch64_expand_compare_and_swap (rtx ope + newval, mod_s)); + cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); + } +- else if (TARGET_OUTLINE_ATOMICS) +- { +- /* Oldval must satisfy compare afterward. */ +- if (!aarch64_plus_operand (oldval, mode)) +- oldval = force_reg (mode, oldval); +- rtx func = aarch64_atomic_ool_func (mode, mod_s, &aarch64_ool_cas_names); +- rval = emit_library_call_value (func, NULL_RTX, LCT_NORMAL, r_mode, +- oldval, mode, newval, mode, +- XEXP (mem, 0), Pmode); +- cc_reg = aarch64_gen_compare_reg_maybe_ze (NE, rval, oldval, mode); +- } + else + { + /* The oldval predicate varies by mode. Test it and force to reg. */ +diff -Nurp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt +--- a/gcc/config/aarch64/aarch64.opt 2021-03-11 17:12:30.380000000 +0800 ++++ b/gcc/config/aarch64/aarch64.opt 2021-03-11 17:13:29.992000000 +0800 +@@ -272,6 +272,3 @@ user-land code. + TargetVariable + long aarch64_stack_protector_guard_offset = 0 + +-moutline-atomics +-Target Report Mask(OUTLINE_ATOMICS) Save +-Generate local calls to out-of-line atomic operations. +diff -Nurp a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md +--- a/gcc/config/aarch64/atomics.md 2021-03-11 17:12:30.380000000 +0800 ++++ b/gcc/config/aarch64/atomics.md 2021-03-11 17:13:29.992000000 +0800 +@@ -186,27 +186,16 @@ + (match_operand:SI 3 "const_int_operand")] + "" + { ++ rtx (*gen) (rtx, rtx, rtx, rtx); ++ + /* Use an atomic SWP when available. 
*/ + if (TARGET_LSE) +- { +- emit_insn (gen_aarch64_atomic_exchange_lse +- (operands[0], operands[1], operands[2], operands[3])); +- } +- else if (TARGET_OUTLINE_ATOMICS) +- { +- machine_mode mode = mode; +- rtx func = aarch64_atomic_ool_func (mode, operands[3], +- &aarch64_ool_swp_names); +- rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL, +- mode, operands[2], mode, +- XEXP (operands[1], 0), Pmode); +- emit_move_insn (operands[0], rval); +- } ++ gen = gen_aarch64_atomic_exchange_lse; + else +- { +- emit_insn (gen_aarch64_atomic_exchange +- (operands[0], operands[1], operands[2], operands[3])); +- } ++ gen = gen_aarch64_atomic_exchange; ++ ++ emit_insn (gen (operands[0], operands[1], operands[2], operands[3])); ++ + DONE; + } + ) +@@ -291,39 +280,6 @@ + } + operands[1] = force_reg (mode, operands[1]); + } +- else if (TARGET_OUTLINE_ATOMICS) +- { +- const atomic_ool_names *names; +- switch () +- { +- case MINUS: +- operands[1] = expand_simple_unop (mode, NEG, operands[1], +- NULL, 1); +- /* fallthru */ +- case PLUS: +- names = &aarch64_ool_ldadd_names; +- break; +- case IOR: +- names = &aarch64_ool_ldset_names; +- break; +- case XOR: +- names = &aarch64_ool_ldeor_names; +- break; +- case AND: +- operands[1] = expand_simple_unop (mode, NOT, operands[1], +- NULL, 1); +- names = &aarch64_ool_ldclr_names; +- break; +- default: +- gcc_unreachable (); +- } +- machine_mode mode = mode; +- rtx func = aarch64_atomic_ool_func (mode, operands[2], names); +- emit_library_call_value (func, NULL_RTX, LCT_NORMAL, mode, +- operands[1], mode, +- XEXP (operands[0], 0), Pmode); +- DONE; +- } + else + gen = gen_aarch64_atomic_; + +@@ -449,40 +405,6 @@ + } + operands[2] = force_reg (mode, operands[2]); + } +- else if (TARGET_OUTLINE_ATOMICS) +- { +- const atomic_ool_names *names; +- switch () +- { +- case MINUS: +- operands[2] = expand_simple_unop (mode, NEG, operands[2], +- NULL, 1); +- /* fallthru */ +- case PLUS: +- names = &aarch64_ool_ldadd_names; +- break; +- case IOR: +- names = &aarch64_ool_ldset_names; +- break; +- case XOR: +- names = &aarch64_ool_ldeor_names; +- break; +- case AND: +- operands[2] = expand_simple_unop (mode, NOT, operands[2], +- NULL, 1); +- names = &aarch64_ool_ldclr_names; +- break; +- default: +- gcc_unreachable (); +- } +- machine_mode mode = mode; +- rtx func = aarch64_atomic_ool_func (mode, operands[3], names); +- rtx rval = emit_library_call_value (func, operands[0], LCT_NORMAL, mode, +- operands[2], mode, +- XEXP (operands[1], 0), Pmode); +- emit_move_insn (operands[0], rval); +- DONE; +- } + else + gen = gen_aarch64_atomic_fetch_; + +@@ -572,7 +494,7 @@ + { + /* Use an atomic load-operate instruction when possible. In this case + we will re-compute the result from the original mem value. 
+-    if (TARGET_LSE || TARGET_OUTLINE_ATOMICS)
++    if (TARGET_LSE)
+       {
+         rtx tmp = gen_reg_rtx (<MODE>mode);
+         operands[2] = force_reg (<MODE>mode, operands[2]);
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c	2021-03-11 17:12:34.168000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_reg_1.c	2021-03-11 17:13:30.656000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -march=armv8-a+nolse -mno-outline-atomics" } */
++/* { dg-options "-O2 -march=armv8-a+nolse" } */
+ /* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */
+ 
+ int
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c	2021-03-11 17:12:34.168000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic_cmp_exchange_zero_strong_1.c	2021-03-11 17:13:30.656000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -march=armv8-a+nolse -mno-outline-atomics" } */
++/* { dg-options "-O2 -march=armv8-a+nolse" } */
+ /* { dg-skip-if "" { *-*-* } { "-mcpu=*" } { "" } } */
+ 
+ int
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c	2021-03-11 17:12:33.988000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-comp-swap-release-acquire.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf" } */
+ 
+ #include "atomic-comp-swap-release-acquire.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c	2021-03-11 17:12:33.988000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acq_rel.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-acq_rel.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c	2021-03-11 17:12:33.988000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-acquire.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-acquire.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c	2021-03-11 17:12:33.992000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-char.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-char.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c	2021-03-11 17:12:33.992000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-consume.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-consume.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c	2021-03-11 17:12:33.992000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-imm.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ int v = 0;
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c	2021-03-11 17:12:33.992000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-int.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-int.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c	2021-03-11 17:12:33.992000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-long.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ long v = 0;
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c	2021-03-11 17:12:33.992000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-relaxed.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-relaxed.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c	2021-03-11 17:12:34.012000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-release.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-release.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c	2021-03-11 17:12:34.012000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-seq_cst.c	2021-03-11 17:13:30.648000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-seq_cst.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c
+--- a/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c	2021-03-11 17:12:34.168000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/atomic-op-short.c	2021-03-11 17:13:30.652000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "atomic-op-short.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c
+--- a/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c	2021-03-11 17:12:34.168000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/sync-comp-swap.c	2021-03-11 17:13:30.656000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2 -fno-ipa-icf" } */
+ 
+ #include "sync-comp-swap.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c
+--- a/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c	2021-03-11 17:12:34.168000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/sync-op-acquire.c	2021-03-11 17:13:30.656000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "sync-op-acquire.x"
+ 
+diff -Nurp a/gcc/testsuite/gcc.target/aarch64/sync-op-full.c b/gcc/testsuite/gcc.target/aarch64/sync-op-full.c
+--- a/gcc/testsuite/gcc.target/aarch64/sync-op-full.c	2021-03-11 17:12:34.168000000 +0800
++++ b/gcc/testsuite/gcc.target/aarch64/sync-op-full.c	2021-03-11 17:13:30.656000000 +0800
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-march=armv8-a+nolse -O2 -mno-outline-atomics" } */
++/* { dg-options "-march=armv8-a+nolse -O2" } */
+ 
+ #include "sync-op-full.x"
+ 
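Background on the file reverted above, illustrative rather than part of the patch: with -moutline-atomics, GCC expands the common __atomic_* operations as calls into libgcc helpers named __aarch64_<op><size>_<model> -- exactly the names produced by the DEF0/DEF4/DEF5 macros deleted in the aarch64.c hunk -- and each helper selects an LSE or LL/SC implementation at run time. A minimal sketch of the user-visible effect (this example is the editor's, not from the patch):

    /* With -moutline-atomics this fetch-add compiles to a call to the
       libgcc helper __aarch64_ldadd4_relax; with the option reverted it
       expands inline, as an LDXR/STXR loop, or as a single LDADD when
       the -march level enables LSE.  */
    #include <stdatomic.h>

    int
    fetch_add_relaxed (atomic_int *p)
    {
      return atomic_fetch_add_explicit (p, 1, memory_order_relaxed);
    }
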
diff --git a/revise-type-before-build-MULT.patch b/revise-type-before-build-MULT.patch
new file mode 100644
index 0000000..ddcb05e
--- /dev/null
+++ b/revise-type-before-build-MULT.patch
@@ -0,0 +1,80 @@
+diff -uprN a/gcc/testsuite/gcc.dg/affine-add-1.c b/gcc/testsuite/gcc.dg/affine-add-1.c
+--- a/gcc/testsuite/gcc.dg/affine-add-1.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/affine-add-1.c	2021-03-18 19:41:21.308000000 +0800
+@@ -0,0 +1,16 @@
++/* { dg-do compile } */
++/* { dg-options "-O2" } */
++
++extern unsigned char a[][13][23][15][11];
++short b;
++int c, d;
++void e(int f, int g[][3][4][3]) {
++  for (char h = 0;; h = 2)
++    for (; f;)
++      for (short i;; i = d)
++        for (char j; j; j = c)
++          for (char k = 0; k < 4; k = g[h][b][i][j])
++            a[h][b][i][j][k] = 0;
++}
++unsigned char a[3][13][23][15][11];
++int main() {}
+diff -uprN a/gcc/testsuite/g++.dg/affine-add-1.C b/gcc/testsuite/g++.dg/affine-add-1.C
+--- a/gcc/testsuite/g++.dg/affine-add-1.C	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/g++.dg/affine-add-1.C	2021-03-18 19:40:28.432000000 +0800
+@@ -0,0 +1,33 @@
++/* { dg-do compile } */
++/* { dg-options "-O3" } */
++
++#include <algorithm>
++
++extern int a[];
++extern unsigned char b[][151800];
++extern long long c[][20][23][22][11];
++char d, e;
++int f;
++unsigned g;
++long h;
++void i(unsigned long long s, unsigned short j) {
++  for (char k = 0; k < 12; k += 3)
++    for (short l = 0; l < 9; l = std::min(j, (unsigned short)4050683)) {
++      for (bool m(h); m < bool(~0); m = 1)
++        for (int t = 0; t < 4; t = std::min(s, (unsigned long long)40808803))
++          for (int n = 0; n < 9; n += e)
++            a[n] = 0;
++      for (char o = 0; o < g; o = 4)
++        for (bool p; p < f; p = d) {
++          for (long q(s); q < 4ULL; q += 1ULL)
++            b[k][o + q] = 0;
++          for (int r = 0; r < 11; r += ~0 || 0)
++            c[k][l][o][d][r] = 0;
++        }
++    }
++}
++int a[0];
++unsigned char b[3][151800];
++long long c[3][20][23][22][11];
++int main() {}
++
+diff -uprN a/gcc/tree-affine.c b/gcc/tree-affine.c
+--- a/gcc/tree-affine.c	2021-03-15 18:55:31.928000000 +0800
++++ b/gcc/tree-affine.c	2021-03-18 16:34:05.932000000 +0800
+@@ -184,9 +184,16 @@ aff_combination_add_elt (aff_tree *comb,
+   if (scale == 1)
+     elt = fold_convert (type, elt);
+   else
+-    elt = fold_build2 (MULT_EXPR, type,
+-                       fold_convert (type, elt),
+-                       wide_int_to_tree (type, scale));
++    {
++      if (POINTER_TYPE_P (TREE_TYPE (elt)))
++        {
++          elt = copy_node (elt);
++          TREE_TYPE (elt) = sizetype;
++        }
++      elt = fold_build2 (MULT_EXPR, type,
++                         fold_convert (type, elt),
++                         wide_int_to_tree (type, scale));
++    }
+ 
+   if (comb->rest)
+     comb->rest = fold_build2 (PLUS_EXPR, type, comb->rest,
--
Gitee
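A note on the tree-affine.c hunk, as an illustrative sketch rather than an authoritative statement of the bug: aff_combination_add_elt scales ELT by SCALE with a MULT_EXPR, and when ELT carries a pointer type that tree is not valid GIMPLE (pointer arithmetic is restricted to POINTER_PLUS_EXPR), which the reduced loop nests above appear to provoke; the fix therefore reinterprets the node as sizetype before multiplying. The same rule at the C level (editor's example, hypothetical function name):

    /* Multiplying a pointer directly is ill-formed in C, just as a
       pointer-typed MULT_EXPR operand is invalid in GIMPLE; address
       scaling is done in a pointer-sized integer type instead.  */
    #include <stdint.h>

    unsigned char *
    scale_address (unsigned char *base, uintptr_t n)
    {
      return (unsigned char *) ((uintptr_t) base * n);
    }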