diff --git a/0097-Improve-non-loop-disambiguation.patch b/0097-Improve-non-loop-disambiguation.patch
new file mode 100644
index 0000000000000000000000000000000000000000..ae609d29474c7a05896508e06227959e405b2ad5
--- /dev/null
+++ b/0097-Improve-non-loop-disambiguation.patch
@@ -0,0 +1,101 @@
+From 6de2e0d400cbe46da482a672810c37b1832c408c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?=
+Date: Thu, 25 Jul 2024 19:45:43 +0800
+Subject: [PATCH] Improve non-loop disambiguation
+
+This optimization is brought from https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=038b077689bb5310386b04d40a2cea234f01e6aa.
+
+When dr_may_alias_p is called without a loop context, it tries
+to use the tree-affine interface to calculate the difference
+between the two addresses and use that difference to check whether
+the gap between the accesses is known at compile time.  However, as the
+example in the PR shows, this doesn't expand SSA_NAMEs and so can easily
+be defeated by things like reassociation.
+
+One fix would have been to use aff_combination_expand to expand the
+SSA_NAMEs, but we'd then need some way of maintaining the associated
+cache.  This patch instead reuses the innermost_loop_behavior fields
+(which exist even when no loop context is provided).
+
+It might still be useful to do the aff_combination_expand thing too,
+if an example turns out to need it.
+---
+ gcc/common.opt                              |  4 ++++
+ gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c | 16 +++++++++++++++
+ gcc/tree-data-ref.cc                        | 22 +++++++++++++++++++++
+ 3 files changed, 42 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index b18f0b944..75bf9c9c1 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -3217,6 +3217,10 @@ ftree-loop-vectorize
+ Common Var(flag_tree_loop_vectorize) Optimization EnabledBy(ftree-vectorize)
+ Enable loop vectorization on trees.
+ 
++falias-analysis-expand-ssa
++Common Var(flag_alias_analysis_expand_ssa) Init(0)
++Enable expanded SSA name analysis during alias analysis.
++
+ ftree-slp-vectorize
+ Common Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize)
+ Enable basic block vectorization (SLP) on trees.
+diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c
+new file mode 100644
+index 000000000..5ff8a8a62
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr106019.c
+@@ -0,0 +1,16 @@
++/* { dg-do compile } */
++/* { dg-additional-options "-falias-analysis-expand-ssa" } */
++
++void f(double *p, long i)
++{
++  p[i+0] += 1;
++  p[i+1] += 1;
++}
++void g(double *p, long i)
++{
++  double *q = p + i;
++  q[0] += 1;
++  q[1] += 1;
++}
++
++/* { dg-final { scan-tree-dump-not "can't determine dependence" slp2 } } */
+diff --git a/gcc/tree-data-ref.cc b/gcc/tree-data-ref.cc
+index e6ae9e847..a05073c51 100644
+--- a/gcc/tree-data-ref.cc
++++ b/gcc/tree-data-ref.cc
+@@ -2993,6 +2993,28 @@ dr_may_alias_p (const struct data_reference *a, const struct data_reference *b,
+      disambiguation.
*/
+   if (!loop_nest)
+     {
++      if (flag_alias_analysis_expand_ssa)
++        {
++          tree tree_size_a = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (a)));
++          tree tree_size_b = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (b)));
++
++          if (DR_BASE_ADDRESS (a)
++              && DR_BASE_ADDRESS (b)
++              && operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b))
++              && operand_equal_p (DR_OFFSET (a), DR_OFFSET (b))
++              && poly_int_tree_p (tree_size_a)
++              && poly_int_tree_p (tree_size_b)
++              && !ranges_maybe_overlap_p (wi::to_widest (DR_INIT (a)),
++                                          wi::to_widest (tree_size_a),
++                                          wi::to_widest (DR_INIT (b)),
++                                          wi::to_widest (tree_size_b)))
++            {
++              gcc_assert (integer_zerop (DR_STEP (a))
++                          && integer_zerop (DR_STEP (b)));
++              return false;
++            }
++        }
++
+       aff_tree off1, off2;
+       poly_widest_int size1, size2;
+       get_inner_reference_aff (DR_REF (a), &off1, &size1);
+-- 
+2.33.0
+
diff --git a/0098-CHREC-multiplication-and-undefined-overflow.patch b/0098-CHREC-multiplication-and-undefined-overflow.patch
new file mode 100644
index 0000000000000000000000000000000000000000..9f9a6b7410fd4a910d9eb899401a81f62500a797
--- /dev/null
+++ b/0098-CHREC-multiplication-and-undefined-overflow.patch
@@ -0,0 +1,261 @@
+From c4e4fef145c1e402f0558cc35f6c1ed0a08beffb Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?=
+Date: Thu, 25 Jul 2024 20:16:52 +0800
+Subject: [PATCH] CHREC multiplication and undefined overflow
+
+This optimization is brought from https://gcc.gnu.org/pipermail/gcc-patches/2024-February/646531.html
+
+When folding a multiply CHRECs are handled like {a, +, b} * c
+is {a*c, +, b*c} but that isn't generally correct when overflow
+invokes undefined behavior.  The following uses unsigned arithmetic
+unless either a is zero or a and b have the same sign.
+
+I've used simple early outs for INTEGER_CSTs and otherwise use
+a range-query since we lack a tree_expr_nonpositive_p and
+get_range_pos_neg isn't a good fit.
+---
+ gcc/common.opt                          |  4 ++
+ gcc/testsuite/gcc.dg/pr68317.c          |  6 +-
+ gcc/testsuite/gcc.dg/torture/pr114074.c | 27 ++++++++++
+ gcc/tree-chrec.cc                       | 81 +++++++++++++++++++++----
+ gcc/tree-chrec.h                        |  2 +-
+ gcc/value-range.cc                      | 12 ++++
+ gcc/value-range.h                       |  2 +
+ 7 files changed, 119 insertions(+), 15 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/torture/pr114074.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index b18f0b944..d3af3ba39 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1771,6 +1771,10 @@ floop-interchange
+ Common Var(flag_loop_interchange) Optimization
+ Enable loop interchange on trees.
+ 
++fchrec-mul-fold-strict-overflow
++Common Var(flag_chrec_mul_fold_strict_overflow) Init(0)
++Enable strict overflow handling during constant folding of multiply CHRECs.
++
+ floop-block
+ Common Alias(floop-nest-optimize)
+ Enable loop nest transforms.  Same as -floop-nest-optimize.
+diff --git a/gcc/testsuite/gcc.dg/pr68317.c b/gcc/testsuite/gcc.dg/pr68317.c
+index bd053a752..671a67d95 100644
+--- a/gcc/testsuite/gcc.dg/pr68317.c
++++ b/gcc/testsuite/gcc.dg/pr68317.c
+@@ -1,5 +1,5 @@
+ /* { dg-do compile } */
+-/* { dg-options "-O2 -fdisable-tree-ethread" } */
++/* { dg-options "-O2 -fdisable-tree-ethread -fchrec-mul-fold-strict-overflow" } */
+ 
+ /* Note: Threader will collapse loop.  */
+ 
+@@ -12,8 +12,8 @@ foo ()
+ {
+   int32_t index = 0;
+ 
+-  for (index; index <= 10; index--) // expected warning here
++  for (index; index <= 10; index--) /* { dg-warning "iteration \[0-9\]+ invokes undefined behavior" } */
+     /* Result of the following multiply will overflow
+        when converted to signed int32_t.  */
+-  bar ((0xcafe + index) * 0xdead); /* { dg-warning "iteration \[0-9\]+ invokes undefined behavior" } */
++  bar ((0xcafe + index) * 0xdead);
+ }
+diff --git a/gcc/testsuite/gcc.dg/torture/pr114074.c b/gcc/testsuite/gcc.dg/torture/pr114074.c
+new file mode 100644
+index 000000000..9a383d8fc
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/torture/pr114074.c
+@@ -0,0 +1,27 @@
++/* { dg-do run } */
++/* { dg-options "-fchrec-mul-fold-strict-overflow" } */
++int a, b, d;
++
++__attribute__((noipa)) void
++foo (void)
++{
++  ++d;
++}
++
++int
++main ()
++{
++  for (a = 0; a > -3; a -= 2)
++    {
++      int c = a;
++      b = __INT_MAX__ - 3000;
++      a = ~c * b;
++      foo ();
++      if (!a)
++        break;
++      a = c;
++    }
++  if (d != 2)
++    __builtin_abort ();
++  return 0;
++}
+diff --git a/gcc/tree-chrec.cc b/gcc/tree-chrec.cc
+index c44cea754..3323901bc 100644
+--- a/gcc/tree-chrec.cc
++++ b/gcc/tree-chrec.cc
+@@ -38,6 +38,8 @@ along with GCC; see the file COPYING3.  If not see
+ #include "gimple.h"
+ #include "tree-ssa-loop.h"
+ #include "dumpfile.h"
++#include "value-range.h"
++#include "value-query.h"
+ #include "tree-scalar-evolution.h"
+ 
+ /* Extended folder for chrecs.  */
+@@ -404,6 +406,13 @@ chrec_fold_multiply (tree type,
+       || automatically_generated_chrec_p (op1))
+     return chrec_fold_automatically_generated_operands (op0, op1);
+ 
++  if (flag_chrec_mul_fold_strict_overflow)
++    {
++      if (TREE_CODE (op0) != POLYNOMIAL_CHREC
++          && TREE_CODE (op1) == POLYNOMIAL_CHREC)
++        std::swap (op0, op1);
++    }
++
+   switch (TREE_CODE (op0))
+     {
+     case POLYNOMIAL_CHREC:
+@@ -428,10 +437,53 @@ chrec_fold_multiply (tree type,
+       if (integer_zerop (op1))
+         return build_int_cst (type, 0);
+ 
+-      return build_polynomial_chrec
+-        (CHREC_VARIABLE (op0),
+-         chrec_fold_multiply (type, CHREC_LEFT (op0), op1),
+-         chrec_fold_multiply (type, CHREC_RIGHT (op0), op1));
++      if (flag_chrec_mul_fold_strict_overflow)
++        {
++          /* When overflow is undefined and CHREC_LEFT/RIGHT do not have the
++             same sign or CHREC_LEFT is zero then folding the multiply into
++             the addition does not have the same behavior on overflow.  Use
++             unsigned arithmetic in that case.
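++
++             For instance, in the PR114074 testcase added above, c steps
++             through {0, +, -2}, so ~c is {-1, +, 2}; multiplying by
++             b = __INT_MAX__ - 3000 would fold to {-b, +, 2*b}, whose step
++             2*b already overflows signed int even though the loop only
++             ever computes -b and b.  Doing the folding in the corresponding
++             unsigned type avoids introducing that undefined overflow.  */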
++          value_range rl, rr;
++          if (!ANY_INTEGRAL_TYPE_P (type)
++              || TYPE_OVERFLOW_WRAPS (type)
++              || integer_zerop (CHREC_LEFT (op0))
++              || (TREE_CODE (CHREC_LEFT (op0)) == INTEGER_CST
++                  && TREE_CODE (CHREC_RIGHT (op0)) == INTEGER_CST
++                  && (tree_int_cst_sgn (CHREC_LEFT (op0))
++                      == tree_int_cst_sgn (CHREC_RIGHT (op0))))
++              || (get_range_query (cfun)->range_of_expr (rl, CHREC_LEFT (op0))
++                  && !rl.undefined_p ()
++                  && (rl.nonpositive_p () || rl.nonnegative_p ())
++                  && get_range_query (cfun)->range_of_expr (rr,
++                                                            CHREC_RIGHT (op0))
++                  && !rr.undefined_p ()
++                  && ((rl.nonpositive_p () && rr.nonpositive_p ())
++                      || (rl.nonnegative_p () && rr.nonnegative_p ()))))
++            {
++              tree left = chrec_fold_multiply (type, CHREC_LEFT (op0), op1);
++              tree right = chrec_fold_multiply (type, CHREC_RIGHT (op0), op1);
++              return build_polynomial_chrec (CHREC_VARIABLE (op0), left, right);
++            }
++          else
++            {
++              tree utype = unsigned_type_for (type);
++              tree uop1 = chrec_convert_rhs (utype, op1);
++              tree uleft0 = chrec_convert_rhs (utype, CHREC_LEFT (op0));
++              tree uright0 = chrec_convert_rhs (utype, CHREC_RIGHT (op0));
++              tree left = chrec_fold_multiply (utype, uleft0, uop1);
++              tree right = chrec_fold_multiply (utype, uright0, uop1);
++              tree tem = build_polynomial_chrec (CHREC_VARIABLE (op0),
++                                                 left, right);
++              return chrec_convert_rhs (type, tem);
++            }
++        }
++      else
++        {
++          return build_polynomial_chrec
++            (CHREC_VARIABLE (op0),
++             chrec_fold_multiply (type, CHREC_LEFT (op0), op1),
++             chrec_fold_multiply (type, CHREC_RIGHT (op0), op1));
++        }
+     }
+ 
+     CASE_CONVERT:
+       if (tree_contains_chrecs (op1, NULL))
+@@ -449,13 +501,20 @@ chrec_fold_multiply (tree type,
+   switch (TREE_CODE (op1))
+     {
+     case POLYNOMIAL_CHREC:
+-      gcc_checking_assert
+-        (!chrec_contains_symbols_defined_in_loop (op1,
+-                                                  CHREC_VARIABLE (op1)));
+-      return build_polynomial_chrec
+-        (CHREC_VARIABLE (op1),
+-         chrec_fold_multiply (type, CHREC_LEFT (op1), op0),
+-         chrec_fold_multiply (type, CHREC_RIGHT (op1), op0));
++      if (flag_chrec_mul_fold_strict_overflow)
++        {
++          gcc_unreachable ();
++        }
++      else
++        {
++          gcc_checking_assert
++            (!chrec_contains_symbols_defined_in_loop (op1,
++                                                      CHREC_VARIABLE (op1)));
++          return build_polynomial_chrec
++            (CHREC_VARIABLE (op1),
++             chrec_fold_multiply (type, CHREC_LEFT (op1), op0),
++             chrec_fold_multiply (type, CHREC_RIGHT (op1), op0));
++        }
+ 
+     CASE_CONVERT:
+       if (tree_contains_chrecs (op1, NULL))
+diff --git a/gcc/tree-chrec.h b/gcc/tree-chrec.h
+index fcf41710d..cdc97d5d9 100644
+--- a/gcc/tree-chrec.h
++++ b/gcc/tree-chrec.h
+@@ -63,7 +63,7 @@ extern tree chrec_fold_plus (tree, tree, tree);
+ extern tree chrec_fold_minus (tree, tree, tree);
+ extern tree chrec_fold_multiply (tree, tree, tree);
+ extern tree chrec_convert (tree, tree, gimple *, bool = true, tree = NULL);
+-extern tree chrec_convert_rhs (tree, tree, gimple *);
++extern tree chrec_convert_rhs (tree, tree, gimple * = NULL);
+ extern tree chrec_convert_aggressive (tree, tree, bool *);
+ 
+ /* Operations.  */
+diff --git a/gcc/value-range.cc b/gcc/value-range.cc
+index 000bbcf89..a1dc10a24 100644
+--- a/gcc/value-range.cc
++++ b/gcc/value-range.cc
+@@ -656,6 +656,18 @@ irange::contains_p (tree cst) const
+ 
+   return false;
+ }
++bool
++irange::nonnegative_p () const
++{
++  return wi::ge_p (lower_bound (), 0, TYPE_SIGN (type ()));
++}
++
++bool
++irange::nonpositive_p () const
++{
++  return wi::le_p (upper_bound (), 0, TYPE_SIGN (type ()));
++}
++
+ 
+ 
+ /* Normalize addresses into constants.  */
+diff --git a/gcc/value-range.h b/gcc/value-range.h
+index d4cba22d5..2dc0907de 100644
+--- a/gcc/value-range.h
++++ b/gcc/value-range.h
+@@ -69,6 +69,8 @@ public:
+   bool varying_p () const;
+   bool singleton_p (tree *result = NULL) const;
+   bool contains_p (tree) const;
++  bool nonnegative_p () const;
++  bool nonpositive_p () const;
+ 
+   // In-place operators.
+   void union_ (const irange &);
+-- 
+2.33.0
+
diff --git a/0099-Enable-Transposed-SLP.patch b/0099-Enable-Transposed-SLP.patch
new file mode 100644
index 0000000000000000000000000000000000000000..b4e8b24b669790890da83fa4966a18efb18f90ae
--- /dev/null
+++ b/0099-Enable-Transposed-SLP.patch
@@ -0,0 +1,5624 @@
+From 0dd3b8532f35486bd5db2c71342c8dfed4c0893a Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?=
+Date: Thu, 25 Jul 2024 17:25:23 +0800
+Subject: [PATCH] Enable Transposed SLP.
+
+---
+ gcc/common.opt                          |    4 +
+ gcc/testsuite/gcc.dg/vect/transpose-1.c |   53 +
+ gcc/testsuite/gcc.dg/vect/transpose-2.c |   50 +
+ gcc/testsuite/gcc.dg/vect/transpose-3.c |   54 +
+ gcc/testsuite/gcc.dg/vect/transpose-4.c |   53 +
+ gcc/testsuite/gcc.dg/vect/transpose-5.c |   74 ++
+ gcc/testsuite/gcc.dg/vect/transpose-6.c |   67 +
+ gcc/testsuite/gcc.dg/vect/transpose-7.c |   53 +
+ gcc/testsuite/gcc.dg/vect/transpose-8.c |   53 +
+ gcc/testsuite/gcc.dg/vect/vect.exp      |    7 +
+ gcc/tree-loop-distribution.cc           | 1464 ++++++++++++++++++++-
+ gcc/tree-vect-data-refs.cc              |  237 ++++
+ gcc/tree-vect-loop.cc                   |   42 +-
+ gcc/tree-vect-patterns.cc               |    4 +-
+ gcc/tree-vect-slp.cc                    | 1553 ++++++++++++++++++++---
+ gcc/tree-vect-stmts.cc                  |  973 +++++++++++++-
+ gcc/tree-vectorizer.h                   |   96 +-
+ 17 files changed, 4648 insertions(+), 189 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-2.c
+ create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-3.c
+ create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-4.c
+ create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-5.c
+ create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-6.c
+ create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-7.c
+ create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-8.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index b18f0b944..5958c4e0b 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -3221,6 +3221,10 @@ ftree-slp-vectorize
+ Common Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize)
+ Enable basic block vectorization (SLP) on trees.
+ 
++ftree-slp-transpose-vectorize
++Common Var(flag_tree_slp_transpose_vectorize) Optimization Init(0)
++Enable basic block vectorization (SLP) for transposed stores and loads on trees.
++
+ fvect-cost-model=
+ Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization
+ -fvect-cost-model=[unlimited|dynamic|cheap|very-cheap]	Specifies the cost model for vectorization.
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-1.c b/gcc/testsuite/gcc.dg/vect/transpose-1.c
+new file mode 100644
+index 000000000..8237a8b9e
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/transpose-1.c
+@@ -0,0 +1,53 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-require-effective-target vect_int } */
++#include <stdio.h>
++#include <stdlib.h>
++#include "tree-vect.h"
++
++#define N 4
++#define M 256
++
++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2)
++{
++  int i = 0;
++  int sum = 0;
++  unsigned c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N];
++  for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      c0[i] = pix1[0] - pix2[0];
++      c1[i] = pix1[1] - pix2[1];
++      c2[i] = pix1[2] - pix2[2];
++      c3[i] = pix1[3] - pix2[3];
++      c4[i] = pix1[4] - pix2[4];
++      c5[i] = pix1[5] - pix2[5];
++      c6[i] = pix1[6] - pix2[6];
++      c7[i] = pix1[7] - pix2[7];
++    }
++  for (int i = 0; i < N; i++)
++    {
++      sum += c0[i] + c1[i] + c2[i] + c3[i] + c4[i] + c5[i] + c6[i] + c7[i];
++    }
++  return sum;
++}
++
++int main (int argc, const char* argv[])
++{
++  unsigned char input1[M];
++  unsigned char input2[M];
++  int i1 = 16;
++  int i2 = 8;
++  check_vect ();
++  for (int i = 0; i < M; i++)
++    {
++      input1[i] = i * 2;
++      input2[i] = i;
++    }
++  int sum = foo (input1, i1, input2, i2);
++  if (sum != 1264)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-2.c b/gcc/testsuite/gcc.dg/vect/transpose-2.c
+new file mode 100644
+index 000000000..fdf4dbd96
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/transpose-2.c
+@@ -0,0 +1,50 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-additional-options "-fno-tree-loop-vectorize -fno-tree-dse" } */
++/* { dg-require-effective-target vect_int } */
++#include <stdio.h>
++#include <stdlib.h>
++#include "tree-vect.h"
++
++#define N 8
++#define M 256
++
++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2)
++{
++  int i = 0;
++  int sum = 0;
++  unsigned short c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N];
++  for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      c0[i] = pix1[0] - pix2[0];
++      c1[i] = pix1[1] - pix2[1];
++      c2[i] = pix1[2] - pix2[2];
++      c3[i] = pix1[3] - pix2[3];
++    }
++  for (int i = 0; i < N; i++)
++    {
++      sum += c0[i] + c1[i] + c2[i] + c3[i];
++    }
++  return sum;
++}
++
++int main (int argc, const char* argv[])
++{
++  unsigned char input1[M];
++  unsigned char input2[M];
++  int i1 = 5;
++  int i2 = 4;
++  check_vect ();
++  for (int i = 0; i < M; i++)
++    {
++      input1[i] = i * 4;
++      input2[i] = i * 2;
++    }
++  int sum = foo (input1, i1, input2, i2);
++  if (sum != 1440)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-3.c b/gcc/testsuite/gcc.dg/vect/transpose-3.c
+new file mode 100644
+index 000000000..e492e3717
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/transpose-3.c
+@@ -0,0 +1,54 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-additional-options "-fno-tree-loop-vectorize -fno-tree-dse -fno-tree-fre" } */
++/* { dg-require-effective-target vect_int } */
++#include <stdio.h>
++#include <stdlib.h>
++#include "tree-vect.h"
++
++#define N 4
++#define M 256
++
++int foo (unsigned short *pix1, int i_pix1, unsigned short *pix2, int i_pix2)
++{
++  int i = 0;
++  int sum = 0;
++  unsigned c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N];
++  for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      c0[i] = pix1[0] - pix2[0];
++      c1[i] = pix1[1] - pix2[1];
++      c2[i] = pix1[2] - pix2[2];
++      c3[i] = pix1[3] - pix2[3];
++      c4[i] = pix1[4] - pix2[4];
++      c5[i] = pix1[5] - pix2[5];
++      c6[i] = pix1[6] - pix2[6];
++      c7[i] = pix1[7] - pix2[7];
++    }
++  for (int i = 0; i < N; i++)
++    {
++      sum += c0[i] + c1[i] + c2[i] + c3[i] + c4[i] + c5[i] + c6[i] + c7[i];
++    }
++  return sum;
++}
++
++int main (int argc, const char* argv[])
++{
++  unsigned short input1[M];
++  unsigned short input2[M];
++  int i1 = 8;
++  int i2 = 4;
++  check_vect ();
++  for (int i = 0; i < M; i++)
++    {
++      input1[i] = i * 4;
++      input2[i] = i;
++    }
++  int sum = foo (input1, i1, input2, i2);
++  if (sum != 1680)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-4.c b/gcc/testsuite/gcc.dg/vect/transpose-4.c
+new file mode 100644
+index 000000000..0b4adea9b
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/transpose-4.c
+@@ -0,0 +1,53 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-require-effective-target vect_int } */
++#include <stdio.h>
++#include <stdlib.h>
++#include "tree-vect.h"
++
++#define N 4
++#define M 256
++
++int foo (unsigned *pix1, int i_pix1, unsigned *pix2, int i_pix2)
++{
++  int i = 0;
++  int sum = 0;
++  unsigned c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N];
++  for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      c0[i] = pix1[0] - pix2[0];
++      c1[i] = pix1[1] - pix2[1];
++      c2[i] = pix1[2] - pix2[2];
++      c3[i] = pix1[3] - pix2[3];
++      c4[i] = pix1[4] - pix2[4];
++      c5[i] = pix1[5] - pix2[5];
++      c6[i] = pix1[6] - pix2[6];
++      c7[i] = pix1[7] - pix2[7];
++    }
++  for (int i = 0; i < N; i++)
++    {
++      sum += c0[i] + c1[i] + c2[i] + c3[i] + c4[i] + c5[i] + c6[i] + c7[i];
++    }
++  return sum;
++}
++
++int main (int argc, const char* argv[])
++{
++  unsigned input1[M];
++  unsigned input2[M];
++  int i1 = 12;
++  int i2 = 6;
++  check_vect ();
++  for (int i = 0; i < M; i++)
++    {
++      input1[i] = i * 7;
++      input2[i] = i * 3;
++    }
++  int sum = foo (input1, i1, input2, i2);
++  if (sum != 3616)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-5.c b/gcc/testsuite/gcc.dg/vect/transpose-5.c
+new file mode 100644
+index 000000000..040dedf1b
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/transpose-5.c
+@@ -0,0 +1,74 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-additional-options "-fno-tree-dse -fno-tree-fre" } */
++/* { dg-require-effective-target vect_int } */
++#include <stdio.h>
++#include <stdlib.h>
++#include <math.h>
++#include "tree-vect.h"
++
++#define N 4
++#define M 256
++#define eps 1e-8
++
++double foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2)
++{
++  unsigned a0[N];
++  unsigned a1[N];
++  unsigned a2[N];
++  unsigned a3[N];
++
++  int b0[N];
++  int b1[N];
++  int b2[N];
++  int b3[N];
++
++  for (int i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      a0[i] = (pix1[0] - pix2[0]) + ((pix1[4] + pix2[4]) << 16);
++      a1[i] = (pix1[1] - pix2[1]) + ((pix1[5] + pix2[5]) << 16);
++      a2[i] = (pix1[2] - pix2[2]) + ((pix1[6] + pix2[6]) << 16);
++      a3[i] = (pix1[3] - pix2[3]) + ((pix1[7] + pix2[7]) << 16);
++    }
++
++  for (int i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      b0[i] = (pix1[0] - pix2[0]) + (pix1[4] + pix2[4]);
++      b1[i] = (pix1[1] - pix2[1]) + (pix1[5] + pix2[5]);
++      b2[i] = (pix1[2] - pix2[2]) + (pix1[6] + pix2[6]);
++      b3[i] = (pix1[3] - pix2[3]) + (pix1[7] + pix2[7]);
++    }
++
++  double sum = 0;
++  for (int i = 0; i < N; i++)
++    {
++      sum += a0[i] + a1[i] + a2[i] + a3[i] + b0[i] + b1[i] + b2[i] + b3[i];
++    }
++  return sum;
++}
++
++int main (int argc, const char* argv[])
++{
++  unsigned char input1[M];
++  unsigned char input2[M];
++  int i1 = 8;
++  int i2 = 3;
++  unsigned char m = 2;
++  unsigned short n = 12;
++  float t = 3.0;
++  double k = 4.2;
++  check_vect ();
++  for (int i = 0; i < M; i++)
++    {
++      input1[i] = i * 6;
++      input2[i] = i * 3;
++    }
++  double sum = foo (input1, i1, input2, i2);
++  if (fabs (sum - 78648144) > eps)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */
++/* { dg-final { scan-tree-dump-times "vectorizable_store for slp transpose" 2 "slp1" } } */
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-6.c b/gcc/testsuite/gcc.dg/vect/transpose-6.c
+new file mode 100644
+index 000000000..3e134ac02
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/transpose-6.c
+@@ -0,0 +1,67 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-require-effective-target vect_int } */
++/* { dg-require-effective-target vect_float } */
++#include <stdio.h>
++#include <stdlib.h>
++#include <math.h>
++#include "tree-vect.h"
++
++#define N 4
++#define M 256
++#define eps 1e-8
++
++float foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2)
++{
++  unsigned a0[N];
++  unsigned a1[N];
++  unsigned a2[N];
++  unsigned a3[N];
++
++  float c0[N];
++  float c1[N];
++  float c2[N];
++  float c3[N];
++
++  for (int i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      a0[i] = (pix1[0] - pix2[0]) + ((pix1[4] - pix2[4]) << 16);
++      a1[i] = (pix1[1] - pix2[1]) + ((pix1[5] - pix2[5]) << 16);
++      a2[i] = (pix1[2] - pix2[2]) + ((pix1[6] - pix2[6]) << 16);
++      a3[i] = (pix1[3] - pix2[3]) + ((pix1[7] - pix2[7]) << 16);
++
++      c0[i] = (pix1[0] * pix2[0]) + (pix1[4] * pix2[4]);
++      c1[i] = (pix1[1] * pix2[1]) + (pix1[5] * pix2[5]);
++      c2[i] = (pix1[2] * pix2[2]) + (pix1[6] * pix2[6]);
++      c3[i] = (pix1[3] * pix2[3]) + (pix1[7] * pix2[7]);
++    }
++
++  float sum = 0;
++  for (int i = 0; i < N; i++)
++    {
++      sum += a0[i] + a1[i] + a2[i] + a3[i] + c0[i] + c1[i] + c2[i] + c3[i];
++    }
++  return sum;
++}
++
++int main (int argc, const char* argv[])
++{
++  unsigned char input1[M];
++  unsigned char input2[M];
++  int i1 = 18;
++  int i2 = 6;
++  check_vect ();
++  for (int i = 0; i < M; i++)
++    {
++      input1[i] = i * 4;
++      input2[i] = i * 2;
++    }
++  float sum = foo (input1, i1, input2, i2);
++  if (fabs (sum - 106041168) > eps)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */
++/* { dg-final { scan-tree-dump-times "vectorizable_store for slp transpose" 2 "slp1" } } */
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-7.c b/gcc/testsuite/gcc.dg/vect/transpose-7.c
+new file mode 100644
+index 000000000..8ba1b1b6d
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/transpose-7.c
+@@ -0,0 +1,53 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-additional-options "-fno-tree-loop-vectorize -fno-tree-dse" } */
++/* { dg-require-effective-target vect_int } */
++#include <stdio.h>
++#include <stdlib.h>
++#include "tree-vect.h"
++
++#define N 16
++#define M 256
++
++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2)
++{
++  int i = 0;
++  int sum = 0;
++  unsigned char c0[N], c1[N];
++  for (int i = 0; i < N/2; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      c0[i] = pix1[0] - pix2[0];
++      c1[i] = pix1[1] - pix2[1];
++    }
++  for (int i = N/2; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      c0[i] = pix1[0] - pix2[0];
++      c1[i] = pix1[1] - pix2[1];
++    }
++  for (int i = 0; i < N; i++)
++    {
++      sum += c0[i] + c1[i];
++    }
++  return sum;
++}
++
++int main (int argc, const char* argv[])
++{
++  unsigned char input1[M];
++  unsigned char input2[M];
++  int i1 = 6;
++  int i2 = 4;
++  check_vect ();
++  for (int i = 0; i < M; i++)
++    {
++      input1[i] = i * 5;
++      input2[i] = i * 2;
++    }
++  int sum = foo (input1, i1, input2, i2);
++  if (sum != 3280)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-8.c b/gcc/testsuite/gcc.dg/vect/transpose-8.c
+new file mode 100644
+index 000000000..a154f012a
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/transpose-8.c
+@@ -0,0 +1,53 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-additional-options "-fno-tree-loop-vectorize" } */
++/* { dg-require-effective-target vect_int } */
++#include <stdio.h>
++#include <stdlib.h>
++#include "tree-vect.h"
++
++#define N 32
++#define M 256
++
++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2)
++{
++  int i = 0;
++  int sum = 0;
++  unsigned char c0[N], c1[N];
++  for (int i = 0; i < N/2; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      c0[i] = pix1[0] - pix2[0];
++      c1[i] = pix1[1] - pix2[1];
++    }
++  for (int i = N/2; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      c0[i] = pix1[0] - pix2[0];
++      c1[i] = pix1[1] - pix2[1];
++    }
++  for (int i = 0; i < N; i++)
++    {
++      sum += c0[i] + c1[i];
++    }
++  return sum;
++}
++
++int main (int argc, const char* argv[])
++{
++  unsigned char input1[M];
++  unsigned char input2[M];
++  int i1 = 6;
++  int i2 = 4;
++  check_vect ();
++  for (int i = 0; i < M; i++)
++    {
++      input1[i] = i * 5;
++      input2[i] = i * 2;
++    }
++  int sum = foo (input1, i1, input2, i2);
++  if (sum != 7584)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */
+diff --git a/gcc/testsuite/gcc.dg/vect/vect.exp b/gcc/testsuite/gcc.dg/vect/vect.exp
+index dcaef1e0a..ae5212411 100644
+--- a/gcc/testsuite/gcc.dg/vect/vect.exp
++++ b/gcc/testsuite/gcc.dg/vect/vect.exp
+@@ -117,6 +117,13 @@ et-dg-runtest dg-runtest [lsort \
+ 	[glob -nocomplain $srcdir/$subdir/no-vfa-*.\[cS\]]] \
+ 	"" $DEFAULT_VECTCFLAGS
+ 
++# -ftree-slp-transpose-vectorize SLP tests
++set VECT_SLP_CFLAGS $SAVED_VECT_SLP_CFLAGS
++lappend VECT_SLP_CFLAGS "-ftree-slp-transpose-vectorize"
++et-dg-runtest dg-runtest [lsort \
++	[glob -nocomplain $srcdir/$subdir/transpose-*.\[cS\]]] \
++	"" "-ftree-slp-transpose-vectorize -fdump-tree-slp-details -O3"
++
+ # -ffast-math tests
+ set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+ lappend DEFAULT_VECTCFLAGS "-ffast-math"
+diff --git a/gcc/tree-loop-distribution.cc b/gcc/tree-loop-distribution.cc
+index 606eb05e6..8d118e987 100644
+--- a/gcc/tree-loop-distribution.cc
++++ b/gcc/tree-loop-distribution.cc
+@@ -36,6 +36,47 @@ along with GCC; see the file COPYING3.  If not see
+ |D(I) = A(I-1)*E
+ |ENDDO
+ 
++   If an unvectorizable loop has grouped loads, and calculations from grouped
++   loads are isomorphic, build temp arrays using stmts where isomorphic
++   calculations end.  After distribution, the partition built from temp
++   arrays can be vectorized in pass SLP after loop unrolling.  For example,
++
++   |DO I = 1, N
++   |  A = FOO (ARG_1);
++   |  B = FOO (ARG_2);
++   |  C = BAR_0 (A);
++   |  D = BAR_1 (B);
++   |ENDDO
++
++   is transformed to
++
++   |DO I = 1, N
++   |  J = FOO (ARG_1);
++   |  K = FOO (ARG_2);
++   |  X[I] = J;
++   |  Y[I] = K;
++   |  A = X[I];
++   |  B = Y[I];
++   |  C = BAR_0 (A);
++   |  D = BAR_1 (B);
++   |ENDDO
++
++   and is then distributed to
++
++   |DO I = 1, N
++   |  J = FOO (ARG_1);
++   |  K = FOO (ARG_2);
++   |  X[I] = J;
++   |  Y[I] = K;
++   |ENDDO
++
++   |DO I = 1, N
++   |  A = X[I];
++   |  B = Y[I];
++   |  C = BAR_0 (A);
++   |  D = BAR_1 (B);
++   |ENDDO
++
+    Loop distribution is the dual of loop fusion.  It separates statements
+    of a loop (or loop nest) into multiple loops (or loop nests) with the
+    same loop header.  The major goal is to separate statements which may
+@@ -44,7 +85,9 @@ along with GCC; see the file COPYING3.  If not see
+ 
+    1) Seed partitions with specific type statements.  For now we support
+       two types seed statements: statement defining variable used outside
+-      of loop; statement storing to memory.
++      of loop; statement storing to memory.  Moreover, for unvectorizable
++      loops, we try to find isomorphic stmts from grouped load and build
++      temp arrays as new seed statements.
+    2) Build reduced dependence graph (RDG) for loop to be distributed.
+       The vertices (RDG:V) model all statements in the loop and the edges
+       (RDG:E) model flow and control dependencies between statements.
+@@ -90,6 +133,8 @@ along with GCC; see the file COPYING3.  If not see
+    data reuse.  */
+ 
+ #include "config.h"
++#define INCLUDE_MAP
++#define INCLUDE_ALGORITHM
+ #include "system.h"
+ #include "coretypes.h"
+ #include "backend.h"
+@@ -115,6 +160,7 @@ along with GCC; see the file COPYING3.  If not see
+ #include "tree-vectorizer.h"
+ #include "tree-eh.h"
+ #include "gimple-fold.h"
++#include "optabs-tree.h"
+ #include "tree-affine.h"
+ #include "intl.h"
+ #include "rtl.h"
+@@ -188,6 +234,52 @@ struct rdg_vertex
+ #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
+ #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
+ 
++/* Results of isomorphic group analysis.  */
++#define UNINITIALIZED (0)
++#define ISOMORPHIC (1)
++#define HETEROGENEOUS (1 << 1)
++#define UNCERTAIN (1 << 2)
++
++/* Information of a stmt while analyzing isomorphic use in group.  */
++
++typedef struct _group_info
++{
++  gimple *stmt;
++
++  /* True if stmt can be a cut point.  */
++  bool cut_point;
++
++  /* For use_stmt with two rhses, one of which is the lhs of stmt.
++     If the other is unknown to be isomorphic, mark it uncertain.  */
++  bool uncertain;
++
++  /* Searching of isomorphic stmt reaches heterogeneous groups or reaches
++     MEM stmts.  */
++  bool done;
++
++  _group_info ()
++  {
++    stmt = NULL;
++    cut_point = false;
++    uncertain = false;
++    done = false;
++  }
++} *group_info;
++
++/* PAIR of cut points and corresponding profit.  */
++typedef std::pair<vec<gimple *> *, int> stmts_profit;
++
++/* MAP of vector factor VF and corresponding stmts_profit PAIR.  */
++typedef std::map<unsigned, stmts_profit> vf_stmts_profit_map;
++
++/* PAIR of group_num and iteration_num.  We consider rhses from the same
++   group and iteration are isomorphic.  */
++typedef std::pair<unsigned, unsigned> group_iteration;
++
++/* An isomorphic stmt is determined by lhs of use_stmt, group_num and
++   the iteration_num when we insert this stmt to this map.  */
++typedef std::map<tree, group_iteration> isomer_stmt_lhs;
++
+ /* Data dependence type.  */
+ 
+ enum rdg_dep_type
+ {
+@@ -600,13 +692,14 @@ class loop_distribution
+   /* Returns true when PARTITION1 and PARTITION2 access the same memory
+      object in RDG.  */
+   bool share_memory_accesses (struct graph *rdg,
+-			      partition *partition1, partition *partition2);
++			      partition *partition1, partition *partition2,
++			      hash_set<tree> *excluded_arrays);
+ 
+   /* For each seed statement in STARTING_STMTS, this function builds
+      partition for it by adding depended statements according to RDG.
+      All partitions are recorded in PARTITIONS.  */
+   void rdg_build_partitions (struct graph *rdg,
+-			     vec<gimple *> starting_stmts,
++			     vec<gimple *> *starting_stmts,
+ 			     vec<partition *> *partitions);
+ 
+   /* Compute partition dependence created by the data references in DRS1
+@@ -643,15 +736,50 @@ class loop_distribution
+ 
+   /* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
+      ALIAS_DDRS contains ddrs which need runtime alias check.  */
+-  void finalize_partitions (class loop *loop, vec<struct partition *>
+-			    *partitions, vec<ddr_p> *alias_ddrs);
++  void finalize_partitions (class loop *loop,
++			    vec<struct partition *> *partitions,
++			    vec<ddr_p> *alias_ddrs, bitmap producers);
++
++  /* Analyze the loop form and whether it is vectorizable to decide whether
++     we need to insert temp arrays to distribute it.  */
++  bool may_insert_temp_arrays (loop_p loop, struct graph *&rdg,
++			       control_dependences *cd);
++
++  /* Reset gimple_uid of GIMPLE_DEBUG and GIMPLE_LABEL to -1.  */
++  void reset_gimple_uid (loop_p loop);
++
++  bool check_loop_vectorizable (loop_p loop);
++
++  inline void rebuild_rdg (loop_p loop, struct graph *&rdg,
++			   control_dependences *cd);
++
++  /* If loop is not distributed, remove inserted temp arrays.  */
++  void remove_insertion (loop_p loop, struct graph *flow_only_rdg,
++			 bitmap producers, struct partition *partition);
++
++  /* Insert temp arrays if isomorphic computation exists.  Temp arrays will be
++     regarded as SEED_STMTS for building partitions in succeeding processes.  */
++  bool insert_temp_arrays (loop_p loop, vec<gimple *> seed_stmts,
++			   hash_set<tree> *tmp_array_vars, bitmap producers);
++
++  void build_producers (loop_p loop, bitmap producers,
++			vec<gimple *> &transformed);
++
++  void do_insertion (loop_p loop, struct graph *flow_only_rdg, tree iv,
++		     bitmap cut_points, hash_set<tree> *tmp_array_vars,
++		     bitmap producers);
++
++  /* Fuse PARTITIONS built from inserted temp arrays into one partition,
++     fuse the rest into another.  */
++  void merge_remaining_partitions (vec<struct partition *> *partitions,
++				   bitmap producers);
+ 
+   /* Distributes the code from LOOP in such a way that producer statements
+      are placed before consumer statements.  Tries to separate only the
+      statements from STMTS into separate loops.  Returns the number of
+      distributed loops.  Set NB_CALLS to number of generated builtin calls.
+      Set *DESTROY_P to whether LOOP needs to be destroyed.  */
+-  int distribute_loop (class loop *loop, const vec<gimple *> &stmts,
++  int distribute_loop (class loop *loop, vec<gimple *> &stmts,
+ 		       control_dependences *cd, int *nb_calls, bool *destroy_p,
+ 		       bool only_patterns_p);
+ 
+@@ -1893,7 +2021,8 @@ loop_distribution::classify_partition (loop_p loop,
+ 
+ bool
+ loop_distribution::share_memory_accesses (struct graph *rdg,
+-		partition *partition1, partition *partition2)
++		partition *partition1, partition *partition2,
++		hash_set<tree> *excluded_arrays)
+ {
+   unsigned i, j;
+   bitmap_iterator bi, bj;
+@@ -1927,7 +2056,10 @@ loop_distribution::share_memory_accesses (struct graph *rdg,
+ 	  if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
+ 	      && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
+ 	      && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
+-	      && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
++	      && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0)
++	      /* An exception, if PARTITION1 and PARTITION2 contain the
++		 temp array we inserted, do not merge them.  */
++	      && !excluded_arrays->contains (DR_REF (dr1)))
+ 	    return true;
+ 	}
+     }
+@@ -1941,14 +2073,14 @@ loop_distribution::share_memory_accesses (struct graph *rdg,
+ 
+ void
+ loop_distribution::rdg_build_partitions (struct graph *rdg,
+-					 vec<gimple *> starting_stmts,
++					 vec<gimple *> *starting_stmts,
+ 					 vec<partition *> *partitions)
+ {
+   auto_bitmap processed;
+   int i;
+   gimple *stmt;
+ 
+-  FOR_EACH_VEC_ELT (starting_stmts, i, stmt)
++  FOR_EACH_VEC_ELT (*starting_stmts, i, stmt)
+     {
+       int v = rdg_vertex_for_stmt (rdg, stmt);
+ 
+@@ -2912,13 +3044,47 @@ fuse_memset_builtins (vec<struct partition *> *partitions)
+     }
+ }
+ 
++void
++loop_distribution::merge_remaining_partitions
++		   (vec<struct partition *> *partitions,
++		    bitmap producers)
++{
++  struct partition *partition = NULL;
++  struct partition *p1 = NULL, *p2 = NULL;
++  for (unsigned i = 0; partitions->iterate (i, &partition); i++)
++    {
++      if (bitmap_intersect_p (producers, partition->stmts))
++	{
++	  if (p1 == NULL)
++	    {
++	      p1 = partition;
++	      continue;
++	    }
++	  partition_merge_into (NULL, p1, partition, FUSE_FINALIZE);
++	}
++      else
++	{
++	  if (p2 == NULL)
++	    {
++	      p2 = partition;
++	      continue;
++	    }
++	  partition_merge_into (NULL, p2, partition, FUSE_FINALIZE);
++	}
++      partitions->unordered_remove (i);
++      partition_free (partition);
++      i--;
++    }
++}
++
+ void
+ loop_distribution::finalize_partitions (class loop *loop,
+-					vec<struct partition *> *partitions,
+-					vec<ddr_p> *alias_ddrs)
++					vec<struct partition *> *partitions,
++					vec<ddr_p> *alias_ddrs,
++					bitmap producers)
+ {
+   unsigned i;
+-  struct partition *partition, *a;
++  struct partition *partition;
+ 
+   if (partitions->length () == 1
+       || alias_ddrs->length () > 0)
+@@ -2950,13 +3116,7 @@ loop_distribution::finalize_partitions (class loop *loop,
+       || (loop->inner == NULL
+ 	  && i >= NUM_PARTITION_THRESHOLD && num_normal > num_builtin))
+     {
+-      a = (*partitions)[0];
+-      for (i = 1; partitions->iterate (i, &partition); ++i)
+-	{
+-	  partition_merge_into (NULL, a, partition, FUSE_FINALIZE);
+-	  partition_free (partition);
+-	}
+-      partitions->truncate (1);
++      merge_remaining_partitions (partitions, producers);
+     }
+ 
+   /* Fuse memset builtins if possible.  */
+@@ -2964,6 +3124,1216 @@ loop_distribution::finalize_partitions (class loop *loop,
+     fuse_memset_builtins (partitions);
+ }
+ 
++/* Gimple uids of GIMPLE_DEBUG and GIMPLE_LABEL were changed during function
++   vect_analyze_loop, reset them to -1.  */
++
++void
++loop_distribution::reset_gimple_uid (loop_p loop)
++{
++  basic_block *bbs = get_loop_body_in_custom_order (loop, this,
++						    bb_top_order_cmp_r);
++  for (int i = 0; i < int (loop->num_nodes); i++)
++    {
++      basic_block bb = bbs[i];
++      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
++	   gsi_next (&gsi))
++	{
++	  gimple *stmt = gsi_stmt (gsi);
++	  if (is_gimple_debug (stmt) || gimple_code (stmt) == GIMPLE_LABEL)
++	    gimple_set_uid (stmt, -1);
++	}
++    }
++  free (bbs);
++}
++
++bool
++loop_distribution::check_loop_vectorizable (loop_p loop)
++{
++  vec_info_shared shared;
++  vect_analyze_loop (loop, &shared, true);
++  loop_vec_info vinfo = loop_vec_info_for_loop (loop);
++  reset_gimple_uid (loop);
++  if (vinfo == NULL)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file,
++		 "Loop %d no temp array insertion: bad data access pattern,"
++		 " unable to generate loop_vinfo.\n", loop->num);
++      return false;
++    }
++  if (vinfo->vectorizable)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "Loop %d no temp array insertion: original loop"
++			    " can be vectorized without distribution.\n",
++		 loop->num);
++      delete vinfo;
++      loop->aux = NULL;
++      return false;
++    }
++  if (vinfo->grouped_loads.length () == 0)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "Loop %d no temp array insertion: original loop"
++			    " has no grouped loads.\n" , loop->num);
++      delete vinfo;
++      loop->aux = NULL;
++      return false;
++    }
++  return true;
++}
++
++inline void
++loop_distribution::rebuild_rdg (loop_p loop, struct graph *&rdg,
++				control_dependences *cd)
++{
++  free_rdg (rdg);
++  rdg = build_rdg (loop, cd);
++  gcc_checking_assert (rdg != NULL);
++}
++
++bool
++loop_distribution::may_insert_temp_arrays (loop_p loop, struct graph *&rdg,
++					   control_dependences *cd)
++{
++  if (!(flag_tree_slp_transpose_vectorize && flag_tree_loop_vectorize))
++    return false;
++
++  /* Only loops with two basic blocks HEADER and LATCH are supported.  HEADER
++     is the main body of a LOOP and LATCH is the basic block that controls the
++     LOOP execution.  The size of a temp array is determined by the loop
++     execution count, so it must be a constant.  */
++  tree loop_extent = number_of_latch_executions (loop);
++  if (loop->inner != NULL || loop->num_nodes > 2
++      || TREE_CODE (loop_extent) != INTEGER_CST)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "Loop %d: no temp array insertion: bad loop"
++			    " form.\n", loop->num);
++      return false;
++    }
++
++  if (loop->dont_vectorize)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "Loop %d: no temp array insertion: this loop"
++			    " should never be vectorized.\n",
++		 loop->num);
++      return false;
++    }
++
++  /* Do not distribute a LOOP that is able to be vectorized without
++     distribution.  */
++  if (!check_loop_vectorizable (loop))
++    {
++      rebuild_rdg (loop, rdg, cd);
++      return false;
++    }
++
++  rebuild_rdg (loop, rdg, cd);
++  return true;
++}
++
++/* Return the max length of grouped loads if all group lengths satisfy
++   len = 2 ^ n.  Otherwise, return 0.  */
++
++static unsigned
++get_max_vf (loop_vec_info vinfo)
++{
++  unsigned size = 0;
++  unsigned max = 0;
++  stmt_vec_info stmt_info;
++  unsigned i = 0;
++  FOR_EACH_VEC_ELT (vinfo->grouped_loads, i, stmt_info)
++    {
++      size = stmt_info->size;
++      if (!pow2p_hwi (size))
++	return 0;
++      max = size > max ? size : max;
++    }
++  return max;
++}
++
++/* Convert grouped_loads from a linked list to vectors of length VF.  Init
++   group_info of each stmt in the same group and put them into a vector.
++   These vectors constitute WORKLISTS.  We will re-analyze a group if it is
++   uncertain, so we regard WORKLISTS as a circular queue.  */
++
++static unsigned
++build_queue (loop_vec_info vinfo, unsigned vf,
++	     vec<vec<group_info> *> &worklists)
++{
++  stmt_vec_info stmt_info;
++  unsigned i = 0;
++  group_info ginfo = NULL;
++  vec<group_info> *worklist = NULL;
++  FOR_EACH_VEC_ELT (vinfo->grouped_loads, i, stmt_info)
++    {
++      unsigned group_size = stmt_info->size;
++      stmt_vec_info c_stmt_info = stmt_info;
++      bool succ = true;
++      while (group_size >= vf)
++	{
++	  vec_alloc (worklist, vf);
++	  for (unsigned j = 0; j < vf; ++j)
++	    {
++	      if (c_stmt_info == NULL)
++		{
++		  succ = false;
++		  break;
++		}
++	      ginfo = new _group_info ();
++	      ginfo->stmt = c_stmt_info->stmt;
++	      worklist->safe_push (ginfo);
++	      c_stmt_info = c_stmt_info->next_element;
++	    }
++	  if (!succ)
++	    {
++	      unsigned k = 0;
++	      ginfo = NULL;
++	      FOR_EACH_VEC_ELT (*worklist, k, ginfo)
++		delete ginfo;
++	      vec_free (worklist);
++	      break;
++	    }
++	  worklists.safe_push (worklist);
++	  group_size -= vf;
++	}
++    }
++  return worklists.length ();
++}
++
++static bool
++check_same_oprand_type (tree op1, tree op2)
++{
++  tree type1 = TREE_TYPE (op1);
++  tree type2 = TREE_TYPE (op2);
++  if (TREE_CODE (type1) != INTEGER_TYPE && TREE_CODE (type1) != REAL_TYPE)
++    return false;
++
++  return (TREE_CODE (type1) == TREE_CODE (type2)
++	  && TYPE_UNSIGNED (type1) == TYPE_UNSIGNED (type2)
++	  && TYPE_PRECISION (type1) == TYPE_PRECISION (type2));
++}
++
++static bool
++bit_field_p (gimple *stmt)
++{
++  unsigned i = 0;
++  auto_vec<data_reference_p> datarefs_vec;
++  data_reference_p dr;
++  if (!find_data_references_in_stmt (NULL, stmt, &datarefs_vec))
++    return true;
++
++  FOR_EACH_VEC_ELT (datarefs_vec, i, dr)
++    {
++      if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF
++	  && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1)))
++	return true;
++    }
++  return false;
++}
++
++static inline bool
++shift_operation (enum tree_code op)
++{
++  return op == LSHIFT_EXPR || op == RSHIFT_EXPR || op == LROTATE_EXPR
++	 || op == RROTATE_EXPR;
++}
++
++/* Return relationship between USE_STMT and the first use_stmt of the group.
++   RHS1 is the lhs of stmt recorded in group_info.  If another rhs of use_stmt
++   is not a constant, return UNCERTAIN and re-check it later.  */
++
++static unsigned
++check_isomorphic (gimple *use_stmt, gimple *first,
++		  tree rhs1, vec<tree> &hetero_lhs)
++{
++  /* Check same operation.  */
++  enum tree_code rhs_code_first = gimple_assign_rhs_code (first);
++  enum tree_code rhs_code_current = gimple_assign_rhs_code (use_stmt);
++  if (rhs_code_first != rhs_code_current)
++    return HETEROGENEOUS;
++
++  /* For shift operations, operands should be equal.  */
++  if (shift_operation (rhs_code_current))
++    {
++      tree shift_op_first = gimple_assign_rhs2 (first);
++      tree shift_op_current = gimple_assign_rhs2 (use_stmt);
++      if (!operand_equal_p (shift_op_first, shift_op_current, 0)
++	  || !TREE_CONSTANT (shift_op_first))
++	return HETEROGENEOUS;
++
++      return ISOMORPHIC;
++    }
++  /* Type conversion expr or assignment.  */
++  if (gimple_num_ops (first) == 2)
++    return (rhs_code_first == NOP_EXPR || rhs_code_first == CONVERT_EXPR
++	    || rhs_code_first == SSA_NAME) ? ISOMORPHIC : HETEROGENEOUS;
++
++  /* We find USE_STMT from lhs of a stmt, denote it as rhs1 of USE_STMT and
++     the other one as rhs2.  Check if the define-stmt of current rhs2 is
++     isomorphic with the define-stmt of rhs2 in the first USE_STMT of this
++     group.
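++
++     For instance (an illustrative sketch reusing the SSA names of the
++     example before find_isomorphic_stmts below): if the first use_stmt of
++     the group is _111 = _1 + _11 and the current one is _112 = _2 + _12
++     with RHS1 = _2, then rhs2_first is _11 and rhs2_curr is _12, and the
++     two use_stmts stay isomorphic as long as _11 and _12 are themselves
++     isomorphic.  */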
++  tree rhs2_first = gimple_assign_rhs1 (use_stmt) == rhs1
++		    ? gimple_assign_rhs2 (first) : gimple_assign_rhs1 (first);
++  tree rhs2_curr = gimple_assign_rhs1 (use_stmt) == rhs1
++		   ? gimple_assign_rhs2 (use_stmt) : gimple_assign_rhs1 (use_stmt);
++
++  if (check_same_oprand_type (rhs2_first, rhs2_curr))
++    {
++      if (TREE_CONSTANT (rhs2_curr))
++	return ISOMORPHIC;
++      else if (hetero_lhs.contains (rhs2_curr))
++	return HETEROGENEOUS;
++
++      /* Provisionally set the stmt as uncertain and analyze the whole group
++	 in function CHECK_UNCERTAIN later if all use_stmts are uncertain.  */
++      return UNCERTAIN;
++    }
++  return HETEROGENEOUS;
++}
++
++static bool
++unsupported_operations (gimple *stmt)
++{
++  enum tree_code code = gimple_assign_rhs_code (stmt);
++  return code == COND_EXPR;
++}
++
++/* Check if the single use_stmt of STMT is isomorphic with the first one's
++   use_stmt in current group.  */
++
++static unsigned
++check_use_stmt (group_info elmt, gimple *&first,
++		vec<gimple *> &tmp_stmts, vec<tree> &hetero_lhs)
++{
++  if (gimple_code (elmt->stmt) != GIMPLE_ASSIGN)
++    return HETEROGENEOUS;
++  use_operand_p dummy;
++  tree lhs = gimple_assign_lhs (elmt->stmt);
++  gimple *use_stmt = NULL;
++  single_imm_use (lhs, &dummy, &use_stmt);
++  /* STMTs with three rhs are not supported, e.g., GIMPLE_COND.  */
++  if (use_stmt == NULL || gimple_code (use_stmt) != GIMPLE_ASSIGN
++      || unsupported_operations (use_stmt) || bit_field_p (use_stmt))
++    return HETEROGENEOUS;
++  tmp_stmts.safe_push (use_stmt);
++  if (first == NULL)
++    {
++      first = use_stmt;
++      return UNINITIALIZED;
++    }
++  /* Check if current use_stmt and the first member's use_stmt in the group
++     are of the same type.  */
++  tree first_lhs = gimple_assign_lhs (first);
++  tree curr_lhs = gimple_assign_lhs (use_stmt);
++  if (!check_same_oprand_type (first_lhs, curr_lhs))
++    return HETEROGENEOUS;
++  return check_isomorphic (use_stmt, first, lhs, hetero_lhs);
++}
++
++/* Replace stmt field in group with stmts in TMP_STMTS, and insert their
++   lhs_info to ISOMER_LHS.  */
++
++static void
++update_isomer_lhs (vec<group_info> *group, unsigned group_num,
++		   unsigned iteration, isomer_stmt_lhs &isomer_lhs,
++		   vec<gimple *> &tmp_stmts, int &profit,
++		   vec<unsigned> &merged_groups)
++{
++  group_info elmt = NULL;
++  /* Do not insert temp array if isomorphic stmts from grouped load have
++     only casting operations.  Once an isomorphic calculation has 3 operands,
++     such as a plus operation, this group can be regarded as a cut point.  */
++  bool operated = (gimple_num_ops (tmp_stmts[0]) == 3);
++  /* Do not insert temp arrays if search of isomorphic stmts reaches
++     MEM stmts.  */
++  bool has_vdef = gimple_vdef (tmp_stmts[0]) != NULL;
++  bool merge = false;
++  for (unsigned i = 0; i < group->length (); i++)
++    {
++      elmt = (*group)[i];
++      elmt->stmt = has_vdef ? NULL : tmp_stmts[i];
++      elmt->cut_point = has_vdef ? false : (elmt->cut_point || operated);
++      elmt->uncertain = false;
++      elmt->done = has_vdef;
++      tree lhs = gimple_assign_lhs (tmp_stmts[i]);
++      if (isomer_lhs.find (lhs) != isomer_lhs.end ())
++	{
++	  merge = true;
++	  continue;
++	}
++      isomer_lhs[lhs] = std::make_pair (group_num, iteration);
++    }
++  if (merge)
++    {
++      merged_groups.safe_push (group_num);
++      profit = 0;
++      return;
++    }
++  enum vect_cost_for_stmt kind = scalar_stmt;
++  int scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0);
++  profit = (tmp_stmts.length () - 1) * scalar_cost;
++}
++
++/* Try to find rhs2 in ISOMER_LHS; if all rhs2 were found and their group_num
++   and iteration are the same, GROUP is isomorphic.
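++
++   E.g. (an illustrative sketch with made-up SSA names): for use_stmts
++   _21 = _111 + _5 and _22 = _112 + _6, the rhs2 operands are _5 and _6;
++   if ISOMER_LHS maps both to the same (group_num, iteration) pair the
++   result is ISOMORPHIC, if some of them are not recorded yet it is
++   UNCERTAIN, and any mismatch makes the group HETEROGENEOUS.  */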
++
++static unsigned
++check_isomorphic_rhs (vec<group_info> *group, vec<gimple *> &tmp_stmts,
++		      isomer_stmt_lhs &isomer_lhs)
++{
++  group_info elmt = NULL;
++  gimple *stmt = NULL;
++  unsigned j = 0;
++  unsigned group_num = -1u;
++  unsigned iteration = -1u;
++  tree rhs1 = NULL;
++  tree rhs2 = NULL;
++  unsigned status = UNINITIALIZED;
++  FOR_EACH_VEC_ELT (*group, j, elmt)
++    {
++      rhs1 = gimple_assign_lhs (elmt->stmt);
++      stmt = tmp_stmts[j];
++      rhs2 = (rhs1 == gimple_assign_rhs1 (stmt))
++	     ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt);
++      isomer_stmt_lhs::iterator iter = isomer_lhs.find (rhs2);
++      if (iter != isomer_lhs.end ())
++	{
++	  if (group_num == -1u)
++	    {
++	      group_num = iter->second.first;
++	      iteration = iter->second.second;
++	      status |= ISOMORPHIC;
++	      continue;
++	    }
++	  if (iter->second.first == group_num
++	      && iter->second.second == iteration)
++	    {
++	      status |= ISOMORPHIC;
++	      continue;
++	    }
++	  return HETEROGENEOUS;
++	}
++      else
++	status |= UNCERTAIN;
++    }
++  return status;
++}
++
++/* Update group_info for uncertain groups.  */
++
++static void
++update_uncertain_stmts (vec<group_info> *group, unsigned group_num,
++			unsigned iteration, vec<gimple *> &tmp_stmts)
++{
++  unsigned j = 0;
++  group_info elmt = NULL;
++  FOR_EACH_VEC_ELT (*group, j, elmt)
++    {
++      elmt->uncertain = true;
++      elmt->done = false;
++    }
++}
++
++/* Push stmts in TMP_STMTS into HETERO_LHS.  */
++
++static void
++set_hetero (vec<group_info> *group, vec<tree> &hetero_lhs,
++	    vec<gimple *> &tmp_stmts)
++{
++  group_info elmt = NULL;
++  unsigned i = 0;
++  for (i = 0; i < group->length (); i++)
++    {
++      elmt = (*group)[i];
++      elmt->uncertain = false;
++      elmt->done = true;
++    }
++  gimple *stmt = NULL;
++  FOR_EACH_VEC_ELT (tmp_stmts, i, stmt)
++    if (stmt != NULL)
++      hetero_lhs.safe_push (gimple_assign_lhs (stmt));
++}
++
++/* Given an uncertain group, TMP_STMTS are use_stmts of stmts in GROUP.
++   Rhs1 is the lhs of stmt in GROUP, rhs2 is the other rhs of USE_STMT.
++
++   Try to find rhs2 in ISOMER_LHS; if all found rhs2 have the same group_num
++   and iteration, this uncertain group is isomorphic.
++
++   If no rhs matched, this GROUP remains uncertain; update group_info.
++
++   Otherwise, this GROUP is heterogeneous and return true to end analysis
++   for this group.  */
++
++static bool
++check_uncertain (vec<group_info> *group, unsigned group_num,
++		 unsigned iteration, int &profit,
++		 vec<gimple *> &tmp_stmts, isomer_stmt_lhs &isomer_lhs,
++		 vec<tree> &hetero_lhs, vec<unsigned> &merged_groups)
++{
++  unsigned status = check_isomorphic_rhs (group, tmp_stmts, isomer_lhs);
++  bool done = false;
++  switch (status)
++    {
++      case UNCERTAIN:
++	update_uncertain_stmts (group, group_num, iteration, tmp_stmts);
++	break;
++      case ISOMORPHIC:
++	update_isomer_lhs (group, group_num, iteration, isomer_lhs,
++			   tmp_stmts, profit, merged_groups);
++	break;
++      default:
++	set_hetero (group, hetero_lhs, tmp_stmts);
++	done = true;
++    }
++  return done;
++}
++
++/* Return false if analysis of this group is not finished, e.g., isomorphic or
++   uncertain.  Calculate the profit if vectorized.  */
++
++static bool
++check_group (vec<group_info> *group, unsigned group_num, unsigned iteration,
++	     int &profit, vec<unsigned> &merged_groups,
++	     isomer_stmt_lhs &isomer_lhs, vec<tree> &hetero_lhs)
++{
++  unsigned j = 0;
++  group_info elmt = NULL;
++  gimple *first = NULL;
++  unsigned res = 0;
++  /* Record single use stmts in TMP_STMTS and decide whether to replace stmts
++     in ginfo in succeeding processes.  */
++  auto_vec<gimple *> tmp_stmts;
++  FOR_EACH_VEC_ELT (*group, j, elmt)
++    {
++      if (merged_groups.contains (group_num))
++	return true;
++      res |= check_use_stmt (elmt, first, tmp_stmts, hetero_lhs);
++    }
++
++  /* Update each group member according to RES.  */
++  switch (res)
++    {
++      case ISOMORPHIC:
++	update_isomer_lhs (group, group_num, iteration, isomer_lhs,
++			   tmp_stmts, profit, merged_groups);
++	return false;
++      case UNCERTAIN:
++	return check_uncertain (group, group_num, iteration, profit,
++				tmp_stmts, isomer_lhs, hetero_lhs,
++				merged_groups);
++      default:
++	set_hetero (group, hetero_lhs, tmp_stmts);
++	return true;
++    }
++}
++
++/* Return true if all analyses are done except uncertain groups.  */
++
++static bool
++end_of_search (vec<vec<group_info> *> &circular_queue,
++	       vec<unsigned> &merged_groups)
++{
++  unsigned i = 0;
++  vec<group_info> *group = NULL;
++  group_info elmt = NULL;
++  FOR_EACH_VEC_ELT (circular_queue, i, group)
++    {
++      if (merged_groups.contains (i))
++	continue;
++      elmt = (*group)[0];
++      /* If there are any isomorphic use_stmts, continue analysis of
++	 isomorphic use_stmts.  */
++      if (!elmt->done && !elmt->uncertain)
++	return false;
++    }
++  return true;
++}
++
++/* Push valid stmts to STMTS as cutpoints.  */
++
++static bool
++check_any_cutpoints (vec<vec<group_info> *> &circular_queue,
++		     vec<gimple *> *&stmts, vec<unsigned> &merged_groups)
++{
++  unsigned front = 0;
++  vec<group_info> *group = NULL;
++  group_info elmt = NULL;
++  unsigned max = circular_queue.length () * circular_queue[0]->length ();
++  vec_alloc (stmts, max);
++  while (front < circular_queue.length ())
++    {
++      unsigned i = 0;
++      if (merged_groups.contains (front))
++	{
++	  front++;
++	  continue;
++	}
++      group = circular_queue[front++];
++      FOR_EACH_VEC_ELT (*group, i, elmt)
++	if (elmt->stmt != NULL && elmt->done && elmt->cut_point)
++	  stmts->safe_push (elmt->stmt);
++    }
++  return stmts->length () != 0;
++}
++
++/* Grouped loads are isomorphic.  Make a pair of group number and iteration,
++   and map each load stmt to this pair.  We set iteration 0 here.  */
++
++static void
++init_isomer_lhs (vec<vec<group_info> *> &groups, isomer_stmt_lhs &isomer_lhs)
++{
++  vec<group_info> *group = NULL;
++  group_info elmt = NULL;
++  unsigned i = 0;
++  FOR_EACH_VEC_ELT (groups, i, group)
++    {
++      unsigned j = 0;
++      FOR_EACH_VEC_ELT (*group, j, elmt)
++	isomer_lhs[gimple_assign_lhs (elmt->stmt)] = std::make_pair (i, 0);
++    }
++}
++
++/* It's not a strict analysis of load/store profit.  Assume scalar and vector
++   loads/stores are of the same cost.  The result PROFIT equals the profit
++   from vectorizing the scalar loads/stores minus the cost of the vectorized
++   loads/stores.  */
++
++static int
++load_store_profit (unsigned scalar_mem_ops, unsigned vf, unsigned new_mem_ops)
++{
++  int profit = 0;
++  enum vect_cost_for_stmt kind = scalar_load;
++  int scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0);
++  profit += (scalar_mem_ops - (scalar_mem_ops / vf)) * scalar_cost;
++  profit -= new_mem_ops / vf * scalar_cost;
++  kind = scalar_store;
++  scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0);
++  profit -= new_mem_ops / vf * scalar_cost;
++  return profit;
++}
++
++/* Breadth first search the graph consisting of define-use chains starting
++   from the circular queue initialized by function BUILD_QUEUE.  Find the
++   single use of each stmt in a group and check if they are isomorphic.
++   Isomorphic is defined as same rhs type, same operator, and isomorphic
++   calculation of each rhs starting from load.  If another rhs is uncertain
++   to be isomorphic, put it at the end of the circular queue and re-analyze
++   it during the next iteration.
++   If a group shares the same use_stmt with another group, skip one of them
++   in succeeding processes as merged.  Iterate the circular queue until all
++   remaining groups are heterogeneous or the search reaches MEM stmts.  If
++   all other groups have finished the analysis, and the remaining groups are
++   uncertain, return false to avoid an endless loop.  */
++
++bool
++bfs_find_isomer_stmts (vec<vec<group_info> *> &circular_queue,
++		       stmts_profit &profit_pair, unsigned vf,
++		       bool &reach_vdef)
++{
++  isomer_stmt_lhs isomer_lhs;
++  auto_vec<tree> hetero_lhs;
++  auto_vec<unsigned> merged_groups;
++  vec<group_info> *group = NULL;
++  /* True if analysis finishes.  */
++  bool done = false;
++  int profit_sum = 0;
++  vec<gimple *> *stmts = NULL;
++  init_isomer_lhs (circular_queue, isomer_lhs);
++  for (unsigned i = 1; !done; ++i)
++    {
++      unsigned front = 0;
++      /* Re-initialize DONE to TRUE while a new iteration begins.  */
++      done = true;
++      while (front < circular_queue.length ())
++	{
++	  int profit = 0;
++	  group = circular_queue[front];
++	  done &= check_group (group, front, i, profit, merged_groups,
++			       isomer_lhs, hetero_lhs);
++	  profit_sum += profit;
++	  if (profit != 0 && (*group)[0]->stmt == NULL)
++	    {
++	      reach_vdef = true;
++	      return false;
++	    }
++	  ++front;
++	}
++      /* Uncertain result, return.  */
++      if (!done && end_of_search (circular_queue, merged_groups))
++	return false;
++    }
++  if (check_any_cutpoints (circular_queue, stmts, merged_groups))
++    {
++      profit_pair.first = stmts;
++      unsigned loads = circular_queue.length () * circular_queue[0]->length ();
++      profit_pair.second = profit_sum + load_store_profit (loads, vf,
++							   stmts->length ());
++      if (profit_pair.second > 0)
++	return true;
++    }
++  return false;
++}
++
++/* Free memory allocated by ginfo.  */
++
++static void
++free_ginfos (vec<vec<group_info> *> &worklists)
++{
++  vec<group_info> *worklist;
++  unsigned i = 0;
++  while (i < worklists.length ())
++    {
++      worklist = worklists[i++];
++      group_info ginfo;
++      unsigned j = 0;
++      FOR_EACH_VEC_ELT (*worklist, j, ginfo)
++	delete ginfo;
++      vec_free (worklist);
++    }
++}
++
++static void
++release_tmp_stmts (vf_stmts_profit_map &candi_stmts)
++{
++  vf_stmts_profit_map::iterator iter;
++  for (iter = candi_stmts.begin (); iter != candi_stmts.end (); ++iter)
++    iter->second.first->release ();
++}
++
++/* Choose the group of stmts with maximum profit.  */
++
++static bool
++decide_stmts_by_profit (vf_stmts_profit_map &candi_stmts, vec<gimple *> &stmts)
++{
++  vf_stmts_profit_map::iterator iter;
++  int profit = 0;
++  int max = 0;
++  vec<gimple *> *tmp = NULL;
++  for (iter = candi_stmts.begin (); iter != candi_stmts.end (); ++iter)
++    {
++      profit = iter->second.second;
++      if (profit > max)
++	{
++	  tmp = iter->second.first;
++	  max = profit;
++	}
++    }
++  if (max == 0)
++    {
++      release_tmp_stmts (candi_stmts);
++      return false;
++    }
++  unsigned i = 0;
++  gimple *stmt = NULL;
++  FOR_EACH_VEC_ELT (*tmp, i, stmt)
++    stmts.safe_push (stmt);
++  release_tmp_stmts (candi_stmts);
++  return stmts.length () != 0;
++}
++
++/* Find isomorphic stmts from grouped loads with vector factor VF.
++
++   Given the following source code, ignoring casts.
++
++     a0 = (a[0] + b[0]) + ((a[4] - b[4]) << 16);
++     a1 = (a[1] + b[1]) + ((a[5] - b[5]) << 16);
++     a2 = (a[2] + b[2]) + ((a[6] - b[6]) << 16);
++     a3 = (a[3] + b[3]) + ((a[7] - b[7]) << 16);
++
++   We get the grouped loads in VINFO as
++
++     GROUP_1            GROUP_2
++     _1 = *a            _11 = *b
++     _2 = *(a + 1)      _12 = *(b + 1)
++     _3 = *(a + 2)      _13 = *(b + 2)
++     _4 = *(a + 3)      _14 = *(b + 3)
++     _5 = *(a + 4)      _15 = *(b + 4)
++     _6 = *(a + 5)      _16 = *(b + 5)
++     _7 = *(a + 6)      _17 = *(b + 6)
++     _8 = *(a + 7)      _18 = *(b + 7)
++
++   First we try VF = 8 and get two worklists:
++
++     WORKLIST_1         WORKLIST_2
++     _1 = *a            _11 = *b
++     _2 = *(a + 1)      _12 = *(b + 1)
++     _3 = *(a + 2)      _13 = *(b + 2)
++     _4 = *(a + 3)      _14 = *(b + 3)
++     _5 = *(a + 4)      _15 = *(b + 4)
++     _6 = *(a + 5)      _16 = *(b + 5)
++     _7 = *(a + 6)      _17 = *(b + 6)
++     _8 = *(a + 7)      _18 = *(b + 7)
++
++   We find that _111 = _1 + _11 and _115 = _5 - _15 are not isomorphic,
++   so we try VF = VF / 2.
++
++     GROUP_1            GROUP_2
++     _1 = *a            _5 = *(a + 4)
++     _2 = *(a + 1)      _6 = *(a + 5)
++     _3 = *(a + 2)      _7 = *(a + 6)
++     _4 = *(a + 3)      _8 = *(a + 7)
++
++     GROUP_3            GROUP_4
++     _11 = *b           _15 = *(b + 4)
++     _12 = *(b + 1)     _16 = *(b + 5)
++     _13 = *(b + 2)     _17 = *(b + 6)
++     _14 = *(b + 3)     _18 = *(b + 7)
++
++   We first analyze group_1 and find that all operations are isomorphic;
++   then we replace the stmts in group_1 with their use_stmts.  Group_2 is
++   handled in the same way.
++
++     GROUP_1            GROUP_2
++     _111 = _1 + _11    _115 = _5 - _15
++     _112 = _2 + _12    _116 = _6 - _16
++     _113 = _3 + _13    _117 = _7 - _17
++     _114 = _4 + _14    _118 = _8 - _18
++
++   When analyzing group_3 and group_4, we find that their use_stmts are
++   the same as those of group_1 and group_2.  So group_3 is regarded as
++   merged into group_1 and group_4 as merged into group_2.  In subsequent
++   processing, we skip group_3 and group_4.
++
++   We repeat such processing until the operations are not isomorphic or
++   the search reaches MEM stmts.  In the given case, the search ends up
++   at a0, a1, a2 and a3.  */
++
++static bool
++find_isomorphic_stmts (loop_vec_info vinfo, vec<gimple *> &stmts)
++{
++  unsigned vf = get_max_vf (vinfo);
++  if (vf == 0)
++    return false;
++  auto_vec<vec<group_info> *> circular_queue;
++  /* Pair of candidate cut-point stmts and their vectorization profit.  */
++  stmts_profit profit_map;
++  /* Map from vector factor to candidate stmts and profit.  */
++  vf_stmts_profit_map candi_stmts;
++  bool reach_vdef = false;
++  while (vf > 2)
++    {
++      if (build_queue (vinfo, vf, circular_queue) == 0)
++        return false;
++      if (!bfs_find_isomer_stmts (circular_queue, profit_map, vf, reach_vdef))
++        {
++          if (reach_vdef)
++            {
++              release_tmp_stmts (candi_stmts);
++              free_ginfos (circular_queue);
++              circular_queue.release ();
++              return false;
++            }
++          vf /= 2;
++          free_ginfos (circular_queue);
++          circular_queue.release ();
++          continue;
++        }
++      candi_stmts[vf] = profit_map;
++      free_ginfos (circular_queue);
++      vf /= 2;
++      circular_queue.release ();
++    }
++  return decide_stmts_by_profit (candi_stmts, stmts);
++}
++
++/* Get the iv from SEED_STMTS.  Make sure each seed_stmt has only one iv
++   as its index and that all indices are the same.
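++   For example (illustrative), seed stmts such as a[i] = x and b[i] = y
++   share the single SSA index I, whereas an access like a[i][j] = z has
++   two SSA indices and makes find_index return NULL.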
*/
++
++static tree
++find_index (vec<gimple *> seed_stmts)
++{
++  if (seed_stmts.length () == 0)
++    return NULL;
++  bool found_index = false;
++  tree index = NULL;
++  unsigned ui = 0;
++  for (ui = 0; ui < seed_stmts.length (); ui++)
++    {
++      if (!gimple_vdef (seed_stmts[ui]))
++        return NULL;
++      tree lhs = gimple_assign_lhs (seed_stmts[ui]);
++      unsigned num_index = 0;
++      while (TREE_CODE (lhs) == ARRAY_REF)
++        {
++          if (TREE_CODE (TREE_OPERAND (lhs, 1)) == SSA_NAME)
++            {
++              num_index++;
++              if (num_index > 1)
++                return NULL;
++              if (index == NULL)
++                {
++                  index = TREE_OPERAND (lhs, 1);
++                  found_index = true;
++                }
++              else if (index != TREE_OPERAND (lhs, 1))
++                return NULL;
++            }
++          lhs = TREE_OPERAND (lhs, 0);
++        }
++      if (!found_index)
++        return NULL;
++    }
++  return index;
++}
++
++/* Check if the expression of a phi is an increment by a constant.  */
++
++static void
++check_phi_inc (struct vertex *v_phi, struct graph *rdg, bool &found_inc)
++{
++  struct graph_edge *e_phi;
++  for (e_phi = v_phi->succ; e_phi; e_phi = e_phi->succ_next)
++    {
++      struct vertex *v_inc = &(rdg->vertices[e_phi->dest]);
++      if (!is_gimple_assign (RDGV_STMT (v_inc))
++          || gimple_expr_code (RDGV_STMT (v_inc)) != PLUS_EXPR)
++        continue;
++      tree rhs1 = gimple_assign_rhs1 (RDGV_STMT (v_inc));
++      tree rhs2 = gimple_assign_rhs2 (RDGV_STMT (v_inc));
++      if (!(integer_onep (rhs1) || integer_onep (rhs2)))
++        continue;
++      struct graph_edge *e_inc;
++      /* Find a cycle with only two vertices, inc and phi:
++         inc <--> phi.  */
++      bool found_cycle = false;
++      for (e_inc = v_inc->succ; e_inc; e_inc = e_inc->succ_next)
++        {
++          if (e_inc->dest == e_phi->src)
++            {
++              found_cycle = true;
++              break;
++            }
++        }
++      if (!found_cycle)
++        continue;
++      found_inc = true;
++    }
++}
++
++/* Check if the phi satisfies a form like PHI <0, i>.  */
++
++static inline bool
++iv_check_phi_stmt (gimple *phi_stmt)
++{
++  return gimple_phi_num_args (phi_stmt) == 2
++         && (integer_zerop (gimple_phi_arg_def (phi_stmt, 0))
++             || integer_zerop (gimple_phi_arg_def (phi_stmt, 1)));
++}
++
++/* Make sure the iteration variable is a phi.  */
++
++static tree
++get_iv_from_seed (struct graph *flow_only_rdg, vec<gimple *> seed_stmts)
++{
++  tree index = find_index (seed_stmts);
++  if (index == NULL)
++    return NULL;
++  for (int i = 0; i < flow_only_rdg->n_vertices; i++)
++    {
++      struct vertex *v = &(flow_only_rdg->vertices[i]);
++      if (RDGV_STMT (v) != seed_stmts[0])
++        continue;
++      struct graph_edge *e;
++      bool found_phi = false;
++      for (e = v->pred; e; e = e->pred_next)
++        {
++          struct vertex *v_phi = &(flow_only_rdg->vertices[e->src]);
++          gimple *phi_stmt = RDGV_STMT (v_phi);
++          if (gimple_code (phi_stmt) != GIMPLE_PHI
++              || gimple_phi_result (phi_stmt) != index)
++            continue;
++          if (!iv_check_phi_stmt (phi_stmt))
++            return NULL;
++          /* Find the inc expr among the successors of the phi.  */
++          bool found_inc = false;
++          check_phi_inc (v_phi, flow_only_rdg, found_inc);
++          if (!found_inc)
++            return NULL;
++          found_phi = true;
++          break;
++        }
++      if (!found_phi)
++        return NULL;
++      break;
++    }
++  return index;
++}
++
++/* Do not distribute the loop if vertices in ROOT_MAP have an
++   antidependence within FLOW_ONLY_RDG.
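++   The check is a forward reachability walk: starting from the vertices
++   in ROOT_MAP, reaching another ROOT_MAP vertex along any dependence
++   edge means the cut points depend on each other, so no temp array is
++   inserted.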
*/ ++ ++static bool ++check_no_dependency (struct graph *flow_only_rdg, bitmap root_map) ++{ ++ bitmap_iterator bi; ++ unsigned ui; ++ auto_vec visited_nodes; ++ auto_bitmap visited_map; ++ EXECUTE_IF_SET_IN_BITMAP (root_map, 0, ui, bi) ++ visited_nodes.safe_push (ui); ++ for (ui = 0; ui < visited_nodes.length (); ui++) ++ { ++ struct vertex *v = &(flow_only_rdg->vertices[visited_nodes[ui]]); ++ struct graph_edge *e; ++ for (e = v->succ; e; e = e->succ_next) ++ { ++ if (bitmap_bit_p (root_map, e->dest)) ++ return false; ++ if (bitmap_bit_p (visited_map, e->dest)) ++ continue; ++ visited_nodes.safe_push (e->dest); ++ bitmap_set_bit (visited_map, e->dest); ++ } ++ } ++ return true; ++} ++ ++/* Find isomorphic stmts from GROUPED_LOADS in VINFO and make sure ++ there is no dependency among those STMT we found. */ ++ ++static unsigned ++get_cut_points (struct graph *flow_only_rdg, bitmap cut_points, ++ loop_vec_info vinfo) ++{ ++ unsigned n_stmts = 0; ++ ++ /* STMTS that may be CUT_POINTS. */ ++ auto_vec stmts; ++ if (!find_isomorphic_stmts (vinfo, stmts)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "No temp array insertion: no isomorphic stmts" ++ " were found.\n"); ++ return 0; ++ } ++ ++ for (int i = 0; i < flow_only_rdg->n_vertices; i++) ++ { ++ if (stmts.contains (RDG_STMT (flow_only_rdg, i))) ++ bitmap_set_bit (cut_points, i); ++ } ++ n_stmts = bitmap_count_bits (cut_points); ++ ++ bool succ = check_no_dependency (flow_only_rdg, cut_points); ++ if (!succ) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "No temp array inserted: data dependency" ++ " among isomorphic stmts.\n"); ++ return 0; ++ } ++ return n_stmts; ++} ++ ++static void ++build_temp_array (struct vertex *v, gimple_stmt_iterator &gsi, ++ poly_uint64 array_extent, tree iv, ++ hash_set *tmp_array_vars, vec *transformed) ++{ ++ gimple *stmt = RDGV_STMT (v); ++ tree lhs = gimple_assign_lhs (stmt); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "original stmt:\t"); ++ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS); ++ } ++ tree var_ssa = duplicate_ssa_name (lhs, stmt); ++ gimple_assign_set_lhs (stmt, var_ssa); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "changed to:\t"); ++ print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS | TDF_MEMSYMS); ++ } ++ gimple_set_uid (gsi_stmt (gsi), -1); ++ tree vect_elt_type = TREE_TYPE (lhs); ++ tree array_type = build_array_type_nelts (vect_elt_type, array_extent); ++ tree array = create_tmp_var (array_type); ++ tree array_ssa = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL); ++ tmp_array_vars->add (array_ssa); ++ gimple *store = gimple_build_assign (array_ssa, var_ssa); ++ tree new_vdef = make_ssa_name (gimple_vop (cfun), store); ++ gsi_insert_after (&gsi, store, GSI_NEW_STMT); ++ gimple_set_vdef (store, new_vdef); ++ transformed->safe_push (store); ++ gimple_set_uid (gsi_stmt (gsi), -1); ++ tree array_ssa2 = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL); ++ tmp_array_vars->add (array_ssa2); ++ gimple *load = gimple_build_assign (lhs, array_ssa2); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "insert stmt:\t"); ++ print_gimple_stmt (dump_file, store, 0, TDF_VOPS|TDF_MEMSYMS); ++ fprintf (dump_file, " and stmt:\t"); ++ print_gimple_stmt (dump_file, load, 0, TDF_VOPS|TDF_MEMSYMS); ++ } ++ gimple_set_vuse (load, new_vdef); ++ gsi_insert_after (&gsi, load, GSI_NEW_STMT); ++ gimple_set_uid (gsi_stmt (gsi), -1); ++} ++ ++/* 
Set bitmap PRODUCERS based on vec TRANSFORMED. */ ++ ++void ++loop_distribution::build_producers (loop_p loop, bitmap producers, ++ vec &transformed) ++{ ++ auto_vec stmts; ++ stmts_from_loop (loop, &stmts); ++ int i = 0; ++ gimple *stmt = NULL; ++ ++ FOR_EACH_VEC_ELT (stmts, i, stmt) ++ gimple_set_uid (stmt, i); ++ i = 0; ++ FOR_EACH_VEC_ELT (transformed, i, stmt) ++ bitmap_set_bit (producers, stmt->uid); ++} ++ ++/* Transform stmt ++ ++ A = FOO (ARG_1); ++ ++ to ++ ++ STMT_1: A1 = FOO (ARG_1); ++ STMT_2: X[I] = A1; ++ STMT_3: A = X[I]; ++ ++ Producer is STMT_2 who defines the temp array and consumer is ++ STMT_3 who uses the temp array. */ ++ ++void ++loop_distribution::do_insertion (loop_p loop, struct graph *flow_only_rdg, ++ tree iv, bitmap cut_points, ++ hash_set *tmp_array_vars, ++ bitmap producers) ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "=== do insertion ===\n"); ++ ++ auto_vec transformed; ++ ++ /* Execution times of loop. */ ++ poly_uint64 array_extent ++ = tree_to_poly_uint64 (number_of_latch_executions (loop)) + 1; ++ ++ basic_block *bbs = get_loop_body_in_custom_order (loop, this, ++ bb_top_order_cmp_r); ++ ++ for (int i = 0; i < int (loop->num_nodes); i++) ++ { ++ basic_block bb = bbs[i]; ++ ++ /* Find all cut points in bb and transform them. */ ++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ++ gsi_next (&gsi)) ++ { ++ unsigned j = gimple_uid (gsi_stmt (gsi)); ++ if (bitmap_bit_p (cut_points, j)) ++ { ++ struct vertex *v = &(flow_only_rdg->vertices[j]); ++ build_temp_array (v, gsi, array_extent, iv, tmp_array_vars, ++ &transformed); ++ } ++ } ++ } ++ build_producers (loop, producers, transformed); ++ update_ssa (TODO_update_ssa); ++ free (bbs); ++} ++ ++/* After temp array insertion, given stmts ++ STMT_1: M = FOO (ARG_1); ++ STMT_2: X[I] = M; ++ STMT_3: A = X[I]; ++ STMT_2 is the producer, STMT_1 is its prev and STMT_3 is its next. ++ Replace M with A, and remove STMT_2 and STMT_3. */ ++ ++static void ++reset_gimple_assign (struct graph *flow_only_rdg, struct partition *partition, ++ gimple_stmt_iterator &gsi, int j) ++{ ++ struct vertex *v = &(flow_only_rdg->vertices[j]); ++ gimple *stmt = RDGV_STMT (v); ++ gimple *prev = stmt->prev; ++ gimple *next = stmt->next; ++ tree n_lhs = gimple_assign_lhs (next); ++ gimple_assign_set_lhs (prev, n_lhs); ++ unlink_stmt_vdef (stmt); ++ if (partition) ++ bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi))); ++ gsi_remove (&gsi, true); ++ release_defs (stmt); ++ if (partition) ++ bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi))); ++ gsi_remove (&gsi, true); ++} ++ ++void ++loop_distribution::remove_insertion (loop_p loop, struct graph *flow_only_rdg, ++ bitmap producers, struct partition *partition) ++{ ++ basic_block *bbs = get_loop_body_in_custom_order (loop, this, ++ bb_top_order_cmp_r); ++ for (int i = 0; i < int (loop->num_nodes); i++) ++ { ++ basic_block bb = bbs[i]; ++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ++ gsi_next (&gsi)) ++ { ++ unsigned j = gimple_uid (gsi_stmt (gsi)); ++ if (bitmap_bit_p (producers, j)) ++ reset_gimple_assign (flow_only_rdg, partition, gsi, j); ++ } ++ } ++ update_ssa (TODO_update_ssa); ++ free (bbs); ++} ++ ++/* Insert temp arrays if isomorphic computation exists. Temp arrays will be ++ regarded as SEED_STMTS for building partitions in succeeding processes. 
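++   Each temp array is indexed by the loop IV, so after distribution the
++   producer partition fills X[i] in one loop and the consumer partition
++   reads X[i] back in another.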
*/ ++ ++bool ++loop_distribution::insert_temp_arrays (loop_p loop, vec seed_stmts, ++ hash_set *tmp_array_vars, bitmap producers) ++{ ++ struct graph *flow_only_rdg = build_rdg (loop, NULL); ++ gcc_checking_assert (flow_only_rdg != NULL); ++ tree iv = get_iv_from_seed (flow_only_rdg, seed_stmts); ++ if (iv == NULL) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Loop %d no temp array insertion: failed to get" ++ " iteration variable.\n", loop->num); ++ free_rdg (flow_only_rdg); ++ return false; ++ } ++ auto_bitmap cut_points; ++ loop_vec_info vinfo = loop_vec_info_for_loop (loop); ++ unsigned n_cut_points = get_cut_points (flow_only_rdg, cut_points, vinfo); ++ delete vinfo; ++ loop->aux = NULL; ++ if (n_cut_points == 0) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Loop %d no temp array insertion: no cut points" ++ " found.\n", loop->num); ++ free_rdg (flow_only_rdg); ++ return false; ++ } ++ do_insertion (loop, flow_only_rdg, iv, cut_points, tmp_array_vars, producers); ++ if (dump_enabled_p ()) ++ { ++ dump_user_location_t loc = find_loop_location (loop); ++ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion done:" ++ " %d temp arrays inserted in Loop %d.\n", ++ n_cut_points, loop->num); ++ } ++ free_rdg (flow_only_rdg); ++ return true; ++} ++ ++static bool find_seed_stmts_for_distribution (class loop *, vec *); ++ + /* Distributes the code from LOOP in such a way that producer statements + are placed before consumer statements. Tries to separate only the + statements from STMTS into separate loops. Returns the number of +@@ -2972,7 +4342,7 @@ loop_distribution::finalize_partitions (class loop *loop, + + int + loop_distribution::distribute_loop (class loop *loop, +- const vec &stmts, ++ vec &stmts, + control_dependences *cd, int *nb_calls, bool *destroy_p, + bool only_patterns_p) + { +@@ -3021,6 +4391,33 @@ loop_distribution::distribute_loop (class loop *loop, + return 0; + } + ++ /* Try to distribute LOOP if LOOP is simple enough and unable to vectorize. ++ If LOOP has grouped loads, recursively find isomorphic stmts and insert ++ temp arrays, rebuild RDG and call find_seed_stmts_for_distribution ++ to replace STMTS. */ ++ ++ hash_set tmp_array_vars; ++ ++ /* STMTs that define those inserted TMP_ARRAYs. */ ++ auto_bitmap producers; ++ ++ /* New SEED_STMTS after insertion. 
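++     They replace STMTS when the temp array insertion succeeds.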
*/ ++ auto_vec work_list; ++ bool insert_success = false; ++ if (may_insert_temp_arrays (loop, rdg, cd)) ++ { ++ if (insert_temp_arrays (loop, stmts, &tmp_array_vars, producers)) ++ { ++ if (find_seed_stmts_for_distribution (loop, &work_list)) ++ { ++ insert_success = true; ++ } ++ else ++ remove_insertion (loop, rdg, producers, NULL); ++ rebuild_rdg (loop, rdg, cd); ++ } ++ } ++ + data_reference_p dref; + for (i = 0; datarefs_vec.iterate (i, &dref); ++i) + dref->aux = (void *) (uintptr_t) i; +@@ -3029,7 +4426,10 @@ loop_distribution::distribute_loop (class loop *loop, + dump_rdg (dump_file, rdg); + + auto_vec partitions; +- rdg_build_partitions (rdg, stmts, &partitions); ++ if (work_list.length() > stmts.length()) ++ rdg_build_partitions (rdg, &work_list, &partitions); ++ else ++ rdg_build_partitions (rdg, &stmts, &partitions); + + auto_vec alias_ddrs; + +@@ -3101,7 +4501,7 @@ loop_distribution::distribute_loop (class loop *loop, + for (int j = i + 1; + partitions.iterate (j, &partition); ++j) + { +- if (share_memory_accesses (rdg, into, partition)) ++ if (share_memory_accesses (rdg, into, partition, &tmp_array_vars)) + { + partition_merge_into (rdg, into, partition, FUSE_SHARE_REF); + partitions.unordered_remove (j); +@@ -3151,7 +4551,7 @@ loop_distribution::distribute_loop (class loop *loop, + } + } + +- finalize_partitions (loop, &partitions, &alias_ddrs); ++ finalize_partitions (loop, &partitions, &alias_ddrs, producers); + + /* If there is a reduction in all partitions make sure the last one + is not classified for builtin code generation. */ +@@ -3169,6 +4569,24 @@ loop_distribution::distribute_loop (class loop *loop, + } + + nbp = partitions.length (); ++ ++ /* If we have inserted TMP_ARRAYs but there is only one partition left in ++ the succeeding processes, remove those inserted TMP_ARRAYs back to the ++ original version. */ ++ ++ if (nbp == 1 && insert_success) ++ { ++ struct partition *partition = NULL; ++ partitions.iterate (0, &partition); ++ remove_insertion (loop, rdg, producers, partition); ++ if (dump_enabled_p ()) ++ { ++ dump_user_location_t loc = find_loop_location (loop); ++ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion removed:" ++ " unable to distribute loop %d.\n", loop->num); ++ } ++ } ++ + if (nbp == 0 + || (nbp == 1 && !partition_builtin_p (partitions[0])) + || (nbp > 1 && partition_contains_all_rw (rdg, partitions))) +diff --git a/gcc/tree-vect-data-refs.cc b/gcc/tree-vect-data-refs.cc +index 04e68f621..aae7f62f3 100644 +--- a/gcc/tree-vect-data-refs.cc ++++ b/gcc/tree-vect-data-refs.cc +@@ -2791,6 +2791,9 @@ vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info) + DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element; + + DR_GROUP_SIZE (stmt_info) = groupsize; ++ ++ DR_GROUP_SLP_TRANSPOSE (stmt_info) = false; ++ + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, +@@ -2820,6 +2823,20 @@ vect_analyze_group_access_1 (vec_info *vinfo, dr_vec_info *dr_info) + DR_GROUP_GAP (stmt_info)); + } + ++ /* SLP: create an SLP data structure for every interleaving group of ++ loads for further analysis in vect_analyse_slp. */ ++ if (DR_IS_READ (dr) && !slp_impossible) ++ { ++ if (loop_vinfo) ++ { ++ LOOP_VINFO_GROUPED_LOADS (loop_vinfo).safe_push (stmt_info); ++ } ++ if (bb_vinfo) ++ { ++ BB_VINFO_GROUPED_LOADS (bb_vinfo).safe_push (stmt_info); ++ } ++ } ++ + /* SLP: create an SLP data structure for every interleaving group of + stores for further analysis in vect_analyse_slp. 
*/ + if (DR_IS_WRITE (dr) && !slp_impossible) +@@ -5636,6 +5653,226 @@ vect_permute_store_chain (vec_info *vinfo, vec &dr_chain, + } + } + ++/* Encoding the PERM_MASK_FIRST. */ ++ ++static void ++vect_indices_encoding_first (tree vectype, unsigned int array_num, ++ tree &perm_mask_high_first, ++ tree &perm_mask_low_first) ++{ ++ unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant (); ++ vec_perm_builder sel (nelt, nelt, 1); ++ sel.quick_grow (nelt); ++ unsigned int group_num = nelt / array_num; ++ unsigned int index = 0; ++ unsigned int array = 0; ++ unsigned int group = 0; ++ ++ /* The encoding has 1 pattern in the fisrt stage. */ ++ for (array = 0; array < array_num / 2; array++) ++ { ++ for (group = 0; group < group_num * 2; group++) ++ { ++ sel[index++] = array + array_num * group; ++ } ++ } ++ vec_perm_indices indices (sel, 2, nelt); ++ perm_mask_high_first = vect_gen_perm_mask_checked (vectype, indices); ++ ++ index = 0; ++ for (array = array_num / 2; array < array_num; array++) ++ { ++ for (group = 0; group < group_num * 2; group++) ++ { ++ sel[index++] = array + array_num * group; ++ } ++ } ++ indices.new_vector (sel, 2, nelt); ++ perm_mask_low_first = vect_gen_perm_mask_checked (vectype, indices); ++} ++ ++/* Encoding the PERM_MASK. */ ++ ++static void ++vect_indices_encoding (tree vectype, unsigned int array_num, ++ tree &perm_mask_high, tree &perm_mask_low) ++{ ++ unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant (); ++ vec_perm_builder sel (nelt, nelt, 1); ++ sel.quick_grow (nelt); ++ unsigned int group_num = nelt / array_num; ++ unsigned int index = 0; ++ unsigned int array = 0; ++ unsigned int group = 0; ++ ++ /* The encoding has 2 patterns in the folllowing stages. */ ++ for (array = 0; array < array_num / 2; array++) ++ { ++ for (group = 0; group < group_num; group++) ++ { ++ sel[index++] = group + group_num * array; ++ } ++ for (group = 0; group < group_num; group++) ++ { ++ sel[index++] = nelt + group + group_num * array; ++ } ++ } ++ vec_perm_indices indices (sel, 2, nelt); ++ perm_mask_high = vect_gen_perm_mask_checked (vectype, indices); ++ ++ index = 0; ++ for (array = array_num / 2; array < array_num; array++) ++ { ++ for (group = 0; group < group_num; group++) ++ { ++ sel[index++] = group + group_num * array; ++ } ++ for (group = 0; group < group_num; group++) ++ { ++ sel[index++] = nelt + group + group_num * array; ++ } ++ } ++ indices.new_vector (sel, 2, nelt); ++ perm_mask_low = vect_gen_perm_mask_checked (vectype, indices); ++} ++ ++/* Function vect_transpose_store_chain. ++ ++ Given a chain of interleaved stores in DR_CHAIN of LENGTH and ARRAY_NUM that ++ must be a power of 2. Generate interleave_high/low stmts to reorder ++ the data correctly for the stores. Return the final references for stores ++ in RESULT_CHAIN. This function is similar to vect_permute_store_chain (), ++ we interleave the contents of the vectors in their order. ++ ++ E.g., LENGTH is 4, the scalar type is short (i.e., VF is 8) and ARRAY_NUM ++ is 4. That is, the input is 4 vectors each containing 8 elements. ++ And 2 (VF / ARRAY_NUM) of 8 elements come from the same array. we interleave ++ the contents of the four vectors in their order. 
We assign a number to each ++ element, the input sequence is: ++ ++ 1st vec: 0 1 2 3 4 5 6 7 ++ 2nd vec: 8 9 10 11 12 13 14 15 ++ 3rd vec: 16 17 18 19 20 21 22 23 ++ 4th vec: 24 25 26 27 28 29 30 31 ++ ++ The output sequence should be: ++ ++ 1st vec: 0 4 8 12 16 20 24 28 ++ 2nd vec: 1 5 9 13 17 21 25 29 ++ 3rd vec: 2 6 10 14 18 22 26 30 ++ 4th vec: 3 7 11 15 19 23 27 31 ++ ++ In our example, ++ We get 2 (VF / ARRAY_NUM) elements together in every vector. ++ ++ I1: 0 4 1 5 2 6 3 7 ++ I2: 8 12 9 13 10 14 11 15 ++ I3: 16 20 17 21 18 22 19 23 ++ I4: 24 28 25 29 26 30 27 31 ++ ++ Then, we use interleave_high/low instructions to create such output. ++ Every 2 (VF / ARRAY_NUM) elements are regarded as a whole. The permutation ++ is done in log LENGTH stages. ++ ++ I1: interleave_high (1st vec, 3rd vec) ++ I2: interleave_low (1st vec, 3rd vec) ++ I3: interleave_high (2nd vec, 4th vec) ++ I4: interleave_low (2nd vec, 4th vec) ++ ++ The first stage of the sequence should be: ++ ++ I1: 0 4 16 20 1 5 17 21 ++ I2: 2 6 18 22 3 7 19 23 ++ I3: 8 12 24 28 9 13 25 29 ++ I4: 10 14 26 30 11 15 27 31 ++ ++ The following stage sequence should be, i.e. the final result is: ++ ++ I1: 0 4 8 12 16 20 24 28 ++ I2: 1 5 9 13 17 21 25 29 ++ I3: 2 6 10 14 18 22 26 30 ++ I4: 3 7 11 15 19 23 27 31. */ ++ ++void ++vect_transpose_store_chain (vec_info *vinfo, vec dr_chain, ++ unsigned int length, unsigned int array_num, ++ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ++ vec *result_chain) ++{ ++ gimple *perm_stmt = NULL; ++ tree vectype = STMT_VINFO_VECTYPE (stmt_info); ++ tree perm_mask_low_first = NULL; ++ tree perm_mask_high_first = NULL; ++ tree perm_mask_low = NULL; ++ tree perm_mask_high = NULL; ++ unsigned int log_length = exact_log2 (length); ++ ++ /* Only power of 2 is supported. */ ++ gcc_assert (pow2p_hwi (length)); ++ ++ /* The encoding has 2 types, one for the grouped pattern in the fisrt stage, ++ another for the interleaved patterns in the following stages. */ ++ gcc_assert (array_num != 0); ++ ++ /* Create grouped stmt (in the first stage): ++ group = nelt / array_num; ++ high_first = VEC_PERM_EXPR ++ low_first = VEC_PERM_EXPR */ ++ vect_indices_encoding_first (vectype, array_num, perm_mask_high_first, ++ perm_mask_low_first); ++ ++ /* Create interleaving stmt (in the following stages): ++ high = VEC_PERM_EXPR ++ low = VEC_PERM_EXPR */ ++ vect_indices_encoding (vectype, array_num, perm_mask_high, perm_mask_low); ++ ++ for (unsigned int perm_time = 0; perm_time < log_length; perm_time++) ++ { ++ for (unsigned int index = 0; index < length / 2; index++) ++ { ++ tree vect1 = dr_chain[index]; ++ tree vect2 = dr_chain[index + length / 2]; ++ ++ tree high = make_temp_ssa_name (vectype, NULL, "vect_inter_high"); ++ perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1, vect2, ++ perm_time == 0 ? perm_mask_high_first ++ : perm_mask_high); ++ vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); ++ (*result_chain)[2 * index] = high; ++ ++ tree low = make_temp_ssa_name (vectype, NULL, "vect_inter_low"); ++ perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1, vect2, ++ perm_time == 0 ? 
perm_mask_low_first ++ : perm_mask_low); ++ vect_finish_stmt_generation (vinfo, stmt_info, perm_stmt, gsi); ++ (*result_chain)[2 * index+1] = low; ++ } ++ memcpy (dr_chain.address (), result_chain->address (), ++ length * sizeof (tree)); ++ } ++} ++ + /* Function vect_setup_realignment + + This function is called when vectorizing an unaligned load using +diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc +index 3435f9378..f296e9415 100644 +--- a/gcc/tree-vect-loop.cc ++++ b/gcc/tree-vect-loop.cc +@@ -2856,7 +2856,7 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, + loop_vec_info main_loop_vinfo, + const vector_modes &vector_modes, unsigned &mode_i, + machine_mode &autodetected_vector_mode, +- bool &fatal) ++ bool &fatal, bool result_only_p) + { + loop_vec_info loop_vinfo + = vect_create_loop_vinfo (loop, shared, loop_form_info, main_loop_vinfo); +@@ -2865,6 +2865,8 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, + loop_vinfo->vector_mode = vector_mode; + unsigned int suggested_unroll_factor = 1; + ++ /* Loop_vinfo for loop-distribution pass. */ ++ opt_loop_vec_info fail_loop_vinfo = opt_loop_vec_info::success (NULL); + /* Run the main analysis. */ + opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, + &suggested_unroll_factor); +@@ -2933,7 +2935,21 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, + + if (!res) + { +- delete loop_vinfo; ++ ++ /* If current analysis shows LOOP is unable to vectorize, loop_vinfo ++ will be deleted. If LOOP is under ldist analysis, backup it before ++ it is deleted and return it if all modes are analyzed and still ++ fail to vectorize. */ ++ if (result_only_p && (mode_i == vector_modes.length () ++ || autodetected_vector_mode == VOIDmode)) ++ { ++ fail_loop_vinfo = opt_loop_vec_info::success (loop_vinfo); ++ loop->aux = (loop_vec_info) fail_loop_vinfo; ++ } ++ else ++ { ++ delete loop_vinfo; ++ } + if (fatal) + gcc_checking_assert (main_loop_vinfo == NULL); + return opt_loop_vec_info::propagate_failure (res); +@@ -2946,9 +2962,11 @@ vect_analyze_loop_1 (class loop *loop, vec_info_shared *shared, + + Apply a set of analyses on LOOP, and create a loop_vec_info struct + for it. The different analyses will record information in the +- loop_vec_info struct. */ ++ loop_vec_info struct. When RESULT_ONLY_P is true, quit analysis ++ if loop is vectorizable, otherwise, do not delete vinfo. */ + opt_loop_vec_info +-vect_analyze_loop (class loop *loop, vec_info_shared *shared) ++vect_analyze_loop (class loop *loop, vec_info_shared *shared, ++ bool result_only_p) + { + DUMP_VECT_SCOPE ("analyze_loop_nest"); + +@@ -2996,6 +3014,12 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) + && !unlimited_cost_model (loop)); + machine_mode autodetected_vector_mode = VOIDmode; + opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL); ++ /* Loop_vinfo for loop-distribution pass. 
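++     It preserves a failed analysis for that pass to inspect instead of
++     deleting it.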
*/ ++ opt_loop_vec_info fail_loop_vinfo = opt_loop_vec_info::success (NULL); ++ if (result_only_p) ++ { ++ vect_slp_init (); ++ } + unsigned int mode_i = 0; + unsigned HOST_WIDE_INT simdlen = loop->simdlen; + +@@ -3019,10 +3043,16 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) + opt_loop_vec_info loop_vinfo + = vect_analyze_loop_1 (loop, shared, &loop_form_info, + NULL, vector_modes, mode_i, +- autodetected_vector_mode, fatal); ++ autodetected_vector_mode, fatal, result_only_p); + if (fatal) + break; + ++ if (result_only_p && (mode_i == vector_modes.length () ++ || autodetected_vector_mode == VOIDmode)) ++ { ++ return loop_vinfo; ++ } ++ + if (loop_vinfo) + { + /* Analyzis has been successful so update the VF value. The +@@ -3132,7 +3162,7 @@ vect_analyze_loop (class loop *loop, vec_info_shared *shared) + = vect_analyze_loop_1 (loop, shared, &loop_form_info, + first_loop_vinfo, + vector_modes, mode_i, +- autodetected_vector_mode, fatal); ++ autodetected_vector_mode, fatal, result_only_p); + if (fatal) + break; + +diff --git a/gcc/tree-vect-patterns.cc b/gcc/tree-vect-patterns.cc +index e1bcab0f7..c0c15773d 100644 +--- a/gcc/tree-vect-patterns.cc ++++ b/gcc/tree-vect-patterns.cc +@@ -5632,8 +5632,8 @@ static vect_recog_func vect_vect_recog_func_ptrs[] = { + internal functions. */ + { vect_recog_gather_scatter_pattern, "gather_scatter" }, + { vect_recog_mask_conversion_pattern, "mask_conversion" }, +- { vect_recog_widen_plus_pattern, "widen_plus" }, +- { vect_recog_widen_minus_pattern, "widen_minus" }, ++ // { vect_recog_widen_plus_pattern, "widen_plus" }, ++ // { vect_recog_widen_minus_pattern, "widen_minus" }, + }; + + const unsigned int NUM_PATTERNS = ARRAY_SIZE (vect_vect_recog_func_ptrs); +diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc +index af477c31a..6cbf8085f 100644 +--- a/gcc/tree-vect-slp.cc ++++ b/gcc/tree-vect-slp.cc +@@ -49,6 +49,8 @@ along with GCC; see the file COPYING3. If not see + #include "tree-eh.h" + #include "tree-cfg.h" + #include "alloc-pool.h" ++#include "print-tree.h" ++#include "gimple-pretty-print.h" + + static bool vectorizable_slp_permutation (vec_info *, gimple_stmt_iterator *, + slp_tree, stmt_vector_for_cost *); +@@ -994,6 +996,21 @@ vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, + } + + gcc_assert (vectype); ++ if (!STMT_VINFO_VECTYPE (stmt_info)) ++ STMT_VINFO_VECTYPE (stmt_info) = vectype; ++ if (dump_file) ++ { ++ fprintf (dump_file, "vect_build_slp_tree_1: %p\n", stmt_info); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "vect_build_slp_tree_1: vectype="); ++ if (vectype) ++ print_generic_expr (dump_file, vectype); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "internal vectype="); ++ if (STMT_VINFO_VECTYPE (stmt_info)) ++ print_generic_expr (dump_file, STMT_VINFO_VECTYPE (stmt_info)); ++ fprintf (dump_file, "\n"); ++ } + + gcall *call_stmt = dyn_cast (stmt); + if (call_stmt) +@@ -1575,10 +1592,10 @@ vect_build_slp_tree (vec_info *vinfo, + dump_printf_loc (MSG_NOTE, vect_location, + "SLP discovery for node %p succeeded\n", res); + gcc_assert (res_ == res); +- res->max_nunits = this_max_nunits; ++ res_->max_nunits = this_max_nunits; + vect_update_max_nunits (max_nunits, this_max_nunits); + /* Keep a reference for the bst_map use. */ +- SLP_TREE_REF_COUNT (res)++; ++ SLP_TREE_REF_COUNT (res_)++; + } + return res_; + } +@@ -3190,8 +3207,10 @@ vect_build_slp_instance (vec_info *vinfo, + + /* For basic block SLP, try to break the group up into multiples of + a vector size. 
*/ ++ bb_vec_info bb_vinfo = dyn_cast (vinfo); + if (is_a (vinfo) +- && (i > 1 && i < group_size)) ++ && (i > 1 && i < group_size) ++ && !bb_vinfo->transposed) + { + tree scalar_type + = TREE_TYPE (DR_REF (STMT_VINFO_DATA_REF (stmt_info))); +@@ -3301,84 +3320,1034 @@ vect_analyze_slp_instance (vec_info *vinfo, + scalar_stmts.create (DR_GROUP_SIZE (stmt_info)); + while (next_info) + { +- scalar_stmts.quick_push (vect_stmt_to_vectorize (next_info)); +- next_info = DR_GROUP_NEXT_ELEMENT (next_info); ++ scalar_stmts.quick_push (vect_stmt_to_vectorize (next_info)); ++ next_info = DR_GROUP_NEXT_ELEMENT (next_info); ++ } ++ } ++ else if (kind == slp_inst_kind_reduc_chain) ++ { ++ /* Collect the reduction stmts and store them in scalar_stmts. */ ++ scalar_stmts.create (REDUC_GROUP_SIZE (stmt_info)); ++ while (next_info) ++ { ++ scalar_stmts.quick_push (vect_stmt_to_vectorize (next_info)); ++ next_info = REDUC_GROUP_NEXT_ELEMENT (next_info); ++ } ++ /* Mark the first element of the reduction chain as reduction to properly ++ transform the node. In the reduction analysis phase only the last ++ element of the chain is marked as reduction. */ ++ STMT_VINFO_DEF_TYPE (stmt_info) ++ = STMT_VINFO_DEF_TYPE (scalar_stmts.last ()); ++ STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) ++ = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ())); ++ } ++ else if (kind == slp_inst_kind_ctor) ++ { ++ tree rhs = gimple_assign_rhs1 (stmt_info->stmt); ++ tree val; ++ scalar_stmts.create (CONSTRUCTOR_NELTS (rhs)); ++ FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val) ++ { ++ stmt_vec_info def_info = vinfo->lookup_def (val); ++ def_info = vect_stmt_to_vectorize (def_info); ++ scalar_stmts.quick_push (def_info); ++ } ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Analyzing vectorizable constructor: %G\n", ++ stmt_info->stmt); ++ } ++ else if (kind == slp_inst_kind_reduc_group) ++ { ++ /* Collect reduction statements. */ ++ const vec &reductions ++ = as_a (vinfo)->reductions; ++ scalar_stmts.create (reductions.length ()); ++ for (i = 0; reductions.iterate (i, &next_info); i++) ++ if ((STMT_VINFO_RELEVANT_P (next_info) ++ || STMT_VINFO_LIVE_P (next_info)) ++ /* ??? Make sure we didn't skip a conversion around a reduction ++ path. In that case we'd have to reverse engineer that conversion ++ stmt following the chain using reduc_idx and from the PHI ++ using reduc_def. */ ++ && STMT_VINFO_DEF_TYPE (next_info) == vect_reduction_def) ++ scalar_stmts.quick_push (next_info); ++ /* If less than two were relevant/live there's nothing to SLP. */ ++ if (scalar_stmts.length () < 2) ++ return false; ++ } ++ else ++ gcc_unreachable (); ++ ++ vec roots = vNULL; ++ if (kind == slp_inst_kind_ctor) ++ { ++ roots.create (1); ++ roots.quick_push (stmt_info); ++ } ++ /* Build the tree for the SLP instance. */ ++ bool res = vect_build_slp_instance (vinfo, kind, scalar_stmts, ++ roots, ++ max_tree_size, limit, bst_map, ++ kind == slp_inst_kind_store ++ ? stmt_info : NULL); ++ if (!res) ++ roots.release (); ++ ++ /* ??? If this is slp_inst_kind_store and the above succeeded here's ++ where we should do store group splitting. 
*/ ++ ++ return res; ++} ++ ++static inline bool ++is_const_assign (stmt_vec_info store_elem) ++{ ++ if (store_elem == NULL) ++ { ++ gcc_unreachable (); ++ } ++ gimple *stmt = store_elem->stmt; ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (stmt); ++ return rhs_class == GIMPLE_SINGLE_RHS ++ && TREE_CONSTANT (gimple_assign_rhs1 (store_elem->stmt)); ++} ++ ++/* Push inits to INNERMOST_INITS and check const assign. */ ++ ++static bool ++record_innermost (vec &innermost_inits, ++ vec &innermost_offsets, ++ stmt_vec_info stmt_vinfo) ++{ ++ if (!stmt_vinfo) ++ { ++ return false; ++ } ++ stmt_vec_info next_info = stmt_vinfo; ++ while (next_info) ++ { ++ /* No need to vectorize constant assign in a transposed version. */ ++ if (is_const_assign (next_info)) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "no need to vectorize, store is const assign: %G", ++ next_info->stmt); ++ } ++ return false; ++ } ++ innermost_inits.safe_push (STMT_VINFO_DR_INIT (next_info)); ++ innermost_offsets.safe_push (STMT_VINFO_DR_OFFSET (next_info)); ++ next_info = DR_GROUP_NEXT_ELEMENT (next_info); ++ } ++ return true; ++} ++ ++/* Compare inits to INNERMOST_INITS, return FALSE if inits do not match ++ the first grouped_store. And check const assign meanwhile. */ ++ ++static bool ++compare_innermost (const vec &innermost_inits, ++ const vec &innermost_offsets, ++ stmt_vec_info stmt_vinfo) ++{ ++ if (!stmt_vinfo || innermost_inits.length () != stmt_vinfo->size) ++ { ++ return false; ++ } ++ stmt_vec_info next_info = stmt_vinfo; ++ unsigned int i = 0; ++ while (next_info) ++ { ++ if (is_const_assign (next_info)) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "no need to vectorize, store is const " ++ "assign: %G", next_info->stmt); ++ } ++ return false; ++ } ++ if (innermost_inits[i] != STMT_VINFO_DR_INIT (next_info) ++ || innermost_offsets[i] != STMT_VINFO_DR_OFFSET (next_info)) ++ { ++ return false; ++ } ++ next_info = DR_GROUP_NEXT_ELEMENT (next_info); ++ i++; ++ } ++ return true; ++} ++ ++static bool ++check_same_bb (stmt_vec_info grp1, stmt_vec_info grp2) ++{ ++ if (grp1->stmt->bb->index == grp2->stmt->bb->index) ++ { ++ return true; ++ } ++ return false; ++} ++ ++/* Check if grouped stores are of same type. ++ input: t1/t2 = TREE_TYPE (gimple_assign_lhs (first_element->stmt)) ++ output: 0 if same, 1 or -1 else. */ ++ ++static int ++tree_type_cmp (const tree t1, const tree t2) ++{ ++ gcc_checking_assert (t1 != NULL && t2 != NULL); ++ if (t1 != t2) ++ { ++ if (TREE_CODE (t1) != TREE_CODE (t2)) ++ { ++ return TREE_CODE (t1) > TREE_CODE (t2) ? 1 : -1; ++ } ++ if (TYPE_UNSIGNED (t1) != TYPE_UNSIGNED (t2)) ++ { ++ return TYPE_UNSIGNED (t1) > TYPE_UNSIGNED (t2) ? 1 : -1; ++ } ++ if (TYPE_PRECISION (t1) != TYPE_PRECISION (t2)) ++ { ++ return TYPE_PRECISION (t1) > TYPE_PRECISION (t2) ? 1 : -1; ++ } ++ } ++ return 0; ++} ++ ++/* Check it if 2 grouped stores are of same type that ++ we can analyze them in a transpose group. */ ++static int ++check_same_store_type (stmt_vec_info grp1, stmt_vec_info grp2) ++{ ++ if (grp1 == grp2) ++ { ++ return 0; ++ } ++ if (grp1->size != grp2->size) ++ { ++ return grp1->size > grp2->size ? 1 : -1; ++ } ++ tree lhs1 = gimple_assign_lhs (grp1->stmt); ++ tree lhs2 = gimple_assign_lhs (grp2->stmt); ++ if (TREE_CODE (lhs1) != TREE_CODE (lhs2)) ++ { ++ return TREE_CODE (lhs1) > TREE_CODE (lhs2) ? 
1 : -1; ++ } ++ tree grp_type1 = TREE_TYPE (gimple_assign_lhs (grp1->stmt)); ++ tree grp_type2 = TREE_TYPE (gimple_assign_lhs (grp2->stmt)); ++ int cmp = tree_type_cmp (grp_type1, grp_type2); ++ return cmp; ++} ++ ++/* Sort grouped stores according to group_size and store_type. ++ output: 0 if same, 1 if grp1 > grp2, -1 otherwise. */ ++ ++static int ++grouped_store_cmp (const void *grp1_, const void *grp2_) ++{ ++ stmt_vec_info grp1 = *(stmt_vec_info *)const_cast(grp1_); ++ stmt_vec_info grp2 = *(stmt_vec_info *)const_cast(grp2_); ++ return check_same_store_type (grp1, grp2); ++} ++ ++/* Transposing is based on permutation in registers. Permutation requires ++ vector length being power of 2 and satisfying the vector mode. */ ++ ++static inline bool ++check_filling_reg (stmt_vec_info current_element) ++{ ++ if (current_element->size == 0) ++ { ++ return false; ++ } ++ /* If the gimple STMT was already vectorized in vect pass, it's unable to ++ conduct transpose analysis, skip it. */ ++ bool lhs_vectorized ++ = TREE_CODE (TREE_TYPE (gimple_get_lhs (current_element->stmt))) ++ == VECTOR_TYPE; ++ bool rhs_vectorized ++ = TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (current_element->stmt))) ++ == VECTOR_TYPE; ++ if (lhs_vectorized || rhs_vectorized) ++ { ++ return false; ++ } ++ unsigned int store_precision ++ = TYPE_PRECISION (TREE_TYPE (gimple_get_lhs (current_element->stmt))); ++ auto_vector_modes vector_modes; ++ targetm.vectorize.autovectorize_vector_modes (&vector_modes, false); ++ unsigned min_mode_size = -1u; ++ for (unsigned i = 0; i < vector_modes.length (); i++) ++ { ++ unsigned mode_bit_size = (GET_MODE_BITSIZE (vector_modes[i])).coeffs[0]; ++ min_mode_size = mode_bit_size < min_mode_size ++ ? mode_bit_size : min_mode_size; ++ } ++ return store_precision != 0 ++ && pow2p_hwi (current_element->size) ++ && (current_element->size * store_precision % min_mode_size == 0); ++} ++ ++/* Check if previous groups are suitable to transpose, if not, set their ++ group number to -1, reduce grp_num and clear current_groups. ++ Otherwise, just clear current_groups. */ ++ ++static void ++check_and_clear_groups (vec ¤t_groups, ++ unsigned int &grp_num) ++{ ++ stmt_vec_info first_element; ++ if (current_groups.length () == 1 ++ || (current_groups.length () != 0 ++ && !pow2p_hwi (current_groups.length ()))) ++ { ++ while (current_groups.length () != 0) ++ { ++ first_element = current_groups.pop (); ++ first_element->group_number = -1; ++ } ++ grp_num--; ++ } ++ else ++ { ++ while (current_groups.length ()) ++ { ++ current_groups.pop (); ++ } ++ } ++} ++ ++ ++/* Make sure that transpose slp vectorization is conducted only if grouped ++ stores are one dimension array ref. */ ++ ++static bool ++is_store_one_dim_array (gimple *stmt) ++{ ++ tree op = gimple_get_lhs (stmt); ++ if (TREE_CODE (op) != ARRAY_REF) ++ return false; ++ return TREE_OPERAND_LENGTH (op) > 0 ++ && TREE_OPERAND_LENGTH (TREE_OPERAND (op, 0)) == 0; ++} ++ ++/* Set grouped_stores with similar MEM_REF to the same group and mark their ++ grp_num. Groups with same grp_num consist the minimum unit to analyze ++ transpose. Return num of such units. */ ++ ++static unsigned ++vect_prepare_transpose (bb_vec_info bb_vinfo) ++{ ++ stmt_vec_info current_element = NULL; ++ stmt_vec_info first_element = NULL; ++ unsigned int i = 0; ++ unsigned int grp_num = 0; ++ /* Use arrays to record MEM_REF data in different GROUPED_STORES. */ ++ auto_vec innermost_inits; ++ auto_vec innermost_offsets; ++ ++ /* A set of stmt_vec_info with same store type. 
Analyze them if their size ++ is suitable to transpose. */ ++ auto_vec current_groups; ++ ++ FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, i, current_element) ++ { ++ /* Compare current grouped_store to the first one if first_element exists, ++ push current_element to current_groups if they are similar on innermost ++ behavior of MEM_REF. */ ++ if (first_element != NULL ++ && !check_same_store_type (first_element, current_element) ++ && compare_innermost (innermost_inits, innermost_offsets, ++ current_element) ++ && check_same_bb (first_element, current_element)) ++ { ++ current_groups.safe_push (current_element); ++ current_element->group_number = grp_num; ++ /* If current_element is the last element in grouped_stores, continue ++ will exit the loop and leave the last group unanalyzed. */ ++ if (i == bb_vinfo->grouped_stores.length () - 1) ++ { ++ check_and_clear_groups (current_groups, grp_num); ++ } ++ continue; ++ } ++ check_and_clear_groups (current_groups, grp_num); ++ innermost_inits.release (); ++ innermost_offsets.release (); ++ /* Beginning of a new group to analyze whether they are able to consist ++ a unit to conduct transpose analysis. */ ++ first_element = NULL; ++ if (is_store_one_dim_array (current_element->stmt) ++ && check_filling_reg (current_element) ++ && record_innermost (innermost_inits, innermost_offsets, ++ current_element)) ++ { ++ first_element = current_element; ++ current_groups.safe_push (current_element); ++ current_element->group_number = ++grp_num; ++ if (i == bb_vinfo->grouped_stores.length () - 1) ++ { ++ check_and_clear_groups (current_groups, grp_num); ++ } ++ continue; ++ } ++ current_element->group_number = -1; ++ } ++ return grp_num; ++} ++ ++/* Return a flag to transpose grouped stores before building slp tree. ++ Add bool may_transpose in class vec_info. */ ++ ++static bool ++vect_may_transpose (bb_vec_info bb_vinfo) ++{ ++ if (targetm.vectorize.vec_perm_const == NULL) ++ { ++ return false; ++ } ++ ++ if (bb_vinfo->grouped_stores.length () < 2) ++ { ++ return false; ++ } ++ ++ DUMP_VECT_SCOPE ("analyze if grouped stores may transpose to slp"); ++ /* Sort grouped_stores according to size and type for function ++ vect_prepare_transpose (). */ ++ bb_vinfo->grouped_stores.qsort (grouped_store_cmp); ++ ++ int groups = vect_prepare_transpose (bb_vinfo); ++ BB_VINFO_TRANS_GROUPS (bb_vinfo) = groups; ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "%d groups to analyze transposed slp.\n", groups); ++ return groups != 0; ++} ++ ++/* Get the base address of STMT_INFO. */ ++ ++static tree ++get_op_base_address (stmt_vec_info stmt_info) ++{ ++ struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); ++ tree op = DR_BASE_ADDRESS (dr); ++ while (TREE_OPERAND_LENGTH (op) > 0) ++ { ++ op = TREE_OPERAND (op, 0); ++ } ++ return op; ++} ++ ++/* Compare the UID of the two stmt_info STMTINFO_A and STMTINFO_B. ++ Sorting them in ascending order. */ ++ ++static int ++dr_group_cmp (const void *stmtinfo_a_, const void *stmtinfo_b_) ++{ ++ stmt_vec_info stmtinfo_a ++ = *(stmt_vec_info *) const_cast (stmtinfo_a_); ++ stmt_vec_info stmtinfo_b ++ = *(stmt_vec_info *) const_cast (stmtinfo_b_); ++ ++ /* Stabilize sort. */ ++ if (stmtinfo_a == stmtinfo_b) ++ { ++ return 0; ++ } ++ return gimple_uid (stmtinfo_a->stmt) < gimple_uid (stmtinfo_b->stmt) ? -1 : 1; ++} ++ ++/* Find the first elements of the grouped loads which are required to merge. 
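++   Two grouped loads are candidates for merging when they share the same
++   base address and the same DR_GROUP_SIZE; see the example before
++   vect_merge_slp_grouped_loads below.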
*/ ++ ++static void ++vect_slp_grouped_load_find (bb_vec_info bb_vinfo, vec &visited, ++ vec &res) ++{ ++ unsigned int i = 0; ++ stmt_vec_info merge_first_element = NULL; ++ stmt_vec_info first_element = NULL; ++ tree opa = NULL; ++ unsigned int grp_size_a = 0; ++ FOR_EACH_VEC_ELT (bb_vinfo->grouped_loads, i, first_element) ++ { ++ if (visited[i]) ++ { ++ continue; ++ } ++ if (!STMT_VINFO_GROUPED_ACCESS (first_element) ++ || !pow2p_hwi (DR_GROUP_SIZE (first_element))) ++ { ++ /* Non-conforming grouped load should be grouped separately. */ ++ if (merge_first_element == NULL) ++ { ++ visited[i] = true; ++ res.safe_push (first_element); ++ return; ++ } ++ } ++ if (merge_first_element == NULL) ++ { ++ merge_first_element = first_element; ++ opa = get_op_base_address (first_element); ++ grp_size_a = DR_GROUP_SIZE (first_element); ++ res.safe_push (first_element); ++ visited[i] = true; ++ continue; ++ } ++ ++ /* If the two first elements are of the same base address and group size, ++ these two grouped loads need to be merged. */ ++ tree opb = get_op_base_address (first_element); ++ unsigned int grp_size_b = DR_GROUP_SIZE (first_element); ++ if (opa == opb && grp_size_a == grp_size_b) ++ { ++ res.safe_push (first_element); ++ visited[i] = true; ++ } ++ } ++} ++ ++/* Merge the grouped loads that are found from ++ vect_slp_grouped_load_find (). */ ++ ++static stmt_vec_info ++vect_slp_grouped_load_merge (vec &res) ++{ ++ stmt_vec_info stmt_info = res[0]; ++ if (res.length () == 1) ++ { ++ return stmt_info; ++ } ++ unsigned int i = 0; ++ unsigned int size = DR_GROUP_SIZE (res[0]); ++ unsigned int new_group_size = size * res.length (); ++ stmt_vec_info first_element = NULL; ++ stmt_vec_info merge_first_element = NULL; ++ stmt_vec_info last_element = NULL; ++ FOR_EACH_VEC_ELT (res, i, first_element) ++ { ++ if (merge_first_element == NULL) ++ { ++ merge_first_element = first_element; ++ last_element = merge_first_element; ++ size = DR_GROUP_SIZE (merge_first_element); ++ } ++ ++ if (last_element != first_element ++ && !DR_GROUP_NEXT_ELEMENT (last_element)) ++ { ++ DR_GROUP_NEXT_ELEMENT (last_element) = first_element; ++ /* Store the gap from the previous member of the group. If there is ++ no gap in the access, DR_GROUP_GAP is always 1. */ ++ DR_GROUP_GAP_TRANS (first_element) = DR_GROUP_GAP (first_element); ++ DR_GROUP_GAP (first_element) = 1; ++ } ++ for (stmt_info = first_element; stmt_info; ++ stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info)) ++ { ++ DR_GROUP_FIRST_ELEMENT (stmt_info) = merge_first_element; ++ DR_GROUP_SIZE_TRANS (stmt_info) = DR_GROUP_SIZE (stmt_info); ++ DR_GROUP_SIZE (stmt_info) = new_group_size; ++ last_element = stmt_info; ++ } ++ } ++ DR_GROUP_SIZE (merge_first_element) = new_group_size; ++ DR_GROUP_SLP_TRANSPOSE (merge_first_element) = true; ++ DR_GROUP_NEXT_ELEMENT (last_element) = NULL; ++ return merge_first_element; ++} ++ ++/* Merge the grouped loads which have the same base address and group size. ++ For example, for grouped loads (opa_1, opa_2, opb_1, opb_2): ++ opa_1: a0->a1->a2->a3 ++ opa_2: a8->a9->a10->a11 ++ opb_1: b0->b1 ++ opb_2: b16->b17 ++ we can probably get two merged grouped loads: ++ opa: a0->a1->a2->a3->a8->a9->a10->a11 ++ opb: b0->b1->b16->b17. 
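++   Grouped loads whose DR_GROUP_SIZE is not a power of two are grouped
++   separately rather than merged.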
*/ ++ ++static bool ++vect_merge_slp_grouped_loads (bb_vec_info bb_vinfo) ++{ ++ if (bb_vinfo->grouped_loads.length () <= 0) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "The number of grouped loads is 0.\n"); ++ } ++ return false; ++ } ++ bb_vinfo->grouped_loads.qsort (dr_group_cmp); ++ auto_vec visited (bb_vinfo->grouped_loads.length ()); ++ auto_vec grouped_loads_merge; ++ for (unsigned int i = 0; i < bb_vinfo->grouped_loads.length (); i++) ++ { ++ visited.safe_push (false); ++ } ++ while (1) ++ { ++ /* Find grouped loads which are required to merge. */ ++ auto_vec res; ++ vect_slp_grouped_load_find (bb_vinfo, visited, res); ++ if (res.is_empty ()) ++ { ++ break; ++ } ++ /* Merge the required grouped loads into one group. */ ++ grouped_loads_merge.safe_push (vect_slp_grouped_load_merge (res)); ++ } ++ if (grouped_loads_merge.length () == bb_vinfo->grouped_loads.length ()) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "No grouped loads need to be merged.\n"); ++ } ++ return false; ++ } ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Merging grouped loads successfully.\n"); ++ } ++ BB_VINFO_GROUPED_LOADS (bb_vinfo).release (); ++ for (unsigned int i = 0; i < grouped_loads_merge.length (); i++) ++ { ++ BB_VINFO_GROUPED_LOADS (bb_vinfo).safe_push (grouped_loads_merge[i]); ++ } ++ return true; ++} ++ ++/* Find the first elements of the grouped stores ++ which are required to transpose and merge. */ ++ ++static void ++vect_slp_grouped_store_find (bb_vec_info bb_vinfo, vec &visited, ++ vec &res) ++{ ++ stmt_vec_info first_element = NULL; ++ stmt_vec_info merge_first_element = NULL; ++ unsigned int k = 0; ++ FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, k, first_element) ++ { ++ if (visited[k]) ++ { ++ continue; ++ } ++ /* Non-conforming grouped store should be grouped separately. */ ++ if (!STMT_VINFO_GROUPED_ACCESS (first_element) ++ || first_element->group_number == -1) ++ { ++ if (merge_first_element == NULL) ++ { ++ visited[k] = true; ++ res.safe_push (first_element); ++ return; ++ } ++ } ++ if (first_element->group_number != -1 ++ && merge_first_element == NULL) ++ { ++ merge_first_element = first_element; ++ } ++ if (merge_first_element->group_number == first_element->group_number) ++ { ++ visited[k] = true; ++ res.safe_push (first_element); ++ } ++ } ++} ++ ++/* Transpose and merge the grouped stores that are found from ++ vect_slp_grouped_store_find (). */ ++ ++static stmt_vec_info ++vect_slp_grouped_store_transform (vec &res) ++{ ++ stmt_vec_info stmt_info = res[0]; ++ if (res.length () == 1) ++ { ++ return stmt_info; ++ } ++ stmt_vec_info rearrange_first_element = stmt_info; ++ stmt_vec_info last_element = rearrange_first_element; ++ ++ unsigned int size = DR_GROUP_SIZE (rearrange_first_element); ++ unsigned int new_group_size = size * res.length (); ++ for (unsigned int i = 1; i < res.length (); i++) ++ { ++ /* Store the gap from the previous member of the group. If there is no ++ gap in the access, DR_GROUP_GAP is always 1. 
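++      The original gap is saved in DR_GROUP_GAP_TRANS so that it can be
++      restored if the transposition is undone later.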
*/ ++ DR_GROUP_GAP_TRANS (res[i]) = DR_GROUP_GAP (res[i]); ++ DR_GROUP_GAP (res[i]) = 1; ++ } ++ while (!res.is_empty ()) ++ { ++ stmt_info = res[0]; ++ res.ordered_remove (0); ++ if (DR_GROUP_NEXT_ELEMENT (stmt_info)) ++ { ++ res.safe_push (DR_GROUP_NEXT_ELEMENT (stmt_info)); ++ } ++ DR_GROUP_FIRST_ELEMENT (stmt_info) = rearrange_first_element; ++ DR_GROUP_NEXT_ELEMENT (last_element) = stmt_info; ++ DR_GROUP_SIZE_TRANS (stmt_info) = DR_GROUP_SIZE (stmt_info); ++ DR_GROUP_SIZE (stmt_info) = new_group_size; ++ last_element = stmt_info; ++ } ++ ++ DR_GROUP_SIZE (rearrange_first_element) = new_group_size; ++ DR_GROUP_SLP_TRANSPOSE (rearrange_first_element) = true; ++ DR_GROUP_NEXT_ELEMENT (last_element) = NULL; ++ return rearrange_first_element; ++} ++ ++/* Save the STMT_INFO in the grouped stores to BB_VINFO_SCALAR_STORES for ++ transposing back grouped stores. */ ++ ++static void ++get_scalar_stores (bb_vec_info bb_vinfo) ++{ ++ unsigned int k = 0; ++ stmt_vec_info first_element = NULL; ++ FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, k, first_element) ++ { ++ /* Filter the grouped store which is unnecessary for transposing. */ ++ if (!STMT_VINFO_GROUPED_ACCESS (first_element) ++ || first_element->group_number == -1) ++ { ++ continue; ++ } ++ vec tmp_scalar_store; ++ tmp_scalar_store.create (DR_GROUP_SIZE (first_element)); ++ for (stmt_vec_info stmt_info = first_element; stmt_info; ++ stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info)) ++ { ++ tmp_scalar_store.safe_push (stmt_info); ++ } ++ BB_VINFO_SCALAR_STORES (bb_vinfo).safe_push (tmp_scalar_store); ++ } ++} ++ ++/* Transpose and merge the grouped stores which have the same group number. ++ For example, for grouped stores (opa_0, opa_1, opa_2, opa_3): ++ opa_0: a00->a01->a02->a03 ++ opa_1: a10->a11->a12->a13 ++ opa_2: a20->a21->a22->a23 ++ opa_2: a30->a31->a32->a33 ++ we can probably get the merged grouped store: ++ opa: a00->a10->a20->a30 ++ ->a01->a11->a21->a31 ++ ->a02->a12->a22->a32 ++ ->a03->a13->a23->a33. */ ++ ++static bool ++vect_transform_slp_grouped_stores (bb_vec_info bb_vinfo) ++{ ++ if (bb_vinfo->grouped_stores.length () <= 0) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "The number of grouped stores is 0.\n"); ++ } ++ return false; ++ } ++ ++ bb_vinfo->grouped_stores.qsort (dr_group_cmp); ++ auto_vec grouped_stores_merge; ++ auto_vec visited (bb_vinfo->grouped_stores.length ()); ++ unsigned int i = 0; ++ for (i = 0; i < bb_vinfo->grouped_stores.length (); i++) ++ { ++ visited.safe_push (false); ++ } ++ ++ /* Get scalar stores for the following transposition recovery. */ ++ get_scalar_stores (bb_vinfo); ++ ++ while (1) ++ { ++ /* Find grouped stores which are required to transpose and merge. */ ++ auto_vec res; ++ vect_slp_grouped_store_find (bb_vinfo, visited, res); ++ if (res.is_empty ()) ++ { ++ break; ++ } ++ /* Transpose and merge the required grouped stores into one group. */ ++ grouped_stores_merge.safe_push (vect_slp_grouped_store_transform (res)); ++ } ++ ++ BB_VINFO_GROUPED_STORES (bb_vinfo).release (); ++ for (i = 0; i < grouped_stores_merge.length (); i++) ++ { ++ BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (grouped_stores_merge[i]); ++ } ++ ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Transposing grouped stores successfully.\n"); ++ } ++ return true; ++} ++ ++/* A helpful function of vect_transform_back_slp_grouped_stores (). 
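++   It rebuilds the original per-array store groups from the scalar stores
++   saved in BB_VINFO_SCALAR_STORES.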
*/ ++ ++static auto_vec ++vect_transform_back_slp_grouped_store (bb_vec_info bb_vinfo, ++ stmt_vec_info first_stmt_info) ++{ ++ auto_vec grouped_stores_split; ++ for (unsigned int i = 0; i < bb_vinfo->scalar_stores.length (); i++) ++ { ++ vec scalar_tmp = bb_vinfo->scalar_stores[i]; ++ if (scalar_tmp.length () > 1 ++ && scalar_tmp[0]->group_number != first_stmt_info->group_number) ++ { ++ continue; ++ } ++ stmt_vec_info cur_stmt_info = NULL; ++ stmt_vec_info cur_first_stmt_info = NULL; ++ stmt_vec_info last_stmt_info = NULL; ++ unsigned int k = 0; ++ FOR_EACH_VEC_ELT (scalar_tmp, k, cur_stmt_info) ++ { ++ if (k == 0) ++ { ++ cur_first_stmt_info = cur_stmt_info; ++ last_stmt_info = cur_stmt_info; ++ } ++ DR_GROUP_FIRST_ELEMENT (cur_stmt_info) = cur_first_stmt_info; ++ DR_GROUP_NEXT_ELEMENT (last_stmt_info) = cur_stmt_info; ++ last_stmt_info = cur_stmt_info; ++ } ++ DR_GROUP_SIZE (cur_first_stmt_info) = k; ++ DR_GROUP_NEXT_ELEMENT (last_stmt_info) = NULL; ++ if (first_stmt_info != cur_first_stmt_info) ++ { ++ DR_GROUP_GAP (cur_first_stmt_info) ++ = DR_GROUP_GAP_TRANS (cur_first_stmt_info); ++ DR_GROUP_SLP_TRANSPOSE (cur_first_stmt_info) = false; ++ DR_GROUP_NUMBER (cur_first_stmt_info) = -1; ++ } ++ grouped_stores_split.safe_push (cur_first_stmt_info); ++ } ++ return grouped_stores_split; ++} ++ ++/* Transform the grouped store back. */ ++ ++void ++vect_transform_back_slp_grouped_stores (bb_vec_info bb_vinfo, ++ stmt_vec_info first_stmt_info) ++{ ++ if (first_stmt_info->group_number == -1) ++ { ++ return; ++ } ++ /* Transform back. */ ++ auto_vec grouped_stores_split ++ = vect_transform_back_slp_grouped_store (bb_vinfo, first_stmt_info); ++ ++ /* Add the remaining grouped stores to grouped_stores_split. */ ++ stmt_vec_info first_element = NULL; ++ unsigned int i = 0; ++ FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, i, first_element) ++ { ++ if (first_element->group_number != first_stmt_info->group_number) ++ { ++ grouped_stores_split.safe_push (first_element); ++ } ++ } ++ DR_GROUP_SLP_TRANSPOSE (first_stmt_info) = false; ++ DR_GROUP_NUMBER (first_stmt_info) = -1; ++ BB_VINFO_GROUPED_STORES (bb_vinfo).release (); ++ for (i = 0; i < grouped_stores_split.length (); i++) ++ { ++ BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (grouped_stores_split[i]); ++ } ++} ++ ++/* Function check_for_slp_vectype ++ ++ Restriction for grouped stores by checking their vectype. ++ If the vectype of the grouped store is changed, it need transform back. ++ If all grouped stores need to be transformed back, return FALSE. 
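++   For example (illustrative), with 4-element vectors (nunits == 4), a
++   merged group whose members originally had DR_GROUP_SIZE_TRANS == 2
++   cannot fill a vector register and is transformed back.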
*/ ++ ++static bool ++check_for_slp_vectype (bb_vec_info bb_vinfo) ++{ ++ if (dump_file) ++ fprintf (dump_file, "check_for_slp_vectype: enter\n"); ++ stmt_vec_info first_element = NULL; ++ unsigned int i = 0; ++ int count = 0; ++ auto_vec grouped_stores_check; ++ FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, i, first_element) ++ { ++ grouped_stores_check.safe_push (first_element); ++ } ++ FOR_EACH_VEC_ELT (grouped_stores_check, i, first_element) ++ { ++ if (STMT_VINFO_GROUPED_ACCESS (first_element) ++ && first_element->group_number != -1) ++ { ++ unsigned int group_size_b ++ = DR_GROUP_SIZE_TRANS (first_element); ++ tree vectype = STMT_VINFO_VECTYPE (first_element); ++ gimple *stmt = STMT_VINFO_STMT (first_element); ++ tree lhs = gimple_get_lhs (stmt); ++ tree type = TREE_TYPE (lhs); ++#if 0 ++ if (!vectype && !type) ++ { ++ if (dump_file) ++ fprintf (dump_file, "check_for_slp_vectype: no vectype/stmt type\n"); ++ continue; ++ } ++ ++ if (!vectype) ++ vectype = type; ++#endif ++ if (dump_file) ++ { ++ fprintf (dump_file, "check_for_slp_vectype: %p\n", first_element); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "check_for_slp_vectype: vectype="); ++ if (vectype) ++ print_generic_expr (dump_file, vectype); ++ fprintf (dump_file, "\n"); ++ } ++#if 0 ++ if (!vectype || !VECTOR_TYPE_P (vectype)) ++ continue; ++#endif ++ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); ++ if (nunits.to_constant () > group_size_b) ++ { ++ count++; ++ /* If the vectype is changed, this grouped store need ++ to be transformed back. */ ++ vect_transform_back_slp_grouped_stores (bb_vinfo, first_element); ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "No supported: only supported for" ++ " group_size geq than nunits.\n"); ++ } ++ } ++ } ++ } ++ if (count == BB_VINFO_TRANS_GROUPS (bb_vinfo)) ++ { ++ return false; ++ } ++ if (dump_file) ++ fprintf (dump_file, "check_for_slp_vectype: True\n"); ++ return true; ++} ++ ++/* Function check_for_dr_alignment ++ ++ Check the alignment of the slp instance loads. ++ Return FALSE if a load cannot be vectorized. */ ++ ++static bool ++check_for_dr_alignment (bb_vec_info bb_vinfo, slp_instance instance) ++{ ++ slp_tree node = NULL; ++ unsigned int i = 0; ++ FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node) ++ { ++ stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; ++ dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info); ++ if (dump_file) ++ { ++ fprintf (dump_file, "check_for_dr_alignment: %p\n", first_stmt_info); ++ ++ gimple *stmt = STMT_VINFO_STMT (first_stmt_info); ++ tree lhs = gimple_get_lhs (stmt); ++ tree type = TREE_TYPE (lhs); ++ print_gimple_stmt (dump_file, stmt, 0); ++ } ++ ++ tree vectype = STMT_VINFO_VECTYPE (first_stmt_info); ++ int malign = dr_misalignment (first_dr_info, vectype); ++ enum dr_alignment_support supportable_dr_alignment ++ = vect_supportable_dr_alignment (bb_vinfo, first_dr_info, ++ vectype, malign); ++ if (supportable_dr_alignment == dr_explicit_realign_optimized ++ || supportable_dr_alignment == dr_explicit_realign) ++ { ++ return false; + } + } +- else if (kind == slp_inst_kind_reduc_chain) ++ return true; ++} ++ ++/* Initialize slp_transpose flag before transposing. */ ++ ++static void ++init_stmt_info_slp_transpose (bb_vec_info bb_vinfo) ++{ ++ stmt_vec_info first_element = NULL; ++ unsigned int k = 0; ++ FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, k, first_element) + { +- /* Collect the reduction stmts and store them in scalar_stmts. 
*/ +- scalar_stmts.create (REDUC_GROUP_SIZE (stmt_info)); +- while (next_info) ++ if (STMT_VINFO_GROUPED_ACCESS (first_element)) + { +- scalar_stmts.quick_push (vect_stmt_to_vectorize (next_info)); +- next_info = REDUC_GROUP_NEXT_ELEMENT (next_info); ++ DR_GROUP_SLP_TRANSPOSE (first_element) = false; + } +- /* Mark the first element of the reduction chain as reduction to properly +- transform the node. In the reduction analysis phase only the last +- element of the chain is marked as reduction. */ +- STMT_VINFO_DEF_TYPE (stmt_info) +- = STMT_VINFO_DEF_TYPE (scalar_stmts.last ()); +- STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) +- = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ())); + } +- else if (kind == slp_inst_kind_ctor) ++ FOR_EACH_VEC_ELT (bb_vinfo->grouped_loads, k, first_element) + { +- tree rhs = gimple_assign_rhs1 (stmt_info->stmt); +- tree val; +- scalar_stmts.create (CONSTRUCTOR_NELTS (rhs)); +- FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val) ++ if (STMT_VINFO_GROUPED_ACCESS (first_element)) + { +- stmt_vec_info def_info = vinfo->lookup_def (val); +- def_info = vect_stmt_to_vectorize (def_info); +- scalar_stmts.quick_push (def_info); ++ DR_GROUP_SLP_TRANSPOSE (first_element) = false; + } +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_NOTE, vect_location, +- "Analyzing vectorizable constructor: %G\n", +- stmt_info->stmt); + } +- else if (kind == slp_inst_kind_reduc_group) ++} ++ ++/* Analyze and transpose the stmts before building the SLP tree. */ ++ ++static bool ++vect_analyze_transpose (bb_vec_info bb_vinfo) ++{ ++ DUMP_VECT_SCOPE ("vect_analyze_transpose"); ++ ++ if (!vect_may_transpose (bb_vinfo)) + { +- /* Collect reduction statements. */ +- const vec &reductions +- = as_a (vinfo)->reductions; +- scalar_stmts.create (reductions.length ()); +- for (i = 0; reductions.iterate (i, &next_info); i++) +- if ((STMT_VINFO_RELEVANT_P (next_info) +- || STMT_VINFO_LIVE_P (next_info)) +- /* ??? Make sure we didn't skip a conversion around a reduction +- path. In that case we'd have to reverse engineer that conversion +- stmt following the chain using reduc_idx and from the PHI +- using reduc_def. */ +- && STMT_VINFO_DEF_TYPE (next_info) == vect_reduction_def) +- scalar_stmts.quick_push (next_info); +- /* If less than two were relevant/live there's nothing to SLP. */ +- if (scalar_stmts.length () < 2) +- return false; ++ return false; + } +- else +- gcc_unreachable (); + +- vec roots = vNULL; +- if (kind == slp_inst_kind_ctor) ++ /* For basic block SLP, try to merge the grouped stores and loads ++ into one group. */ ++ init_stmt_info_slp_transpose (bb_vinfo); ++ if (vect_transform_slp_grouped_stores (bb_vinfo) ++ && vect_merge_slp_grouped_loads (bb_vinfo)) + { +- roots.create (1); +- roots.quick_push (stmt_info); ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Analysis succeeded with SLP transposed.\n"); ++ } ++ return true; + } +- /* Build the tree for the SLP instance. */ +- bool res = vect_build_slp_instance (vinfo, kind, scalar_stmts, +- roots, +- max_tree_size, limit, bst_map, +- kind == slp_inst_kind_store +- ? stmt_info : NULL); +- if (!res) +- roots.release (); +- +- /* ??? If this is slp_inst_kind_store and the above succeeded here's +- where we should do store group splitting. */ +- +- return res; ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Analysis failed with SLP transposed.\n"); ++ } ++ return false; + } + + /* Check if there are stmts in the loop can be vectorized using SLP. 
Build SLP +@@ -4963,7 +5932,7 @@ vect_slp_analyze_operations (vec_info *vinfo) + /* Check we can vectorize the reduction. */ + || (SLP_INSTANCE_KIND (instance) == slp_inst_kind_bb_reduc + && !vectorizable_bb_reduc_epilogue (instance, &cost_vec))) +- { ++ { + slp_tree node = SLP_INSTANCE_TREE (instance); + stmt_vec_info stmt_info; + if (!SLP_INSTANCE_ROOT_STMTS (instance).is_empty ()) +@@ -4975,7 +5944,7 @@ vect_slp_analyze_operations (vec_info *vinfo) + "removing SLP instance operations starting from: %G", + stmt_info->stmt); + vect_free_slp_instance (instance); +- vinfo->slp_instances.ordered_remove (i); ++ vinfo->slp_instances.ordered_remove (i); + cost_vec.release (); + while (!visited_vec.is_empty ()) + visited.remove (visited_vec.pop ()); +@@ -5204,7 +6173,7 @@ vect_bb_slp_scalar_cost (vec_info *vinfo, + gimple *orig_stmt = orig_stmt_info->stmt; + + /* If there is a non-vectorized use of the defs then the scalar +- stmt is kept live in which case we do not account it or any ++ stmt is kept live in which case we do not account it or any + required defs in the SLP children in the scalar cost. This + way we make the vectorization more costly when compared to + the scalar cost. */ +@@ -5481,7 +6450,11 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo, + + vec_outside_cost = vec_prologue_cost + vec_epilogue_cost; + +- if (dump_enabled_p ()) ++ BB_VINFO_VEC_INSIDE_COST (bb_vinfo) = vec_inside_cost; ++ BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo) = vec_outside_cost; ++ BB_VINFO_SCALAR_COST (bb_vinfo) = scalar_cost; ++ ++ if (!unlimited_cost_model (NULL) && dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, + "Cost model analysis for part in loop %d:\n", sl); +@@ -5819,7 +6792,7 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal, + if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL)) + { + if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: unhandled data-ref in basic " + "block.\n"); + return false; +@@ -5854,6 +6827,22 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal, + + vect_pattern_recog (bb_vinfo); + ++ /* Transpose grouped stores and loads for better vectorizable version. */ ++ if (bb_vinfo->transposed) ++ { ++ if (!vect_analyze_transpose (bb_vinfo)) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "not vectorized: unhandled slp transposed in " ++ "basic block.\n"); ++ } ++ return false; ++ } ++ } ++ bb_vinfo->before_slp = true; ++ + /* Update store groups from pattern processing. */ + vect_fixup_store_groups_with_patterns (bb_vinfo); + +@@ -5872,6 +6861,20 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal, + return false; + } + ++ /* Check if the vectype is suitable for SLP transposed. */ ++ if (bb_vinfo->transposed && !check_for_slp_vectype (bb_vinfo)) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "Failed to SLP transposed in the basic block.\n"); ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "not vectorized: vectype is not suitable for " ++ "SLP transposed in basic block.\n"); ++ } ++ return false; ++ } ++ + /* Optimize permutations. */ + vect_optimize_slp (bb_vinfo); + +@@ -5914,6 +6917,27 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal, + if (! 
BB_VINFO_SLP_INSTANCES (bb_vinfo).length ())
+     return false;
+
++  /* Check if the alignment is suitable for SLP transposed.  */
++  if (bb_vinfo->transposed)
++    {
++      for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); i++)
++	{
++	  if (!check_for_dr_alignment (bb_vinfo, instance))
++	    {
++	      if (dump_enabled_p ())
++		{
++		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
++				   "Failed to apply SLP transpose in the basic "
++				   "block.\n");
++		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
++				   "not vectorized: alignment is not suitable "
++				   "for SLP transpose in basic block.\n");
++		}
++	      return false;
++	    }
++	}
++    }
++
+   if (!vect_slp_analyze_operations (bb_vinfo))
+     {
+       if (dump_enabled_p ())
+@@ -5923,7 +6947,88 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal,
+     }
+
+   vect_bb_partition_graph (bb_vinfo);
++  return true;
++}
++
++static bool
++may_new_transpose_bbvinfo (bb_vec_info bb_vinfo_ori, bool res_ori,
++			   loop_p orig_loop)
++{
++  /* If the flag is false or the SLP analysis was broken before
++     vect_analyze_slp, we don't try to analyze the transposed SLP version.  */
++  if (!flag_tree_slp_transpose_vectorize
++      || !BB_VINFO_BEFORE_SLP (bb_vinfo_ori))
++    {
++      return false;
++    }
++
++  /* If the original bb_vinfo can't be vectorized, try to build a bb_vinfo
++     of the transposed version.  */
++  if (!res_ori)
++    {
++      return true;
++    }
++
++  /* Calculate the cost of the original bb_vinfo.  */
++  if (unlimited_cost_model (NULL))
++    {
++      vec<slp_instance> &instances = BB_VINFO_SLP_INSTANCES (bb_vinfo_ori);
++      vect_bb_vectorization_profitable_p (bb_vinfo_ori, instances, orig_loop);
++    }
++  /* If the vector cost and the scalar cost do not differ much (here with a
++     threshold factor of 4), try to build a bb_vinfo of the transposed version.  */
++  if (BB_VINFO_SCALAR_COST (bb_vinfo_ori)
++      < 4 * (BB_VINFO_VEC_INSIDE_COST (bb_vinfo_ori)
++	     + BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo_ori)))
++    {
++      return true;
++    }
++  return false;
++}
+
++static bool
++may_choose_transpose_bbvinfo (bb_vec_info bb_vinfo_trans, bool res_trans,
++			      bb_vec_info bb_vinfo_ori, bool res_ori,
++			      loop_p orig_loop)
++{
++  /* The original bb_vinfo is chosen if the transposed bb_vinfo
++     can't be vectorized.  */
++  if (!res_trans)
++    {
++      return false;
++    }
++  /* Calculate the cost of the transposed bb_vinfo.  */
++  if (unlimited_cost_model (NULL))
++    {
++      vec<slp_instance> &instances = BB_VINFO_SLP_INSTANCES (bb_vinfo_trans);
++      vect_bb_vectorization_profitable_p (bb_vinfo_trans, instances,
++					  orig_loop);
++    }
++  int diff_bb_cost = -1;
++  int diff_bb_cost_trans = -1;
++  if (res_ori)
++    {
++      diff_bb_cost = BB_VINFO_SCALAR_COST (bb_vinfo_ori)
++		     - BB_VINFO_VEC_INSIDE_COST (bb_vinfo_ori)
++		     - BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo_ori);
++    }
++  if (res_trans)
++    {
++      diff_bb_cost_trans = BB_VINFO_SCALAR_COST (bb_vinfo_trans)
++			   - BB_VINFO_VEC_INSIDE_COST (bb_vinfo_trans)
++			   - BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo_trans);
++    }
++  /* The original bb_vinfo is chosen when one of the following conditions
++     is satisfied:
++     1) The cost of the original version is better than the transposed one.
++     2) The vector cost is similar to the scalar cost in the transposed version.
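++     E.g. (illustrative numbers): with scalar_cost 20 for both versions and
++     vector costs 12 (original) vs. 17 (transposed), diff_bb_cost = 8 and
++     diff_bb_cost_trans = 3, so condition 1) holds and the original version
++     is kept.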
*/ ++ if ((res_ori && res_trans && diff_bb_cost >= diff_bb_cost_trans) ++ || (res_trans && BB_VINFO_SCALAR_COST (bb_vinfo_trans) ++ <= (BB_VINFO_VEC_INSIDE_COST (bb_vinfo_trans) ++ + BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo_trans)))) ++ { ++ return false; ++ } + return true; + } + +@@ -5937,6 +7042,7 @@ vect_slp_region (vec bbs, vec datarefs, + loop_p orig_loop) + { + bb_vec_info bb_vinfo; ++ bb_vec_info bb_vinfo_trans = NULL; + auto_vector_modes vector_modes; + + /* Autodetect first vector size we try. */ +@@ -5951,6 +7057,10 @@ vect_slp_region (vec bbs, vec datarefs, + { + bool vectorized = false; + bool fatal = false; ++ bool res_bb_vinfo_ori = false; ++ bool res_bb_vinfo_trans = false; ++ ++ /* New a bb_vinfo of the original version. */ + bb_vinfo = new _bb_vec_info (bbs, &shared); + + bool first_time_p = shared.datarefs.is_empty (); +@@ -5960,8 +7070,113 @@ vect_slp_region (vec bbs, vec datarefs, + else + bb_vinfo->shared->check_datarefs (); + bb_vinfo->vector_mode = next_vector_mode; ++ bb_vinfo->transposed = false; ++ bb_vinfo->before_slp = false; ++ ++ res_bb_vinfo_ori = vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal, ++ dataref_groups); ++ auto_vec profitable_subgraphs; ++ auto_vec profitable_subgraphs_trans; ++ for (slp_instance instance : BB_VINFO_SLP_INSTANCES (bb_vinfo)) ++ { ++ if (instance->subgraph_entries.is_empty ()) ++ continue; ++ ++ vect_location = instance->location (); ++ if (!unlimited_cost_model (NULL) ++ && !vect_bb_vectorization_profitable_p ++ (bb_vinfo, instance->subgraph_entries, orig_loop)) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "not vectorized: vectorization is not " ++ "profitable.\n"); ++ continue; ++ } ++ if (res_bb_vinfo_ori) ++ { ++ if (!dbg_cnt (vect_slp)) ++ continue; ++ profitable_subgraphs.safe_push (instance); ++ } ++ } ++ ++ /* Analyze and new a transposed bb_vinfo. 
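++	 The transposed candidate repeats the whole SLP analysis on a fresh
++	 _bb_vec_info with the transposed flag set; its profitable subgraphs
++	 are collected separately so that may_choose_transpose_bbvinfo can
++	 compare the two versions by cost afterwards.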
*/ ++ if (may_new_transpose_bbvinfo (bb_vinfo, res_bb_vinfo_ori, orig_loop)) ++ { ++ bool fatal_trans = false; ++ bb_vinfo_trans ++ = new _bb_vec_info (bbs, &shared); ++ bool first_time_p = shared.datarefs.is_empty (); ++ BB_VINFO_DATAREFS (bb_vinfo_trans) = datarefs; ++ if (first_time_p) ++ { ++ bb_vinfo_trans->shared->save_datarefs (); ++ } ++ else ++ { ++ bb_vinfo_trans->shared->check_datarefs (); ++ } ++ bb_vinfo_trans->vector_mode = next_vector_mode; ++ bb_vinfo_trans->transposed = true; ++ bb_vinfo_trans->before_slp = false; ++ ++ res_bb_vinfo_trans ++ = vect_slp_analyze_bb_1 (bb_vinfo_trans, n_stmts, fatal_trans, ++ dataref_groups); ++ if (res_bb_vinfo_trans) ++ { ++ for (slp_instance instance : BB_VINFO_SLP_INSTANCES (bb_vinfo_trans)) ++ { ++ if (instance->subgraph_entries.is_empty ()) ++ continue; ++ ++ vect_location = instance->location (); ++ if (!unlimited_cost_model (NULL) ++ && !vect_bb_vectorization_profitable_p ++ (bb_vinfo_trans, instance->subgraph_entries, orig_loop)) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "not vectorized: transpose vectorization is not " ++ "profitable.\n"); ++ res_bb_vinfo_trans = false; ++ continue; ++ } ++ if (res_bb_vinfo_trans) ++ { ++ if (!dbg_cnt (vect_slp)) ++ continue; ++ profitable_subgraphs_trans.safe_push (instance); ++ } ++ } ++ } ++ if (may_choose_transpose_bbvinfo (bb_vinfo_trans, ++ res_bb_vinfo_trans, ++ bb_vinfo, res_bb_vinfo_ori, ++ orig_loop)) ++ { ++ bb_vinfo = bb_vinfo_trans; ++ fatal = fatal_trans; ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Basic block part vectorized " ++ "using transposed version.\n"); ++ } ++ } ++ else ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Basic block part vectorized " ++ "\n"); ++ } ++ } ++ } + +- if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal, dataref_groups)) ++ if (res_bb_vinfo_ori || res_bb_vinfo_trans) + { + if (dump_enabled_p ()) + { +@@ -5972,90 +7187,129 @@ vect_slp_region (vec bbs, vec datarefs, + } + + bb_vinfo->shared->check_datarefs (); +- +- auto_vec profitable_subgraphs; +- for (slp_instance instance : BB_VINFO_SLP_INSTANCES (bb_vinfo)) ++ if (!res_bb_vinfo_trans) + { +- if (instance->subgraph_entries.is_empty ()) +- continue; +- +- vect_location = instance->location (); +- if (!unlimited_cost_model (NULL) +- && !vect_bb_vectorization_profitable_p +- (bb_vinfo, instance->subgraph_entries, orig_loop)) ++ /* When we're vectorizing an if-converted loop body make sure ++ we vectorized all if-converted code. */ ++ if (!profitable_subgraphs.is_empty () ++ && orig_loop) + { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "not vectorized: vectorization is not " +- "profitable.\n"); +- continue; ++ gcc_assert (bb_vinfo->bbs.length () == 1); ++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[0]); ++ !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ /* The costing above left us with DCEable vectorized scalar ++ stmts having the visited flag set on profitable ++ subgraphs. Do the delayed clearing of the flag here. 
*/ ++ if (gimple_visited_p (gsi_stmt (gsi))) ++ { ++ gimple_set_visited (gsi_stmt (gsi), false); ++ continue; ++ } ++ if (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED) ++ continue; ++ ++ if (gassign *ass = dyn_cast (gsi_stmt (gsi))) ++ if (gimple_assign_rhs_code (ass) == COND_EXPR) ++ { ++ if (!profitable_subgraphs.is_empty () ++ && dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "not profitable because of " ++ "unprofitable if-converted scalar " ++ "code\n"); ++ profitable_subgraphs.truncate (0); ++ } ++ } + } + +- if (!dbg_cnt (vect_slp)) +- continue; ++ /* Finally schedule the profitable subgraphs. */ ++ for (slp_instance instance : profitable_subgraphs) ++ { ++ if (!vectorized && dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Basic block will be vectorized " ++ "using SLP\n"); ++ vectorized = true; + +- profitable_subgraphs.safe_push (instance); +- } ++ vect_schedule_slp (bb_vinfo, instance->subgraph_entries); + +- /* When we're vectorizing an if-converted loop body make sure +- we vectorized all if-converted code. */ +- if (!profitable_subgraphs.is_empty () +- && orig_loop) ++ unsigned HOST_WIDE_INT bytes; ++ if (dump_enabled_p ()) ++ { ++ if (GET_MODE_SIZE ++ (bb_vinfo->vector_mode).is_constant (&bytes)) ++ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, ++ "basic block part vectorized using %wu " ++ "byte vectors\n", bytes); ++ else ++ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, ++ "basic block part vectorized using " ++ "variable length vectors\n"); ++ } ++ } ++ } ++ else + { +- gcc_assert (bb_vinfo->bbs.length () == 1); +- for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[0]); +- !gsi_end_p (gsi); gsi_next (&gsi)) ++ if (!profitable_subgraphs_trans.is_empty () ++ && orig_loop) + { +- /* The costing above left us with DCEable vectorized scalar +- stmts having the visited flag set on profitable +- subgraphs. Do the delayed clearing of the flag here. */ +- if (gimple_visited_p (gsi_stmt (gsi))) ++ gcc_assert (bb_vinfo->bbs.length () == 1); ++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb_vinfo->bbs[0]); ++ !gsi_end_p (gsi); gsi_next (&gsi)) + { +- gimple_set_visited (gsi_stmt (gsi), false); +- continue; ++ /* The costing above left us with DCEable vectorized scalar ++ stmts having the visited flag set on profitable ++ subgraphs. Do the delayed clearing of the flag here. */ ++ if (gimple_visited_p (gsi_stmt (gsi))) ++ { ++ gimple_set_visited (gsi_stmt (gsi), false); ++ continue; ++ } ++ if (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED) ++ continue; ++ ++ if (gassign *ass = dyn_cast (gsi_stmt (gsi))) ++ if (gimple_assign_rhs_code (ass) == COND_EXPR) ++ { ++ if (!profitable_subgraphs_trans.is_empty () ++ && dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "not profitable because of " ++ "unprofitable if-converted scalar " ++ "code\n"); ++ profitable_subgraphs_trans.truncate (0); ++ } + } +- if (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED) +- continue; +- +- if (gassign *ass = dyn_cast (gsi_stmt (gsi))) +- if (gimple_assign_rhs_code (ass) == COND_EXPR) +- { +- if (!profitable_subgraphs.is_empty () +- && dump_enabled_p ()) +- dump_printf_loc (MSG_NOTE, vect_location, +- "not profitable because of " +- "unprofitable if-converted scalar " +- "code\n"); +- profitable_subgraphs.truncate (0); +- } + } +- } + +- /* Finally schedule the profitable subgraphs. 
*/ +- for (slp_instance instance : profitable_subgraphs) +- { +- if (!vectorized && dump_enabled_p ()) +- dump_printf_loc (MSG_NOTE, vect_location, +- "Basic block will be vectorized " +- "using SLP\n"); +- vectorized = true; ++ /* Finally schedule the profitable subgraphs. */ ++ for (slp_instance instance : profitable_subgraphs_trans) ++ { ++ if (!vectorized && dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Basic block will be vectorized " ++ "using SLP\n"); ++ vectorized = true; + +- vect_schedule_slp (bb_vinfo, instance->subgraph_entries); ++ vect_schedule_slp (bb_vinfo, instance->subgraph_entries); + +- unsigned HOST_WIDE_INT bytes; +- if (dump_enabled_p ()) +- { +- if (GET_MODE_SIZE +- (bb_vinfo->vector_mode).is_constant (&bytes)) +- dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, +- "basic block part vectorized using %wu " +- "byte vectors\n", bytes); +- else +- dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, +- "basic block part vectorized using " +- "variable length vectors\n"); ++ unsigned HOST_WIDE_INT bytes; ++ if (dump_enabled_p ()) ++ { ++ if (GET_MODE_SIZE ++ (bb_vinfo->vector_mode).is_constant (&bytes)) ++ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, ++ "basic block part vectorized using %wu " ++ "byte vectors\n", bytes); ++ else ++ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, ++ "basic block part vectorized using " ++ "variable length vectors\n"); ++ } + } + } ++ + } + else + { +@@ -6081,6 +7335,10 @@ vect_slp_region (vec bbs, vec datarefs, + } + + delete bb_vinfo; ++ if (bb_vinfo_trans) ++ { ++ bb_vinfo_trans = NULL; ++ } + + if (mode_i < vector_modes.length () + && VECTOR_MODE_P (autodetected_vector_mode) +@@ -7244,10 +8502,17 @@ vect_schedule_slp_node (vec_info *vinfo, + ready early, vectorized stores go before the last scalar + stmt which is where all uses are ready. 
*/ + stmt_vec_info last_stmt_info = NULL; +- if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) +- last_stmt_info = vect_find_first_scalar_stmt_in_slp (node); +- else /* DR_IS_WRITE */ +- last_stmt_info = vect_find_last_scalar_stmt_in_slp (node); ++ ++ if (DR_GROUP_FIRST_ELEMENT (stmt_info) ++ && DR_GROUP_SLP_TRANSPOSE (DR_GROUP_FIRST_ELEMENT (stmt_info))) ++ last_stmt_info = vect_find_last_scalar_stmt_in_slp (node); ++ else ++ { ++ if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) ++ last_stmt_info = vect_find_first_scalar_stmt_in_slp (node); ++ else /* DR_IS_WRITE */ ++ last_stmt_info = vect_find_last_scalar_stmt_in_slp (node); ++ } + si = gsi_for_stmt (last_stmt_info->stmt); + } + else if ((STMT_VINFO_TYPE (stmt_info) == cycle_phi_info_type +diff --git a/gcc/tree-vect-stmts.cc b/gcc/tree-vect-stmts.cc +index 349200411..3099f6743 100644 +--- a/gcc/tree-vect-stmts.cc ++++ b/gcc/tree-vect-stmts.cc +@@ -1369,10 +1369,10 @@ vect_get_load_cost (vec_info *, stmt_vec_info stmt_info, int ncopies, + + static void + vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt, +- gimple_stmt_iterator *gsi) ++ gimple_stmt_iterator *gsi, bool transpose=false) + { + if (gsi) +- vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi); ++ vect_finish_stmt_generation (vinfo, stmt_vinfo, new_stmt, gsi, transpose); + else + vinfo->insert_on_entry (stmt_vinfo, new_stmt); + +@@ -1393,7 +1393,7 @@ vect_init_vector_1 (vec_info *vinfo, stmt_vec_info stmt_vinfo, gimple *new_stmt, + + tree + vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type, +- gimple_stmt_iterator *gsi) ++ gimple_stmt_iterator *gsi, bool transpose) + { + gimple *init_stmt; + tree new_temp; +@@ -1418,7 +1418,7 @@ vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type, + new_temp = make_ssa_name (TREE_TYPE (type)); + init_stmt = gimple_build_assign (new_temp, COND_EXPR, + val, true_val, false_val); +- vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi); ++ vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi, transpose); + val = new_temp; + } + } +@@ -1437,7 +1437,7 @@ vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type, + { + init_stmt = gsi_stmt (gsi2); + gsi_remove (&gsi2, false); +- vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi); ++ vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi, transpose); + } + } + } +@@ -1446,7 +1446,7 @@ vect_init_vector (vec_info *vinfo, stmt_vec_info stmt_info, tree val, tree type, + + new_temp = vect_get_new_ssa_name (type, vect_simple_var, "cst_"); + init_stmt = gimple_build_assign (new_temp, val); +- vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi); ++ vect_init_vector_1 (vinfo, stmt_info, init_stmt, gsi, transpose); + return new_temp; + } + +@@ -1572,9 +1572,11 @@ vect_get_vec_defs (vec_info *vinfo, stmt_vec_info stmt_info, slp_tree slp_node, + statement and create and return a stmt_vec_info for it. 
*/ + + static void +-vect_finish_stmt_generation_1 (vec_info *, +- stmt_vec_info stmt_info, gimple *vec_stmt) ++vect_finish_stmt_generation_1 (vec_info *vinfo, ++ stmt_vec_info stmt_info, gimple *vec_stmt, bool transpose=false) + { ++ if (transpose) ++ stmt_vec_info vec_stmt_info = vinfo->add_pattern_stmt (vec_stmt, NULL); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "add new stmt: %G", vec_stmt); + +@@ -1616,7 +1618,7 @@ vect_finish_replace_stmt (vec_info *vinfo, + void + vect_finish_stmt_generation (vec_info *vinfo, + stmt_vec_info stmt_info, gimple *vec_stmt, +- gimple_stmt_iterator *gsi) ++ gimple_stmt_iterator *gsi, bool transpose) + { + gcc_assert (!stmt_info || gimple_code (stmt_info->stmt) != GIMPLE_LABEL); + +@@ -1648,7 +1650,7 @@ vect_finish_stmt_generation (vec_info *vinfo, + } + } + gsi_insert_before (gsi, vec_stmt, GSI_SAME_STMT); +- vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt); ++ vect_finish_stmt_generation_1 (vinfo, stmt_info, vec_stmt, transpose); + } + + /* We want to vectorize a call to combined function CFN with function +@@ -2159,6 +2161,173 @@ vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype) + return NULL_TREE; + } + ++/* Check succeedor BB, BB without load is regarded as empty BB. Ignore empty ++ BB in DFS. */ ++ ++static unsigned ++mem_refs_in_bb (basic_block bb, vec &stmts) ++{ ++ unsigned num = 0; ++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); ++ !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gimple *stmt = gsi_stmt (gsi); ++ if (is_gimple_debug (stmt)) ++ continue; ++ if (is_gimple_assign (stmt) && gimple_has_mem_ops (stmt) ++ && !gimple_has_volatile_ops (stmt)) ++ { ++ if (gimple_assign_rhs_code (stmt) == MEM_REF ++ || gimple_assign_rhs_code (stmt) == ARRAY_REF) ++ { ++ stmts.safe_push (stmt); ++ num++; ++ } ++ else if (TREE_CODE (gimple_get_lhs (stmt)) == MEM_REF ++ || TREE_CODE (gimple_get_lhs (stmt)) == ARRAY_REF) ++ num++; ++ } ++ } ++ return num; ++} ++ ++static bool ++check_same_base (vec *datarefs, data_reference_p dr) ++{ ++ for (unsigned ui = 0; ui < datarefs->length (); ui++) ++ { ++ tree op1 = TREE_OPERAND (DR_BASE_OBJECT (dr), 0); ++ tree op2 = TREE_OPERAND (DR_BASE_OBJECT ((*datarefs)[ui]), 0); ++ if (TREE_CODE (op1) != TREE_CODE (op2)) ++ continue; ++ if (TREE_CODE (op1) == ADDR_EXPR) ++ { ++ op1 = TREE_OPERAND (op1, 0); ++ op2 = TREE_OPERAND (op2, 0); ++ } ++ enum tree_code code = TREE_CODE (op1); ++ switch (code) ++ { ++ case VAR_DECL: ++ if (DECL_NAME (op1) == DECL_NAME (op2) ++ && DR_IS_READ ((*datarefs)[ui])) ++ return true; ++ break; ++ case SSA_NAME: ++ if (SSA_NAME_VERSION (op1) == SSA_NAME_VERSION (op2) ++ && DR_IS_READ ((*datarefs)[ui])) ++ return true; ++ break; ++ default: ++ break; ++ } ++ } ++ return false; ++} ++ ++/* Iterate all load STMTS, if staisfying same base vectorized stmt, then return, ++ Otherwise, set false to SUCCESS. 
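++   E.g., for a grouped store to a[] a candidate is a vectorized load in the
++   loop header whose block contains a read data-ref with the same base a[]
++   (see check_same_base); if one is found SUCCESS stays true, otherwise it
++   is set to false.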
*/ ++ ++static void ++check_vec_use (loop_vec_info loop_vinfo, vec &stmts, ++ stmt_vec_info stmt_info, bool &success) ++{ ++ if (stmt_info == NULL) ++ { ++ success = false; ++ return; ++ } ++ if (DR_IS_READ (stmt_info->dr_aux.dr)) ++ { ++ success = false; ++ return; ++ } ++ unsigned ui = 0; ++ gimple *candidate = NULL; ++ FOR_EACH_VEC_ELT (stmts, ui, candidate) ++ { ++ if (TREE_CODE (TREE_TYPE (gimple_get_lhs (candidate))) != VECTOR_TYPE) ++ continue; ++ ++ if (candidate->bb != candidate->bb->loop_father->header) ++ { ++ success = false; ++ return; ++ } ++ auto_vec datarefs; ++ tree res = find_data_references_in_bb (candidate->bb->loop_father, ++ candidate->bb, &datarefs); ++ if (res == chrec_dont_know) ++ { ++ success = false; ++ return; ++ } ++ if (check_same_base (&datarefs, stmt_info->dr_aux.dr)) ++ return; ++ } ++ success = false; ++} ++ ++/* Deep first search from present BB. If succeedor has load STMTS, ++ stop further searching. */ ++ ++static void ++dfs_check_bb (loop_vec_info loop_vinfo, basic_block bb, stmt_vec_info stmt_info, ++ bool &success, vec &visited_bbs) ++{ ++ if (bb == cfun->cfg->x_exit_block_ptr) ++ { ++ success = false; ++ return; ++ } ++ if (!success || visited_bbs.contains (bb) || bb == loop_vinfo->loop->latch) ++ return; ++ ++ visited_bbs.safe_push (bb); ++ auto_vec stmts; ++ unsigned num = mem_refs_in_bb (bb, stmts); ++ /* Empty BB. */ ++ if (num == 0) ++ { ++ edge e; ++ edge_iterator ei; ++ FOR_EACH_EDGE (e, ei, bb->succs) ++ { ++ dfs_check_bb (loop_vinfo, e->dest, stmt_info, success, visited_bbs); ++ if (!success) ++ return; ++ } ++ return; ++ } ++ /* Non-empty BB. */ ++ check_vec_use (loop_vinfo, stmts, stmt_info, success); ++} ++ ++/* For grouped store, if all succeedors of present BB have vectorized load ++ from same base of store. If so, set memory_access_type using ++ VMAT_CONTIGUOUS_PERMUTE instead of VMAT_LOAD_STORE_LANES. */ ++ ++static bool ++conti_perm (stmt_vec_info stmt_vinfo, loop_vec_info loop_vinfo) ++{ ++ gimple *stmt = stmt_vinfo->stmt; ++ if (gimple_code (stmt) != GIMPLE_ASSIGN) ++ return false; ++ ++ if (DR_IS_READ (stmt_vinfo->dr_aux.dr)) ++ return false; ++ ++ basic_block bb = stmt->bb; ++ bool success = true; ++ auto_vec visited_bbs; ++ visited_bbs.safe_push (bb); ++ edge e; ++ edge_iterator ei; ++ FOR_EACH_EDGE (e, ei, bb->succs) ++ dfs_check_bb (loop_vinfo, e->dest, stmt_vinfo, success, visited_bbs); ++ return success; ++} ++ + /* A subroutine of get_load_store_type, with a subset of the same + arguments. Handle the case where STMT_INFO is part of a grouped load + or store. +@@ -2373,6 +2542,20 @@ get_group_load_store_type (vec_info *vinfo, stmt_vec_info stmt_info, + *memory_access_type = VMAT_CONTIGUOUS_PERMUTE; + overrun_p = would_overrun_p; + } ++ ++ if (*memory_access_type == VMAT_LOAD_STORE_LANES ++ && TREE_CODE (loop_vinfo->num_iters) == INTEGER_CST ++ && maybe_eq (tree_to_shwi (loop_vinfo->num_iters), ++ loop_vinfo->vectorization_factor) ++ && conti_perm (stmt_info, loop_vinfo) ++ && (vls_type == VLS_LOAD ++ ? vect_grouped_load_supported (vectype, single_element_p, ++ group_size) ++ : vect_grouped_store_supported (vectype, group_size))) ++ { ++ *memory_access_type = VMAT_CONTIGUOUS_PERMUTE; ++ overrun_p = would_overrun_p; ++ } + } + + /* As a last resort, trying using a gather load or scatter store. +@@ -7456,6 +7639,154 @@ vectorizable_scan_store (vec_info *vinfo, + return true; + } + ++/* Function vect_permute_store_chains ++ ++ Call function vect_permute_store_chain (). 
++ Given a chain of interleaved stores in DR_CHAIN, generate ++ interleave_high/low stmts to reorder the data correctly. ++ Return the final references for stores in RESULT_CHAIN. */ ++ ++static void ++vect_permute_store_chains (vec_info *vinfo, vec dr_chain, ++ unsigned int num_each, stmt_vec_info stmt_info, ++ gimple_stmt_iterator *gsi, vec *result_chain, ++ unsigned int group) ++{ ++ unsigned int k = 0; ++ unsigned int t = 0; ++ ++ /* Divide vectors into GROUP parts. And permute every NUM_EACH vectors ++ together. */ ++ for (k = 0; k < group; k++) ++ { ++ auto_vec dr_chain_transposed (num_each); ++ auto_vec result_chain_transposed (num_each); ++ for (t = k; t < dr_chain.length (); t = t + group) ++ { ++ dr_chain_transposed.quick_push (dr_chain[t]); ++ } ++ vect_permute_store_chain (vinfo, dr_chain_transposed, num_each, ++ stmt_info, gsi, &result_chain_transposed); ++ for (t = 0; t < num_each; t++) ++ { ++ result_chain->quick_push (result_chain_transposed[t]); ++ } ++ } ++} ++ ++/* Function transpose_oprnd_store ++ ++ Calculate the transposed results from VEC_OPRNDS (VEC_STMT) ++ for vectorizable_store. */ ++ ++static void ++transpose_oprnd_store (vec_info *vinfo, vecvec_oprnds, ++ vec *result_chain, unsigned int vec_num, ++ unsigned int const_nunits, unsigned int array_num, ++ stmt_vec_info first_stmt_info, ++ gimple_stmt_iterator *gsi) ++{ ++ unsigned int group_for_transform = 0; ++ unsigned int num_each = 0; ++ ++ /* Transpose back for vec_oprnds. */ ++ /* vec = {vec1, vec2, ...} */ ++ if (array_num < const_nunits ++ && const_nunits % array_num == 0) ++ { ++ vect_transpose_store_chain (vinfo, vec_oprnds, ++ vec_num, array_num, ++ first_stmt_info, ++ gsi, result_chain); ++ } ++ /* vec1 = {vec_part1}, vec2 = {vec_part2}, ... */ ++ else if (array_num >= const_nunits ++ && array_num % const_nunits == 0) ++ { ++ group_for_transform = array_num / const_nunits; ++ num_each = vec_oprnds.length () / group_for_transform; ++ vect_permute_store_chains (vinfo, vec_oprnds, ++ num_each, first_stmt_info, ++ gsi, result_chain, ++ group_for_transform); ++ } ++ else ++ { ++ gcc_unreachable (); ++ } ++} ++ ++static dr_vec_info * ++get_dr_info (stmt_vec_info stmt_info) ++{ ++ dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); ++ if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED) ++ { ++ SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN); ++ } ++ return dr_info; ++} ++ ++static unsigned ++dr_align_vect_store (vec_info *vinfo, dr_vec_info *cur_first_dr_info, ++ tree vectype, unsigned HOST_WIDE_INT &align) ++{ ++ unsigned misalign = 0; ++ align = known_alignment (DR_TARGET_ALIGNMENT (cur_first_dr_info)); ++ if (aligned_access_p (cur_first_dr_info, vectype)) ++ { ++ return misalign; ++ } ++ else if (cur_first_dr_info->misalignment == -1) ++ { ++ align = dr_alignment (vect_dr_behavior (vinfo, cur_first_dr_info)); ++ } ++ else ++ { ++ misalign = cur_first_dr_info->misalignment; ++ } ++ return misalign; ++} ++ ++static void ++add_new_stmt_vect_store (vec_info *vinfo, tree vectype, tree dataref_ptr, ++ tree dataref_offset, tree ref_type, ++ dr_vec_info *cur_first_dr_info, tree vec_oprnd, ++ gimple_stmt_iterator *gsi, stmt_vec_info stmt_info) ++{ ++ /* Data align. */ ++ unsigned HOST_WIDE_INT align; ++ unsigned misalign = dr_align_vect_store (vinfo, cur_first_dr_info, ++ vectype, align); ++ ++ if (dataref_offset == NULL_TREE && TREE_CODE (dataref_ptr) == SSA_NAME) ++ { ++ set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, misalign); ++ } ++ ++ /* Get data_ref. 
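++     The MEM_REF is typed to reflect what is actually known: nothing
++     special when the access is known to be aligned, a build_aligned_type
++     variant with the derived alignment when the misalignment is unknown
++     (-1), and only the element alignment otherwise.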
*/ ++ tree offset = dataref_offset ? dataref_offset : build_int_cst (ref_type, 0); ++ tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr, offset); ++ if (aligned_access_p (cur_first_dr_info, vectype)) ++ { ++ ; ++ } ++ else if (cur_first_dr_info->misalignment == -1) ++ { ++ TREE_TYPE (data_ref) = build_aligned_type (TREE_TYPE (data_ref), ++ align * BITS_PER_UNIT); ++ } ++ else ++ { ++ tree elem_type = TREE_TYPE (vectype); ++ TREE_TYPE (data_ref) = build_aligned_type (TREE_TYPE (data_ref), ++ TYPE_ALIGN (elem_type)); ++ } ++ /* Add new stmt. */ ++ vect_copy_ref_info (data_ref, DR_REF (cur_first_dr_info->dr)); ++ gassign *new_stmt = gimple_build_assign (data_ref, vec_oprnd); ++ vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi, true); ++} + + /* Function vectorizable_store. + +@@ -8333,6 +8664,16 @@ vectorizable_store (vec_info *vinfo, + &vec_offsets); + vec_offset = vec_offsets[0]; + } ++ /* If the stmt_info need to be transposed recovery, dataref_ptr ++ will be caculated later. */ ++ else if (memory_access_type == VMAT_CONTIGUOUS ++ && is_a (vinfo) ++ && STMT_VINFO_GROUPED_ACCESS (stmt_info) ++ && DR_GROUP_SLP_TRANSPOSE ( ++ DR_GROUP_FIRST_ELEMENT (stmt_info))) ++ { ++ dataref_ptr = NULL_TREE; ++ } + else + dataref_ptr + = vect_create_data_ref_ptr (vinfo, first_stmt_info, aggr_type, +@@ -8423,6 +8764,75 @@ vectorizable_store (vec_info *vinfo, + } + else + { ++ /* group_size: the size of group after transposing and merging. ++ group_size_b: the size of group before transposing and merging, ++ and only group_size_b >= const_nunits is supported. ++ array_num: the number of arrays. ++ const_nunits: TYPE_VECTOR_SUBPARTS (vectype). ++ ncontinues: group_size_b / const_nunits, it means the number of ++ times an array is stored in memory. */ ++ if (slp && is_a (vinfo) ++ && STMT_VINFO_GROUPED_ACCESS (stmt_info) ++ && DR_GROUP_SLP_TRANSPOSE (DR_GROUP_FIRST_ELEMENT (stmt_info))) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "vectorizable_store for slp transpose.\n"); ++ } ++ /* Transpose back for grouped stores. */ ++ vect_transform_back_slp_grouped_stores (bb_vinfo, ++ first_stmt_info); ++ ++ result_chain.create (vec_oprnds.length ()); ++ unsigned int const_nunits = nunits.to_constant (); ++ unsigned int group_size_b = DR_GROUP_SIZE_TRANS (first_stmt_info); ++ unsigned int array_num = group_size / group_size_b; ++ transpose_oprnd_store (vinfo, vec_oprnds, &result_chain, vec_num, ++ const_nunits, array_num, ++ first_stmt_info, gsi); ++ ++ /* For every store group, not for every vec, because transposing ++ and merging have changed the data reference access. */ ++ gcc_assert (group_size_b >= const_nunits); ++ unsigned int ncontinues = group_size_b / const_nunits; ++ ++ unsigned int k = 0; ++ for (i = 0; i < array_num; i++) ++ { ++ stmt_vec_info first_stmt_b; ++ BB_VINFO_GROUPED_STORES (vinfo).iterate (i, &first_stmt_b); ++ bool simd_lane_access_p ++ = STMT_VINFO_SIMD_LANE_ACCESS_P (first_stmt_b) != 0; ++ tree ref_type = get_group_alias_ptr_type (first_stmt_b); ++ dataref_ptr = vect_create_data_ref_ptr ( ++ vinfo, first_stmt_b, aggr_type, ++ simd_lane_access_p ? loop : NULL, ++ offset, &dummy, gsi, &ptr_incr, ++ simd_lane_access_p, bump); ++ dr_vec_info *cur_first_dr_info = get_dr_info (first_stmt_b); ++ for (unsigned int t = 0; t < ncontinues; t++) ++ { ++ vec_oprnd = result_chain[k]; ++ k++; ++ if (t > 0) ++ { ++ /* Bump the vector pointer. 
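++		     For the second and subsequent vectors of the same
++		     array (t > 0), dataref_ptr advances by one vector
++		     width so consecutive vectors are stored back to back.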
*/ ++ dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ++ ptr_incr, gsi, ++ first_stmt_b, bump); ++ } ++ add_new_stmt_vect_store (vinfo, vectype, dataref_ptr, ++ dataref_offset, ref_type, ++ cur_first_dr_info, vec_oprnd, ++ gsi, first_stmt_b); ++ } ++ } ++ oprnds.release (); ++ result_chain.release (); ++ vec_oprnds.release (); ++ return true; ++ } + new_stmt = NULL; + if (grouped_store) + { +@@ -8719,6 +9129,451 @@ hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop) + return true; + } + ++static tree ++calculate_new_type (tree vectype, unsigned int const_nunits, ++ unsigned int group_size_b, unsigned int &nloads, ++ unsigned int &ncontinues, tree &lvectype) ++{ ++ tree ltype = TREE_TYPE (vectype); ++ /* nloads is the number of ARRAYs in a vector. ++ vectemp = {a[], b[], ...} */ ++ if (group_size_b < const_nunits) ++ { ++ tree ptype; ++ tree vtype ++ = vector_vector_composition_type (vectype, ++ const_nunits / group_size_b, ++ &ptype); ++ if (vtype != NULL_TREE) ++ { ++ nloads = const_nunits / group_size_b; ++ lvectype = vtype; ++ ltype = ptype; ++ ncontinues = 1; ++ } ++ } ++ /* ncontinues is the number of vectors from an ARRAY. ++ vectemp1 = {a[0], a[1], ...} ++ ... ++ vectempm = {a[k], a[k+1], ...} */ ++ else ++ { ++ nloads = 1; ++ ltype = vectype; ++ ncontinues = group_size_b / const_nunits; ++ } ++ ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype))); ++ return ltype; ++} ++ ++static void ++generate_old_load_permutations (slp_tree slp_node, unsigned int group_size, ++ vec &old_load_permutation) ++{ ++ /* Generate the old load permutations from the slp_node. */ ++ unsigned i = 0; ++ unsigned k = 0; ++ ++ /* If SLP_NODE has load_permutation, we copy it to old_load_permutation. ++ Otherwise, we generate a permutation sequentially. */ ++ if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) ++ { ++ FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), i, k) ++ { ++ old_load_permutation.safe_push (k); ++ } ++ } ++ else ++ { ++ for (unsigned i = 0; i < group_size; i++) ++ { ++ old_load_permutation.safe_push (i); ++ } ++ } ++} ++ ++static void ++generate_new_load_permutation_mapping (unsigned slp_node_length, ++ vec &group_idx, ++ const vec &load_permutation, ++ unsigned int group_size_b, ++ unsigned &new_group_size, ++ vec &group_from) ++{ ++ /* group_num_vec: only stores the group_loads IDs which are caculated from ++ load_permutation. */ ++ auto_vec group_num_vec; ++ ++ /* Caculate which group_loads are the stmts in SLP_NODE from. */ ++ unsigned i = 0; ++ unsigned k = 0; ++ FOR_EACH_VEC_ELT (load_permutation, i, k) ++ { ++ unsigned int t0 = k / group_size_b; ++ if (!group_num_vec.contains (t0)) ++ { ++ group_num_vec.safe_push (t0); ++ } ++ group_from.safe_push (t0); ++ } ++ group_num_vec.qsort (cmp_for_group_num); ++ /* n_groups: the number of group_loads. */ ++ unsigned int n_groups = group_num_vec.length (); ++ new_group_size = n_groups * group_size_b; ++ for (i = 0; i < n_groups; i++) ++ { ++ group_idx.safe_push (group_num_vec[i] * group_size_b); ++ } ++ /* A new mapping from group_ind_vec to group_from. 
++     For example:
++     Origin: group_from = {1,1,3,3,5,5,7,7};
++     After mapping: group_from = {0,0,1,1,2,2,3,3};  */
++  auto_vec<unsigned> group_ind_vec (n_groups);
++  for (k = 0; k < n_groups; k++)
++    {
++      group_ind_vec.safe_push (k);
++    }
++  for (i = 0; i < slp_node_length; i++)
++    {
++      for (k = 0; k < n_groups; k++)
++	{
++	  if (group_from[i] == group_num_vec[k])
++	    {
++	      group_from[i] = group_ind_vec[k];
++	      break;
++	    }
++	}
++    }
++}
++
++static void
++generate_new_load_permutation (vec<unsigned> &new_load_permutation,
++			       const vec<unsigned> &old_load_permutation,
++			       slp_tree slp_node, bool &this_load_permuted,
++			       const vec<unsigned> &group_from,
++			       unsigned int group_size_b)
++{
++  unsigned slp_node_length = SLP_TREE_SCALAR_STMTS (slp_node).length ();
++  /* Generate the new load permutation from the new mapping.  */
++  new_load_permutation.create (slp_node_length);
++  unsigned i = 0;
++  unsigned k = 0;
++  FOR_EACH_VEC_ELT (old_load_permutation, i, k)
++    {
++      /* t1 is the new position of element k of the old permutation.
++	 t1 = base_address + offset:
++	 base_address = group_from[i] * group_size_b;
++	 offset = k % group_size_b.  */
++      unsigned int t1
++	= group_from[i] * group_size_b + k % group_size_b;
++      new_load_permutation.safe_push (t1);
++      if (t1 != k)
++	{
++	  this_load_permuted = true;
++	}
++    }
++}
++
++static bool
++is_slp_perm (bool slp_perm, bool this_load_permuted, poly_uint64 nunits,
++	     unsigned int group_size, stmt_vec_info first_stmt_info)
++{
++  /* Calculate the unrolling factor based on the smallest type.  */
++  poly_uint64 unrolling_factor
++    = exact_div (common_multiple (nunits, group_size), group_size);
++  /* The load requires permutation when unrolling exposes
++     a gap either because the group is larger than the SLP
++     group-size or because there is a gap between the groups.  */
++  if (!slp_perm && !this_load_permuted
++      && (known_eq (unrolling_factor, 1U)
++	  || (group_size == DR_GROUP_SIZE (first_stmt_info)
++	      && DR_GROUP_GAP (first_stmt_info) == 0)))
++    {
++      return false;
++    }
++  else
++    {
++      return true;
++    }
++}
++
++static void
++generate_load_permutation (slp_tree slp_node, unsigned &new_group_size,
++			   unsigned int group_size, unsigned int group_size_b,
++			   bool &this_load_permuted, vec<unsigned> &group_idx,
++			   vec<unsigned> &new_load_permutation)
++{
++  /* Generate the old load permutations from SLP_NODE.  */
++  vec<unsigned> old_load_permutation;
++  old_load_permutation.create (group_size);
++  generate_old_load_permutations (slp_node, group_size, old_load_permutation);
++
++  /* Calculate which group_loads the stmts in SLP_NODE come from.  */
++  unsigned slp_node_length = SLP_TREE_SCALAR_STMTS (slp_node).length ();
++  /* group_from: stores the group_loads ID for every stmt in SLP_NODE.  */
++  vec<unsigned> group_from;
++  group_from.create (slp_node_length);
++  generate_new_load_permutation_mapping (slp_node_length, group_idx,
++					 old_load_permutation,
++					 group_size_b, new_group_size,
++					 group_from);
++
++  /* Generate the new load permutation from the new mapping and calculate
++     the this_load_permuted flag.  If this_load_permuted is true, we need to
++     execute the SLP permutation using the new load permutation.
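++     E.g. (worked example): old_load_permutation = {4, 5, 0, 1} with
++     group_size_b = 2 comes from group_loads 2 and 0; after sorting and
++     mapping, group_from = {1, 1, 0, 0} and new_group_size = 4, so
++     t1 = group_from[i] * 2 + k % 2 yields {2, 3, 0, 1} != {4, 5, 0, 1}
++     and this_load_permuted is set.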
*/ ++ generate_new_load_permutation (new_load_permutation, old_load_permutation, ++ slp_node, this_load_permuted, group_from, ++ group_size_b); ++ old_load_permutation.release (); ++ group_from.release (); ++} ++ ++static unsigned int ++dr_align_vect_load (vec_info *vinfo, dr_vec_info *cur_first_dr_info, ++ tree vectype, unsigned HOST_WIDE_INT &align, ++ enum dr_alignment_support alignment_support_scheme) ++{ ++ unsigned int misalign = 0; ++ ++ align = known_alignment (DR_TARGET_ALIGNMENT (cur_first_dr_info)); ++ if (alignment_support_scheme == dr_aligned) ++ { ++ gcc_assert (aligned_access_p (cur_first_dr_info, vectype)); ++ } ++ else if (cur_first_dr_info->misalignment == -1) ++ { ++ align = dr_alignment (vect_dr_behavior (vinfo, cur_first_dr_info)); ++ } ++ else ++ { ++ misalign = cur_first_dr_info->misalignment; ++ } ++ return misalign; ++} ++ ++static stmt_vec_info ++add_new_stmt_vect_load (vec_info *vinfo, tree vectype, tree dataref_ptr, ++ tree dataref_offset, tree ref_type, tree ltype, ++ gassign *(&new_stmt), dr_vec_info *cur_first_dr_info, ++ gimple_stmt_iterator *gsi, stmt_vec_info stmt_info) ++{ ++ /* Data align. */ ++ int malign = dr_misalignment (cur_first_dr_info, vectype); ++ enum dr_alignment_support alignment_support_scheme ++ = vect_supportable_dr_alignment (vinfo, cur_first_dr_info, ++ vectype, malign); ++ unsigned HOST_WIDE_INT align; ++ unsigned int misalign = dr_align_vect_load (vinfo, cur_first_dr_info, ++ vectype, align, ++ alignment_support_scheme); ++ if (dataref_offset == NULL_TREE && TREE_CODE (dataref_ptr) == SSA_NAME) ++ { ++ set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, misalign); ++ } ++ ++ /* Get data_ref. */ ++ tree offset = dataref_offset ? dataref_offset : build_int_cst (ref_type, 0); ++ tree data_ref = fold_build2 (MEM_REF, ltype, dataref_ptr, offset); ++ if (alignment_support_scheme == dr_aligned) ++ { ++ ; ++ } ++ else if (cur_first_dr_info->misalignment == -1) ++ { ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), align * BITS_PER_UNIT); ++ } ++ else ++ { ++ tree elem_type = TREE_TYPE (vectype); ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), TYPE_ALIGN (elem_type)); ++ } ++ ++ /* Add new stmt. 
*/
++  vect_copy_ref_info (data_ref, DR_REF (cur_first_dr_info->dr));
++  new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
++  vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi, true);
++  stmt_vec_info vec_stmt_info = vinfo->lookup_stmt (new_stmt);
++  return vec_stmt_info;
++}
++
++static void
++push_new_stmt_to_dr_chain (bool slp_perm, stmt_vec_info new_stmt_info,
++			   vec<tree> dr_chain, slp_tree slp_node)
++{
++  if (slp_perm)
++    dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
++  else
++    SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info->stmt);
++}
++
++static stmt_vec_info
++get_first_stmt_info_before_transpose (stmt_vec_info first_stmt_info,
++				      unsigned int group_el,
++				      unsigned int group_size)
++{
++  stmt_vec_info last_stmt_info = first_stmt_info;
++  unsigned int count = 0;
++  gcc_assert (group_el < group_size);
++  while (count < group_el)
++    {
++      last_stmt_info = DR_GROUP_NEXT_ELEMENT (last_stmt_info);
++      count++;
++    }
++  return last_stmt_info;
++}
++
++static stmt_vec_info
++add_new_stmt_for_nloads_greater_than_one (vec_info *vinfo, tree lvectype,
++					  tree vectype,
++					  vec<constructor_elt, va_gc> *v,
++					  stmt_vec_info stmt_info,
++					  gimple_stmt_iterator *gsi)
++{
++  tree vec_inv = build_constructor (lvectype, v);
++  tree new_temp = vect_init_vector (vinfo, stmt_info, vec_inv, lvectype, gsi, true);
++  stmt_vec_info new_stmt_info = vinfo->lookup_def (new_temp);
++  if (lvectype != vectype)
++    {
++      gassign *new_stmt = gimple_build_assign (make_ssa_name (vectype),
++					       VIEW_CONVERT_EXPR,
++					       build1 (VIEW_CONVERT_EXPR,
++						       vectype, new_temp));
++      vect_finish_stmt_generation (vinfo, stmt_info, new_stmt, gsi, true);
++      new_stmt_info = vinfo->lookup_stmt (new_stmt);
++    }
++  return new_stmt_info;
++}
++
++/* Function new_vect_stmt_for_nloads.
++
++   Build a VEC_STMT when NLOADS arrays are merged into one vector.
++
++   ncopies is the number of vectors that need to be loaded from memory.
++   nloads is the number of ARRAYs in a vector.
++   vectemp = {a[], b[], ...}  */
++
++static void
++new_vect_stmt_for_nloads (vec_info *vinfo, unsigned int ncopies,
++			  unsigned int nloads, const vec<unsigned> &group_idx,
++			  stmt_vec_info stmt_info, offset_info *offset_info,
++			  vectype_info *vectype_info,
++			  vect_memory_access_type memory_access_type,
++			  bool slp_perm, vec<tree> dr_chain, slp_tree slp_node,
++			  gimple_stmt_iterator *gsi)
++{
++  vec<constructor_elt, va_gc> *v = NULL;
++  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
++  unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
++  stmt_vec_info first_stmt_info_b = NULL;
++  stmt_vec_info new_stmt_info = NULL;
++  tree dataref_ptr = NULL_TREE;
++  tree dummy;
++  gimple *ptr_incr = NULL;
++  unsigned int n = 0;
++  for (unsigned int i = 0; i < ncopies; i++)
++    {
++      vec_alloc (v, nloads);
++      for (unsigned int t = 0; t < nloads; t++)
++	{
++	  first_stmt_info_b = get_first_stmt_info_before_transpose (
++				first_stmt_info, group_idx[n++], group_size);
++	  dr_vec_info *cur_first_dr_info = get_dr_info (first_stmt_info_b);
++	  tree bump = vect_get_data_ptr_increment (vinfo, cur_first_dr_info,
++						   vectype_info->ltype,
++						   memory_access_type);
++	  bool simd_lane_access_p
++	    = STMT_VINFO_SIMD_LANE_ACCESS_P (first_stmt_info_b) != 0;
++
++	  /* Create dataref_ptr, which points to init_address.
*/ ++ dataref_ptr = vect_create_data_ref_ptr ( ++ vinfo, first_stmt_info_b, vectype_info->ltype, NULL, ++ offset_info->offset, &dummy, gsi, &ptr_incr, ++ simd_lane_access_p, bump); ++ ++ gassign *new_stmt = NULL; ++ new_stmt_info = add_new_stmt_vect_load (vinfo, vectype_info->vectype, dataref_ptr, ++ offset_info->dataref_offset, ++ vectype_info->ref_type, vectype_info->ltype, ++ new_stmt, cur_first_dr_info, gsi, ++ first_stmt_info_b); ++ ++ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, gimple_assign_lhs (new_stmt)); ++ } ++ new_stmt_info = add_new_stmt_for_nloads_greater_than_one ( ++ vinfo, vectype_info->lvectype, ++ vectype_info->vectype, v, ++ first_stmt_info_b, gsi); ++ push_new_stmt_to_dr_chain (slp_perm, new_stmt_info, ++ dr_chain, slp_node); ++ } ++} ++ ++/* Function new_vect_stmt_for_ncontinues. ++ ++ New a VEC_STMTs when an Array is divided into several vectors. ++ ++ n_groups is the number of ARRAYs. ++ ncontinues is the number of vectors from an ARRAY. ++ vectemp1 = {a[0], a[1], ...} ++ ... ++ vectempm = {a[k], a[k+1], ...} */ ++ ++static void ++new_vect_stmt_for_ncontinues (vec_info *vinfo, unsigned int ncontinues, ++ const vec &group_idx, ++ stmt_vec_info stmt_info, ++ offset_info* offset_info, ++ vectype_info* vectype_info, ++ vect_memory_access_type memory_access_type, ++ bool slp_perm, vec &dr_chain, ++ slp_tree slp_node, ++ gimple_stmt_iterator *gsi) ++{ ++ stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); ++ unsigned int group_size = DR_GROUP_SIZE (first_stmt_info); ++ stmt_vec_info new_stmt_info = NULL; ++ tree dataref_ptr = NULL_TREE; ++ tree dummy; ++ gimple *ptr_incr = NULL; ++ unsigned int n_groups = group_idx.length (); ++ for (unsigned int i = 0; i < n_groups; i++) ++ { ++ stmt_vec_info first_stmt_info_b = get_first_stmt_info_before_transpose ( ++ first_stmt_info, group_idx[i], group_size); ++ dr_vec_info* cur_first_dr_info = get_dr_info (first_stmt_info_b); ++ tree bump = vect_get_data_ptr_increment (vinfo, cur_first_dr_info, ++ vectype_info->ltype, memory_access_type); ++ bool simd_lane_access_p ++ = STMT_VINFO_SIMD_LANE_ACCESS_P (first_stmt_info_b) != 0; ++ for (unsigned int k = 0; k < ncontinues; k++) ++ { ++ /* Create dataref_ptr which is point to init_address. */ ++ if (k == 0) ++ { ++ dataref_ptr = vect_create_data_ref_ptr ( ++ vinfo, first_stmt_info_b, vectype_info->ltype, NULL, ++ offset_info->offset, &dummy, gsi, &ptr_incr, ++ simd_lane_access_p, bump); ++ } ++ else ++ { ++ dataref_ptr = bump_vector_ptr (vinfo, dataref_ptr, ptr_incr, ++ gsi, first_stmt_info_b, bump); ++ } ++ gassign *new_stmt = NULL; ++ new_stmt_info = add_new_stmt_vect_load (vinfo, vectype_info->vectype, dataref_ptr, ++ offset_info->dataref_offset, ++ vectype_info->ref_type, vectype_info->ltype, ++ new_stmt, cur_first_dr_info, gsi, ++ first_stmt_info_b); ++ push_new_stmt_to_dr_chain (slp_perm, new_stmt_info, ++ dr_chain, slp_node); ++ } ++ } ++} ++ + /* vectorizable_load. + + Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure) +@@ -9338,6 +10193,8 @@ vectorizable_load (vec_info *vinfo, + if (bb_vinfo) + first_stmt_info_for_drptr + = vect_find_first_scalar_stmt_in_slp (slp_node); ++ // first_stmt_info_for_drptr = SLP_TREE_SCALAR_STMTS (slp_node)[0]; ++ + + /* Check if the chain of loads is already vectorized. 
*/ + if (STMT_VINFO_VEC_STMTS (first_stmt_info).exists () +@@ -9601,6 +10458,9 @@ vectorizable_load (vec_info *vinfo, + } + tree vec_mask = NULL_TREE; + poly_uint64 group_elt = 0; ++ unsigned new_group_size = 0; ++ vec new_load_permutation; ++ + for (j = 0; j < ncopies; j++) + { + /* 1. Create the vector or array pointer update chain. */ +@@ -9621,6 +10481,15 @@ vectorizable_load (vec_info *vinfo, + dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr)); + dataref_offset = build_int_cst (ref_type, 0); + } ++ /* If the stmt_info need to be transposed recovery, dataref_ptr ++ will be caculated later. */ ++ else if (slp && is_a (vinfo) ++ && STMT_VINFO_GROUPED_ACCESS (stmt_info) ++ && DR_GROUP_SLP_TRANSPOSE ( ++ DR_GROUP_FIRST_ELEMENT (stmt_info))) ++ { ++ dataref_ptr = NULL_TREE; ++ } + else if (diff_first_stmt_info) + { + dataref_ptr +@@ -9731,6 +10600,63 @@ vectorizable_load (vec_info *vinfo, + /* Record that VEC_ARRAY is now dead. */ + vect_clobber_variable (vinfo, stmt_info, gsi, vec_array); + } ++ else if (slp && is_a (vinfo) ++ && STMT_VINFO_GROUPED_ACCESS (stmt_info) ++ && DR_GROUP_SLP_TRANSPOSE (DR_GROUP_FIRST_ELEMENT (stmt_info))) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "vectorizable_load for slp transpose.\n"); ++ } ++ /* group_size: the size of group after merging. ++ group_size_b: the size of group before merging. ++ const_nunits: TYPE_VECTOR_SUBPARTS (vectype), it is the number of ++ elements in a vector. ++ nloads: const_nunits / group_size_b or 1, it means the number ++ of ARRAYs in a vector. ++ ncontinues: group_size_b / const_nunits or 1, it means the number ++ of vectors from an ARRAY. */ ++ unsigned int group_size_b = DR_GROUP_SIZE_TRANS (first_stmt_info); ++ unsigned int const_nunits = nunits.to_constant (); ++ unsigned int nloads = const_nunits; ++ unsigned int ncontinues = group_size_b; ++ tree lvectype = vectype; ++ tree ltype = calculate_new_type (vectype, const_nunits, ++ group_size_b, nloads, ++ ncontinues, lvectype); ++ bool this_load_permuted = false; ++ auto_vec group_idx; ++ generate_load_permutation (slp_node, new_group_size, group_size, ++ group_size_b, this_load_permuted, ++ group_idx, new_load_permutation); ++ slp_perm = is_slp_perm (slp_perm, this_load_permuted, nunits, ++ group_size, first_stmt_info); ++ ++ /* ncopies: the number of vectors that need to be loaded from ++ memmory. */ ++ unsigned int ncopies = new_group_size / const_nunits; ++ offset_info offset_info = {offset, NULL_TREE, dataref_offset}; ++ vectype_info vectype_info = {vectype, ltype, lvectype, ref_type}; ++ if (slp_perm) ++ { ++ dr_chain.create (ncopies); ++ } ++ if (nloads > 1 && ncontinues == 1) ++ { ++ new_vect_stmt_for_nloads (vinfo, ncopies, nloads, group_idx, ++ stmt_info, &offset_info, &vectype_info, ++ memory_access_type, slp_perm, dr_chain, ++ slp_node, gsi); ++ } ++ else ++ { ++ new_vect_stmt_for_ncontinues (vinfo, ncontinues, group_idx, ++ stmt_info, &offset_info, ++ &vectype_info, memory_access_type, ++ slp_perm, dr_chain, slp_node, gsi); ++ } ++ } + else + { + for (i = 0; i < vec_num; i++) +@@ -10177,7 +11103,32 @@ vectorizable_load (vec_info *vinfo, + if (slp && !slp_perm) + continue; + +- if (slp_perm) ++ /* Using the new load permutation to generate vector permute statements ++ from a list of loads in DR_CHAIN. 
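++	 DR_GROUP_SIZE and SLP_TREE_LOAD_PERMUTATION are temporarily switched
++	 to the transposed values around vect_transform_slp_perm_load and
++	 restored afterwards; e.g. with new_load_permutation = {2, 3, 0, 1}
++	 the generated permute takes the lanes of the second loaded vector
++	 before those of the first.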
++      if (slp && slp_perm && is_a <bb_vec_info> (vinfo)
++	  && STMT_VINFO_GROUPED_ACCESS (stmt_info)
++	  && DR_GROUP_SLP_TRANSPOSE (DR_GROUP_FIRST_ELEMENT (stmt_info)))
++	{
++	  unsigned n_perms;
++	  stmt_vec_info stmt_info_ = SLP_TREE_SCALAR_STMTS (slp_node)[0];
++	  unsigned int old_size = DR_GROUP_SIZE (stmt_info);
++	  DR_GROUP_SIZE (stmt_info_) = new_group_size;
++	  vec<unsigned> old_load_permutation
++	    = SLP_TREE_LOAD_PERMUTATION (slp_node);
++	  SLP_TREE_LOAD_PERMUTATION (slp_node) = new_load_permutation;
++	  bool perm_load_success = vect_transform_slp_perm_load (
++		vinfo, slp_node, dr_chain, gsi, vf,
++		false, &n_perms);
++	  DR_GROUP_SIZE (stmt_info_) = old_size;
++	  SLP_TREE_LOAD_PERMUTATION (slp_node) = old_load_permutation;
++	  new_load_permutation.release ();
++	  if (!perm_load_success)
++	    {
++	      dr_chain.release ();
++	      return false;
++	    }
++	}
++      else if (slp_perm)
+ 	{
+ 	  unsigned n_perms;
+ 	  /* For SLP we know we've seen all possible uses of dr_chain so
+diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
+index 642eb0aeb..e13bc6c99 100644
+--- a/gcc/tree-vectorizer.h
++++ b/gcc/tree-vectorizer.h
+@@ -412,6 +412,21 @@ public:
+   vec<ddr_p> ddrs;
+ };
+
++/* Information about offsets in vectorizable_load.  */
++struct offset_info {
++  tree offset;
++  tree byte_offset;
++  tree dataref_offset;
++};
++
++/* Information about vectype in vectorizable_load.  */
++struct vectype_info {
++  tree vectype;
++  tree ltype;
++  tree lvectype;
++  tree ref_type;
++};
++
+ /* Vectorizer state common between loop and basic-block vectorization.  */
+ class vec_info {
+ public:
+@@ -455,6 +470,14 @@ public:
+      stmt in the chain.  */
+   auto_vec<stmt_vec_info> grouped_stores;
+
++  /* All interleaving chains of loads, represented by the first
++     stmt in the chain.  */
++  auto_vec<stmt_vec_info> grouped_loads;
++
++  /* All interleaving chains of stores (before being transposed),
++     represented by all stmts in the chain.  */
++  auto_vec<vec<stmt_vec_info> > scalar_stores;
++
+   /* The set of vector modes used in the vectorized region.  */
+   mode_set used_vector_modes;
+
+@@ -899,6 +922,8 @@ public:
+ #define LOOP_VINFO_CHECK_NONZERO(L)        (L)->check_nonzero
+ #define LOOP_VINFO_LOWER_BOUNDS(L)         (L)->lower_bounds
+ #define LOOP_VINFO_GROUPED_STORES(L)       (L)->grouped_stores
++#define LOOP_VINFO_GROUPED_LOADS(L)        (L)->grouped_loads
++#define LOOP_VINFO_SCALAR_STORES(L)        (L)->scalar_stores
+ #define LOOP_VINFO_SLP_INSTANCES(L)        (L)->slp_instances
+ #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
+ #define LOOP_VINFO_REDUCTIONS(L)           (L)->reductions
+@@ -982,6 +1007,25 @@ public:
+   vec<basic_block> bbs;
+
+   vec<slp_root> roots;
++
++  /* True if bb_vinfo can go to vect_analyze_slp.  */
++  bool before_slp;
++
++  /* True if bb_vinfo is a transposed version.  */
++  bool transposed;
++
++  /* The number of transposed groups.  */
++  int transposed_group;
++
++  /* The cost of the scalar iterations.  */
++  int scalar_cost;
++
++  /* The cost of the vector prologue and epilogue, including peeled
++     iterations and set-up code.  */
++  int vec_outside_cost;
++
++  /* The cost of the vector loop body.  */
++  int vec_inside_cost;
+ } *bb_vec_info;
+
+ #define BB_VINFO_BB(B)               (B)->bb
+@@ -989,6 +1033,14 @@ public:
+ #define BB_VINFO_SLP_INSTANCES(B)    (B)->slp_instances
+ #define BB_VINFO_DATAREFS(B)         (B)->shared->datarefs
+ #define BB_VINFO_DDRS(B)             (B)->shared->ddrs
++#define BB_VINFO_GROUPED_LOADS(B)    (B)->grouped_loads
++#define BB_VINFO_SCALAR_STORES(B)    (B)->scalar_stores
++#define BB_VINFO_VEC_OUTSIDE_COST(B) (B)->vec_outside_cost
++#define BB_VINFO_VEC_INSIDE_COST(B)  (B)->vec_inside_cost
++#define BB_VINFO_SCALAR_COST(B)      (B)->scalar_cost
++#define BB_VINFO_SLP_TRANSPOSED(B)   (B)->transposed
++#define BB_VINFO_BEFORE_SLP(B)       (B)->before_slp
++#define BB_VINFO_TRANS_GROUPS(B)     (B)->transposed_group
+
+ /*-----------------------------------------------------------------*/
+ /* Info on vectorized defs.                                        */
+@@ -1219,6 +1271,17 @@ public:
+   stmt_vec_info next_element;
+   /* The size of the group.  */
+   unsigned int size;
++
++  /* The size of the group before being transposed.  */
++  unsigned int size_before_transpose;
++
++  /* If true, the stmt_info is SLP transposed.  */
++  bool slp_transpose;
++
++  /* Mark the group store number for rebuilding the interleaving chain
++     during the transpose phase.  The value -1 means the group cannot
++     be transposed.  */
++  int group_number;
++
+   /* For stores, number of stores from this group seen. We vectorize the last
+      one.  */
+   unsigned int store_count;
+@@ -1226,6 +1289,9 @@ public:
+      is 1.  */
+   unsigned int gap;
+
++  /* The gap before being transposed.  */
++  unsigned int gap_before_transpose;
++
+   /* The minimum negative dependence distance this stmt participates in
+      or zero if none.  */
+   unsigned int min_neg_dist;
+@@ -1427,6 +1493,12 @@ struct gather_scatter_info {
+ #define STMT_VINFO_SLP_VECT_ONLY(S)         (S)->slp_vect_only_p
+ #define STMT_VINFO_SLP_VECT_ONLY_PATTERN(S) (S)->slp_vect_pattern_only_p
+
++#define DR_GROUP_SLP_TRANSPOSE(S) \
++  (gcc_checking_assert ((S)->dr_aux.dr), (S)->slp_transpose)
++#define DR_GROUP_SIZE_TRANS(S) \
++  (gcc_checking_assert ((S)->dr_aux.dr), (S)->size_before_transpose)
++#define DR_GROUP_NUMBER(S) \
++  (gcc_checking_assert ((S)->dr_aux.dr), (S)->group_number)
+ #define DR_GROUP_FIRST_ELEMENT(S) \
+   (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)
+ #define DR_GROUP_NEXT_ELEMENT(S) \
+@@ -1437,6 +1509,8 @@ struct gather_scatter_info {
+   (gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count)
+ #define DR_GROUP_GAP(S) \
+   (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap)
++#define DR_GROUP_GAP_TRANS(S) \
++  (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap_before_transpose)
+
+ #define REDUC_GROUP_FIRST_ELEMENT(S) \
+   (gcc_checking_assert (!(S)->dr_aux.dr), (S)->first_element)
+@@ -2033,6 +2107,17 @@ vect_get_scalar_dr_size (dr_vec_info *dr_info)
+   return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_info->dr))));
+ }
+
++/* Compare two unsigned ints A and B, sorting them in ascending order.  */
++
++static inline int
++cmp_for_group_num (const void *a_, const void *b_)
++{
++  unsigned int a = *(unsigned int *) const_cast <void *> (a_);
++  unsigned int b = *(unsigned int *) const_cast <void *> (b_);
++  return a < b ? -1 : 1;
++}
++
+ /* Return true if LOOP_VINFO requires a runtime check for whether the
+    vector loop is profitable.  */
+
+@@ -2152,7 +2237,7 @@ record_stmt_cost (stmt_vector_for_cost *body_cost_vec, int count,
+
+ extern void vect_finish_replace_stmt (vec_info *, stmt_vec_info, gimple *);
+ extern void vect_finish_stmt_generation (vec_info *, stmt_vec_info, gimple *,
+-					 gimple_stmt_iterator *);
++					 gimple_stmt_iterator *, bool transpose = false);
+ extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info, bool *);
+ extern tree vect_get_store_rhs (stmt_vec_info);
+ void vect_get_vec_defs_for_operand (vec_info *vinfo, stmt_vec_info, unsigned,
+@@ -2168,7 +2253,7 @@ void vect_get_vec_defs (vec_info *, stmt_vec_info, slp_tree, unsigned,
+ 			tree = NULL, vec<tree> * = NULL, tree = NULL,
+ 			tree = NULL, vec<tree> * = NULL, tree = NULL);
+ extern tree vect_init_vector (vec_info *, stmt_vec_info, tree, tree,
+-			      gimple_stmt_iterator *);
++			      gimple_stmt_iterator *, bool transpose = false);
+ extern tree vect_get_slp_vect_def (slp_tree, unsigned);
+ extern bool vect_transform_stmt (vec_info *, stmt_vec_info,
+ 				 gimple_stmt_iterator *,
+@@ -2235,6 +2320,9 @@ extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
+ extern void vect_permute_store_chain (vec_info *, vec<tree> &,
+ 				      unsigned int, stmt_vec_info,
+ 				      gimple_stmt_iterator *, vec<tree> *);
++extern void vect_transpose_store_chain (vec_info *, vec<tree>, unsigned int,
++					unsigned int, stmt_vec_info,
++					gimple_stmt_iterator *, vec<tree> *);
+ extern tree vect_setup_realignment (vec_info *,
+ 				    stmt_vec_info, gimple_stmt_iterator *,
+ 				    tree *, enum dr_alignment_support, tree,
+@@ -2262,7 +2350,8 @@ extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree,
+ 				  enum tree_code);
+ extern bool needs_fold_left_reduction_p (tree, code_helper);
+ /* Drive for loop analysis stage.  */
+-extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *);
++extern opt_loop_vec_info vect_analyze_loop (class loop *, vec_info_shared *,
++					    bool result_only_p = false);
+ extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
+ extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *,
+ 					 tree *, bool);
+@@ -2331,6 +2420,7 @@ extern bool vect_transform_slp_perm_load (vec_info *, slp_tree, const vec<tree> &,
+ 					  gimple_stmt_iterator *, poly_uint64,
+ 					  bool, unsigned *,
+ 					  unsigned * = nullptr, bool = false);
++extern void vect_transform_back_slp_grouped_stores (bb_vec_info, stmt_vec_info);
+ extern bool vect_slp_analyze_operations (vec_info *);
+ extern void vect_schedule_slp (vec_info *, const vec<slp_instance> &);
+ extern opt_result vect_analyze_slp (vec_info *, unsigned);
+--
+2.33.0
+
diff --git a/0100-Add-hip09-machine-discribtion.patch b/0100-Add-hip09-machine-discribtion.patch
new file mode 100644
index 0000000000000000000000000000000000000000..c2221d937ecabd58db19b1e0f7782a57a83ac638
--- /dev/null
+++ b/0100-Add-hip09-machine-discribtion.patch
@@ -0,0 +1,882 @@
+From d9131757175667d35e74d9ee84689039990af768 Mon Sep 17 00:00:00 2001
+From: xingyushuai
+Date: Fri, 3 Mar 2023 09:31:04 +0800
+Subject: [PATCH 001/157] Add hip09 machine description
+
+Here is the patch introducing the hip09 machine model
+for the scheduler.
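+
+In outline, the model splits the core into three automata: "hip09" for
+the branch and integer pipes, "hip09_ldst" for the load/store pipes and
+"hip09_fsu" for the FP/ASIMD pipes, with per-type insn reservations on
+top of them.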
+--- + gcc/config/aarch64/aarch64-cores.def | 1 + + gcc/config/aarch64/aarch64-cost-tables.h | 104 +++++ + gcc/config/aarch64/aarch64-tune.md | 2 +- + gcc/config/aarch64/aarch64.cc | 109 +++++ + gcc/config/aarch64/aarch64.md | 1 + + gcc/config/aarch64/hip09.md | 558 +++++++++++++++++++++++ + 6 files changed, 774 insertions(+), 1 deletion(-) + create mode 100644 gcc/config/aarch64/hip09.md + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 70b11eb80..a854bdb24 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -130,6 +130,7 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F + + /* HiSilicon ('H') cores. */ + AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) ++AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0xd02, 0x0) + + /* ARMv8.3-A Architecture Processors. */ + +diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h +index 48522606f..fc5a3cbe4 100644 +--- a/gcc/config/aarch64/aarch64-cost-tables.h ++++ b/gcc/config/aarch64/aarch64-cost-tables.h +@@ -668,6 +668,110 @@ const struct cpu_cost_table a64fx_extra_costs = + } + }; + ++const struct cpu_cost_table hip09_extra_costs = ++{ ++ /* ALU */ ++ { ++ 0, /* arith. */ ++ 0, /* logical. */ ++ 0, /* shift. */ ++ 0, /* shift_reg. */ ++ COSTS_N_INSNS (1), /* arith_shift. */ ++ COSTS_N_INSNS (1), /* arith_shift_reg. */ ++ COSTS_N_INSNS (1), /* log_shift. */ ++ COSTS_N_INSNS (1), /* log_shift_reg. */ ++ 0, /* extend. */ ++ COSTS_N_INSNS (1), /* extend_arith. */ ++ 0, /* bfi. */ ++ 0, /* bfx. */ ++ 0, /* clz. */ ++ 0, /* rev. */ ++ 0, /* non_exec. */ ++ true /* non_exec_costs_exec. */ ++ }, ++ ++ { ++ /* MULT SImode */ ++ { ++ COSTS_N_INSNS (2), /* simple. */ ++ COSTS_N_INSNS (2), /* flag_setting. */ ++ COSTS_N_INSNS (2), /* extend. */ ++ COSTS_N_INSNS (2), /* add. */ ++ COSTS_N_INSNS (2), /* extend_add. */ ++ COSTS_N_INSNS (11) /* idiv. */ ++ }, ++ /* MULT DImode */ ++ { ++ COSTS_N_INSNS (3), /* simple. */ ++ 0, /* flag_setting (N/A). */ ++ COSTS_N_INSNS (3), /* extend. */ ++ COSTS_N_INSNS (3), /* add. */ ++ COSTS_N_INSNS (3), /* extend_add. */ ++ COSTS_N_INSNS (19) /* idiv. */ ++ } ++ }, ++ /* LD/ST */ ++ { ++ COSTS_N_INSNS (3), /* load. */ ++ COSTS_N_INSNS (4), /* load_sign_extend. */ ++ COSTS_N_INSNS (3), /* ldrd. */ ++ COSTS_N_INSNS (3), /* ldm_1st. */ ++ 1, /* ldm_regs_per_insn_1st. */ ++ 2, /* ldm_regs_per_insn_subsequent. */ ++ COSTS_N_INSNS (4), /* loadf. */ ++ COSTS_N_INSNS (4), /* loadd. */ ++ COSTS_N_INSNS (4), /* load_unaligned. */ ++ 0, /* store. */ ++ 0, /* strd. */ ++ 0, /* stm_1st. */ ++ 1, /* stm_regs_per_insn_1st. */ ++ 2, /* stm_regs_per_insn_subsequent. */ ++ 0, /* storef. */ ++ 0, /* stored. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (4), /* loadv. */ ++ COSTS_N_INSNS (4) /* storev. */ ++ }, ++ { ++ /* FP SFmode */ ++ { ++ COSTS_N_INSNS (10), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (4), /* mult_addsub. */ ++ COSTS_N_INSNS (4), /* fma. */ ++ COSTS_N_INSNS (4), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. 
*/ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ }, ++ /* FP DFmode */ ++ { ++ COSTS_N_INSNS (17), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (6), /* mult_addsub. */ ++ COSTS_N_INSNS (6), /* fma. */ ++ COSTS_N_INSNS (3), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ } ++ }, ++ /* Vector */ ++ { ++ COSTS_N_INSNS (1) /* alu. */ ++ } ++}; ++ + const struct cpu_cost_table ampere1_extra_costs = + { + /* ALU */ +diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md +index 9dc9adc70..238bb6e31 100644 +--- a/gcc/config/aarch64/aarch64-tune.md ++++ b/gcc/config/aarch64/aarch64-tune.md +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2" ++ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 5537a537c..e9b3980c4 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -465,6 +465,22 @@ static const struct cpu_addrcost_table tsv110_addrcost_table = + 0, /* imm_offset */ + }; + ++static const struct cpu_addrcost_table hip09_addrcost_table = ++{ ++ { ++ 1, /* hi */ ++ 0, /* si */ ++ 0, /* di */ ++ 1, /* ti */ ++ }, ++ 0, /* pre_modify */ ++ 0, /* post_modify */ ++ 0, /* register_offset */ ++ 1, /* register_sextend */ ++ 1, /* register_zextend */ ++ 0, /* imm_offset */ ++}; ++ + static const struct cpu_addrcost_table qdf24xx_addrcost_table = + { + { +@@ -660,6 +676,16 @@ static const struct cpu_regmove_cost a64fx_regmove_cost = + 2 /* FP2FP */ + }; + ++static const struct cpu_regmove_cost hip09_regmove_cost = ++{ ++ 1, /* GP2GP */ ++ /* Avoid the use of slow int<->fp moves for spilling by 
setting ++ their cost higher than memmov_cost. */ ++ 2, /* GP2FP */ ++ 3, /* FP2GP */ ++ 2 /* FP2FP */ ++}; ++ + static const struct cpu_regmove_cost neoversen2_regmove_cost = + { + 1, /* GP2GP */ +@@ -947,6 +973,43 @@ static const struct cpu_vector_cost tsv110_vector_cost = + nullptr /* issue_info */ + }; + ++static const advsimd_vec_cost hip09_advsimd_vector_cost = ++{ ++ 2, /* int_stmt_cost */ ++ 2, /* fp_stmt_cost */ ++ 0, /* ld2_st2_permute_cost */ ++ 0, /* ld3_st3_permute_cost */ ++ 0, /* ld4_st4_permute_cost */ ++ 2, /* permute_cost */ ++ 3, /* reduc_i8_cost */ ++ 3, /* reduc_i16_cost */ ++ 3, /* reduc_i32_cost */ ++ 3, /* reduc_i64_cost */ ++ 3, /* reduc_f16_cost */ ++ 3, /* reduc_f32_cost */ ++ 3, /* reduc_f64_cost */ ++ 3, /* store_elt_extra_cost */ ++ 3, /* vec_to_scalar_cost */ ++ 2, /* scalar_to_vec_cost */ ++ 5, /* align_load_cost */ ++ 5, /* unalign_load_cost */ ++ 1, /* unalign_store_cost */ ++ 1 /* store_cost */ ++}; ++ ++static const struct cpu_vector_cost hip09_vector_cost = ++{ ++ 1, /* scalar_int_stmt_cost */ ++ 1, /* scalar_fp_stmt_cost */ ++ 5, /* scalar_load_cost */ ++ 1, /* scalar_store_cost */ ++ 1, /* cond_taken_branch_cost */ ++ 1, /* cond_not_taken_branch_cost */ ++ &hip09_advsimd_vector_cost, /* advsimd */ ++ nullptr, /* sve */ ++ nullptr /* issue_info */ ++}; ++ + static const advsimd_vec_cost cortexa57_advsimd_vector_cost = + { + 2, /* int_stmt_cost */ +@@ -1293,6 +1356,18 @@ static const cpu_prefetch_tune tsv110_prefetch_tune = + -1 /* default_opt_level */ + }; + ++ ++static const cpu_prefetch_tune hip09_prefetch_tune = ++{ ++ 0, /* num_slots */ ++ 64, /* l1_cache_size */ ++ 64, /* l1_cache_line_size */ ++ 512, /* l2_cache_size */ ++ true, /* prefetch_dynamic_strides */ ++ -1, /* minimum_stride */ ++ -1 /* default_opt_level */ ++}; ++ + static const cpu_prefetch_tune xgene1_prefetch_tune = + { + 8, /* num_slots */ +@@ -1658,6 +1733,40 @@ static const struct tune_params tsv110_tunings = + &tsv110_prefetch_tune + }; + ++static const struct tune_params hip09_tunings = ++{ ++ &hip09_extra_costs, ++ &hip09_addrcost_table, ++ &hip09_regmove_cost, ++ &hip09_vector_cost, ++ &generic_branch_cost, ++ &generic_approx_modes, ++ SVE_256, /* sve_width */ ++ { 4, /* load_int. */ ++ 4, /* store_int. */ ++ 4, /* load_fp. */ ++ 4, /* store_fp. */ ++ 4, /* load_pred. */ ++ 4 /* store_pred. */ ++ }, /* memmov_cost. */ ++ 4, /* issue_rate */ ++ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH ++ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */ ++ "16", /* function_align. */ ++ "4", /* jump_align. */ ++ "8", /* loop_align. */ ++ 2, /* int_reassoc_width. */ ++ 4, /* fp_reassoc_width. */ ++ 1, /* vec_reassoc_width. */ ++ 2, /* min_div_recip_mul_sf. */ ++ 2, /* min_div_recip_mul_df. */ ++ 0, /* max_case_values. */ ++ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ ++ (AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS ++ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. 
*/ ++ &hip09_prefetch_tune ++}; ++ + static const struct tune_params xgene1_tunings = + { + &xgene1_extra_costs, +diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +index d24c8afcf..cf699e4c7 100644 +--- a/gcc/config/aarch64/aarch64.md ++++ b/gcc/config/aarch64/aarch64.md +@@ -477,6 +477,7 @@ + (include "thunderx2t99.md") + (include "tsv110.md") + (include "thunderx3t110.md") ++(include "hip09.md") + + ;; ------------------------------------------------------------------- + ;; Jumps and other miscellaneous insns +diff --git a/gcc/config/aarch64/hip09.md b/gcc/config/aarch64/hip09.md +new file mode 100644 +index 000000000..25428de9a +--- /dev/null ++++ b/gcc/config/aarch64/hip09.md +@@ -0,0 +1,558 @@ ++;; hip09 pipeline description ++;; Copyright (C) 2023 Free Software Foundation, Inc. ++;; ++;;Contributed by Yushuai Xing ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify it ++;; under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, but ++;; WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++;; General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++ ++(define_automaton "hip09") ++(define_automaton "hip09_ldst") ++(define_automaton "hip09_fsu") ++ ++(define_attr "hip09_type" ++ "hip09_neon_abs, hip09_neon_fp_arith, hip09_neon_mul, hip09_neon_mla, ++ hip09_neon_dot, hip09_neon_fp_div, hip09_neon_fp_sqrt, ++ hip09_neon_ins, hip09_neon_load1, hip09_neon_load1_lanes, ++ hip09_neon_load2and4, hip09_neon_load3_3reg, ++ hip09_neon_load4_4reg, hip09_neon_store1and2, ++ hip09_neon_store1_1reg, hip09_neon_store1_2reg, ++ hip09_neon_store1_3reg, hip09_neon_store1_4reg, ++ hip09_neon_store3and4_lane, hip09_neon_store3_3reg, ++ hip09_neon_store4_4reg, unknown" ++ (cond [ ++ (eq_attr "type" "neon_abs,neon_abs_q,neon_add,neon_add_q,\ ++ neon_neg,neon_neg_q,neon_sub,neon_sub_q,neon_add_widen,\ ++ neon_sub_widen,neon_qadd,neon_qadd_q,\ ++ neon_add_long,neon_sub_long,\ ++ neon_qabs,neon_qabs_q,neon_qneg,\ ++ neon_qneg_q,neon_qsub,neon_qsub_q,neon_compare,\ ++ neon_compare_q,neon_compare_zero,\ ++ neon_compare_zero_q,neon_logic,neon_logic_q,\ ++ neon_minmax,neon_minmax_q,neon_tst,\ ++ neon_tst_q,neon_bsl,neon_bsl_q,\ ++ neon_cls,neon_cls_q,neon_ext,\ ++ neon_ext_q,neon_rev,neon_rev_q,\ ++ neon_tbl1,neon_tbl1_q,neon_fp_abs_s,\ ++ neon_fp_abs_s_q,neon_fp_abs_d,\ ++ neon_fp_neg_s,neon_fp_neg_s_q,\ ++ neon_fp_neg_d,neon_fp_neg_d_q,\ ++ neon_shift_imm_narrow_q,neon_move,neon_move_q") ++ (const_string "hip09_neon_abs") ++ (eq_attr "type" "neon_abd,neon_abd_q,\ ++ neon_arith_acc,neon_arith_acc_q,\ ++ neon_add_halve,neon_add_halve_q,\ ++ neon_sub_halve,neon_sub_halve_q,\ ++ neon_add_halve_narrow_q,\ ++ neon_sub_halve_narrow_q,neon_reduc_add,\ ++ neon_reduc_add_q,\ ++ neon_sat_mul_b,neon_sat_mul_b_q,\ ++ neon_sat_mul_b_long,neon_mul_b,neon_mul_b_q,\ ++ neon_mul_b_long,neon_mla_b,neon_mla_b_q,\ ++ neon_mla_b_long,neon_sat_mla_b_long,\ ++ neon_sat_shift_imm,\ ++ neon_sat_shift_imm_q,neon_shift_imm_long,\ ++ neon_shift_imm,neon_shift_imm_q,neon_cnt,\ ++ neon_cnt_q,neon_fp_recpe_s,neon_fp_recpe_s_q,\ ++ neon_fp_recpe_d,neon_fp_recpe_d_q,\ ++ 
neon_fp_rsqrte_s,neon_fp_rsqrte_s_q,\ ++ neon_fp_rsqrte_d,neon_fp_rsqrte_d_q,\ ++ neon_fp_recpx_s,neon_fp_recpx_s_q,\ ++ neon_fp_recpx_d,neon_fp_recpx_d_q,\ ++ neon_tbl2,neon_tbl2_q,neon_to_gp,\ ++ neon_to_gp_q,neon_fp_abd_s,neon_fp_abd_s_q,\ ++ neon_fp_abd_d,neon_fp_abd_d_q,\ ++ neon_fp_addsub_s,neon_fp_addsub_s_q,\ ++ neon_fp_addsub_d,neon_fp_addsub_d_q,\ ++ neon_fp_compare_s,neon_fp_compare_s_q,\ ++ neon_fp_compare_d,neon_fp_compare_d_q,\ ++ neon_fp_cvt_widen_s,neon_fp_to_int_s,\ ++ neon_fp_to_int_s_q,neon_fp_to_int_d,\ ++ neon_fp_to_int_d_q,neon_fp_minmax_s,\ ++ neon_fp_minmax_s_q,neon_fp_minmax_d,\ ++ neon_fp_minmax_d_q,neon_fp_round_s,\ ++ neon_fp_round_s_q,neon_fp_cvt_narrow_d_q,\ ++ neon_fp_round_d,neon_fp_round_d_q,\ ++ neon_fp_cvt_narrow_s_q") ++ (const_string "hip09_neon_fp_arith") ++ (eq_attr "type" "neon_sat_mul_h,neon_sat_mul_h_q,\ ++ neon_sat_mul_s,neon_sat_mul_s_q,\ ++ neon_sat_mul_h_scalar,neon_sat_mul_s_scalar,\ ++ neon_sat_mul_h_scalar_q,neon_sat_mul_h_long,\ ++ neon_sat_mul_s_long,neon_sat_mul_h_scalar_long,\ ++ neon_sat_mul_s_scalar_long,neon_mul_h,neon_mul_h_q,\ ++ neon_mul_s,neon_mul_s_q,neon_mul_h_long,\ ++ neon_mul_s_long,neon_mul_h_scalar_long,\ ++ neon_mul_s_scalar_long,neon_mla_h,neon_mla_h_q,\ ++ neon_mla_s,neon_mla_h_scalar,\ ++ neon_mla_h_scalar_q,neon_mla_s_scalar,\ ++ neon_mla_h_long,\ ++ neon_mla_s_long,neon_sat_mla_h_long,\ ++ neon_sat_mla_s_long,neon_sat_mla_h_scalar_long,\ ++ neon_sat_mla_s_scalar_long,neon_mla_s_scalar_long,\ ++ neon_mla_h_scalar_long,neon_mla_s_scalar_q,\ ++ neon_shift_acc,neon_shift_acc_q,neon_shift_reg,\ ++ neon_shift_reg_q,neon_sat_shift_reg,\ ++ neon_sat_shift_reg_q,neon_sat_shift_imm_narrow_q,\ ++ neon_tbl3,neon_tbl3_q,neon_fp_reduc_add_s,\ ++ neon_fp_reduc_add_s_q,neon_fp_reduc_add_d,\ ++ neon_fp_reduc_add_d_q,neon_fp_reduc_minmax_s,\ ++ neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_s_q,\ ++ neon_fp_reduc_minmax_d_q,\ ++ neon_fp_mul_s_q,\ ++ neon_fp_mul_d,neon_fp_mul_d_q,\ ++ neon_fp_mul_d_scalar_q,neon_fp_mul_s_scalar,\ ++ neon_fp_mul_s_scalar_q") ++ (const_string "hip09_neon_mul") ++ (eq_attr "type" "neon_mla_s_q,neon_reduc_minmax,\ ++ neon_reduc_minmax_q,neon_fp_recps_s,\ ++ neon_fp_recps_s_q,neon_fp_recps_d,\ ++ neon_fp_recps_d_q,neon_tbl4,neon_tbl4_q,\ ++ neon_fp_mla_s,\ ++ neon_fp_mla_d,neon_fp_mla_d_q,\ ++ neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\ ++ neon_fp_mla_d_scalar_q") ++ (const_string "hip09_neon_mla") ++ (eq_attr "type" "neon_dot,neon_dot_q") ++ (const_string "hip09_neon_dot") ++ (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q,\ ++ neon_fp_div_d,neon_fp_div_d_q") ++ (const_string "hip09_neon_fp_div") ++ (eq_attr "type" "neon_fp_sqrt_s,neon_fp_sqrt_s_q,\ ++ neon_fp_sqrt_d,neon_fp_sqrt_d_q") ++ (const_string "hip09_neon_fp_sqrt") ++ (eq_attr "type" "neon_dup,neon_dup_q,\ ++ neon_ins,neon_ins_q") ++ (const_string "hip09_neon_ins") ++ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\ ++ neon_load1_2reg,neon_load1_2reg_q,\ ++ neon_load1_3reg,neon_load1_3reg_q,\ ++ neon_load1_4reg,neon_load1_4reg_q") ++ (const_string "hip09_neon_load1") ++ (eq_attr "type" "neon_load1_one_lane,\ ++ neon_load1_one_lane_q,\ ++ neon_load1_all_lanes,neon_load1_all_lanes_q") ++ (const_string "hip09_neon_load1_lanes") ++ (eq_attr "type" "neon_load2_all_lanes,\ ++ neon_load2_all_lanes_q,\ ++ neon_load2_one_lane,neon_load2_2reg,\ ++ neon_load2_2reg_q,neon_load3_one_lane,\ ++ neon_load3_all_lanes,neon_load3_all_lanes_q,\ ++ neon_load4_one_lane,neon_load4_all_lanes,\ ++ neon_load4_all_lanes_q") ++ (const_string "hip09_neon_load2and4") ++ 
(eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q") ++ (const_string "hip09_neon_load3_3reg") ++ (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q") ++ (const_string "hip09_neon_load4_4reg") ++ (eq_attr "type" "neon_store1_one_lane,\ ++ neon_store1_one_lane_q,neon_store2_one_lane,\ ++ neon_store2_one_lane_q,neon_store2_2reg,\ ++ neon_store2_2reg_q") ++ (const_string "hip09_neon_store1and2") ++ (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q") ++ (const_string "hip09_neon_store1_1reg") ++ (eq_attr "type" "neon_store1_2reg,neon_store1_2reg_q") ++ (const_string "hip09_neon_store1_2reg") ++ (eq_attr "type" "neon_store1_3reg,neon_store1_3reg_q") ++ (const_string "hip09_neon_store1_3reg") ++ (eq_attr "type" "neon_store1_4reg,neon_store1_4reg_q") ++ (const_string "hip09_neon_store1_4reg") ++ (eq_attr "type" "neon_store3_one_lane,\ ++ neon_store3_one_lane_q,neon_store4_one_lane,\ ++ neon_store4_one_lane_q") ++ (const_string "hip09_neon_store3and4_lane") ++ (eq_attr "type" "neon_store3_3reg,\ ++ neon_store3_3reg_q") ++ (const_string "hip09_neon_store3_3reg") ++ (eq_attr "type" "neon_store4_4reg,\ ++ neon_store4_4reg_q") ++ (const_string "hip09_neon_store4_4reg")] ++ (const_string "unknown"))) ++ ++; The hip09 core is modelled as issues pipeline that has ++; the following functional units. ++; 1. Two pipelines for branch micro operations: BRU1, BRU2 ++ ++(define_cpu_unit "hip09_bru0" "hip09") ++(define_cpu_unit "hip09_bru1" "hip09") ++ ++(define_reservation "hip09_bru01" "hip09_bru0|hip09_bru1") ++ ++; 2. Four pipelines for single cycle integer micro operations: ALUs1, ALUs2, ALUs3, ALUs4 ++ ++(define_cpu_unit "hip09_alus0" "hip09") ++(define_cpu_unit "hip09_alus1" "hip09") ++(define_cpu_unit "hip09_alus2" "hip09") ++(define_cpu_unit "hip09_alus3" "hip09") ++ ++(define_reservation "hip09_alus0123" "hip09_alus0|hip09_alus1|hip09_alus2|hip09_alus3") ++(define_reservation "hip09_alus01" "hip09_alus0|hip09_alus1") ++(define_reservation "hip09_alus23" "hip09_alus2|hip09_alus3") ++ ++; 3. Two pipelines for multi cycles integer micro operations: ALUm1, ALUm2 ++ ++(define_cpu_unit "hip09_alum0" "hip09") ++(define_cpu_unit "hip09_alum1" "hip09") ++ ++(define_reservation "hip09_alum01" "hip09_alum0|hip09_alum1") ++ ++; 4. Two pipelines for load micro opetations: Load1, Load2 ++ ++(define_cpu_unit "hip09_load0" "hip09_ldst") ++(define_cpu_unit "hip09_load1" "hip09_ldst") ++ ++(define_reservation "hip09_ld01" "hip09_load0|hip09_load1") ++ ++; 5. Two pipelines for store micro operations: Store1, Store2 ++ ++(define_cpu_unit "hip09_store0" "hip09_ldst") ++(define_cpu_unit "hip09_store1" "hip09_ldst") ++ ++(define_reservation "hip09_st01" "hip09_store0|hip09_store1") ++ ++; 6. Two pipelines for store data micro operations: STD0,STD1 ++ ++(define_cpu_unit "hip09_store_data0" "hip09_ldst") ++(define_cpu_unit "hip09_store_data1" "hip09_ldst") ++ ++(define_reservation "hip09_std01" "hip09_store_data0|hip09_store_data1") ++ ++; 7. Four asymmetric pipelines for Asimd and FP micro operations: FSU1, FSU2, FSU3, FSU4 ++ ++(define_cpu_unit "hip09_fsu0" "hip09_fsu") ++(define_cpu_unit "hip09_fsu1" "hip09_fsu") ++(define_cpu_unit "hip09_fsu2" "hip09_fsu") ++(define_cpu_unit "hip09_fsu3" "hip09_fsu") ++ ++(define_reservation "hip09_fsu0123" "hip09_fsu0|hip09_fsu1|hip09_fsu2|hip09_fsu3") ++(define_reservation "hip09_fsu02" "hip09_fsu0|hip09_fsu2") ++ ++ ++; 8. 
Two pipelines for sve operations but same with fsu1 and fsu3: SVE1, SVE2 ++ ++;; Simple Execution Unit: ++; ++;; Simple ALU without shift ++(define_insn_reservation "hip09_alu" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "alu_imm,logic_imm,\ ++ adc_imm,adc_reg,\ ++ alu_sreg,logic_reg,\ ++ mov_imm,mov_reg,\ ++ csel,rotate_imm,bfm,mov_imm,\ ++ clz,rbit,rev")) ++ "hip09_alus0123") ++ ++(define_insn_reservation "hip09_alus" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "alus_sreg,alus_imm,\ ++ adcs_reg,adcs_imm,\ ++ logics_imm,logics_reg,adr")) ++ "hip09_alus23") ++ ++;; ALU ops with shift and extend ++(define_insn_reservation "hip09_alu_ext_shift" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "alu_ext,alus_ext,\ ++ logics_shift_imm,logics_shift_reg,\ ++ logic_shift_reg,logic_shift_imm,\ ++ ")) ++ "hip09_alum01") ++ ++;; Multiplies instructions ++(define_insn_reservation "hip09_mult" 3 ++ (and (eq_attr "tune" "hip09") ++ (ior (eq_attr "mul32" "yes") ++ (eq_attr "widen_mul64" "yes"))) ++ "hip09_alum01") ++ ++;; Integer divide ++(define_insn_reservation "hip09_div" 10 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "udiv,sdiv")) ++ "hip09_alum0") ++ ++;; Branch execution Unit ++; ++; Branches take two issue slot. ++; No latency as there is no result ++(define_insn_reservation "hip09_branch" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "branch,call")) ++ "hip09_bru01 + hip09_alus23") ++ ++;; Load execution Unit ++; ++; Loads of up to two words. ++(define_insn_reservation "hip09_load1" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "load_4,load_8")) ++ "hip09_ld01") ++ ++; Stores of up to two words. ++(define_insn_reservation "hip09_store1" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "store_4,store_8")) ++ "hip09_st01") ++ ++;; FP data processing instructions. ++ ++(define_insn_reservation "hip09_fp_arith" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "ffariths,ffarithd,fmov,fconsts,fconstd,\ ++ f_mrc")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_cmp" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fcmps,fcmpd")) ++ "hip09_fsu0123+hip09_alus23") ++ ++(define_insn_reservation "hip09_fp_ccmp" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fccmps,fccmpd")) ++ "hip09_alus01+hip09_fsu0123+hip09_alus23") ++ ++(define_insn_reservation "hip09_fp_csel" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fcsel,f_mcr")) ++ "hip09_alus01+hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_divs" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fdivs")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_divd" 10 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fdivd")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_sqrts" 9 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fsqrts")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_sqrtd" 15 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fsqrtd")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_mul" 3 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fmuls,fmuld")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_add" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fadds,faddd,f_minmaxs,f_minmaxd,f_cvt,\ ++ f_rints,f_rintd")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_fp_mac" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "fmacs,fmacd")) ++ "hip09_fsu0123") ++ ++;; FP miscellaneous instructions. 
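++;; f_cvtf2i conversions also reserve an ALU pipe for the integer result,
++;; and f_cvti2f conversions reserve an ALU pipe for the integer source.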
++ ++(define_insn_reservation "hip09_fp_cvt" 5 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "f_cvtf2i")) ++ "hip09_fsu0123+hip09_alus23") ++ ++(define_insn_reservation "hip09_fp_cvt2" 5 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "f_cvti2f")) ++ "hip09_alus01+hip09_fsu0123") ++ ++;; FP Load Instructions ++ ++(define_insn_reservation "hip09_fp_load" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "f_loads,f_loadd")) ++ "hip09_ld01") ++ ++(define_insn_reservation "hip09_fp_load2" 6 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "neon_ldp_q,neon_ldp")) ++ "hip09_ld01+hip09_alus01") ++ ++;; FP store instructions ++ ++(define_insn_reservation "hip09_fp_store" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "f_stores,f_stored")) ++ "hip09_st01+hip09_std01") ++ ++;; ASIMD integer instructions ++ ++(define_insn_reservation "hip09_asimd_base1" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_abs")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_base2" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_fp_arith")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_base3" 3 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_mul")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_base4" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_mla")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_base5" 5 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "neon_fp_mul_s")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_dot" 6 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_dot")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_bfmmla" 9 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "neon_fp_mla_s_q")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_fdiv" 15 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_fp_div")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_fsqrt" 25 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_fp_sqrt")) ++ "hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_pmull" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_pmull")) ++ "hip09_fsu2") ++ ++(define_insn_reservation "hip09_asimd_dup" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_ins")) ++ "hip09_alus01+hip09_fsu0123") ++ ++;; ASIMD load instructions ++ ++(define_insn_reservation "hip09_asimd_ld1_reg" 6 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load1")) ++ "hip09_ld01") ++ ++(define_insn_reservation "hip09_asimd_ld1_lane" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load1_lanes")) ++ "hip09_ld01+hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_ld23" 8 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load2and4")) ++"hip09_ld01+hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_ld3_mtp" 9 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load3_3reg")) ++ "hip09_ld01+hip09_fsu0123") ++ ++(define_insn_reservation "hip09_asimd_ld4_mtp" 13 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_load4_4reg")) ++ "hip09_ld01+hip09_fsu0123") ++ ++;; ASIMD store instructions ++ ++(define_insn_reservation "hip09_asimd_st12" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1and2")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation 
"hip09_asimd_st1_1reg" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1_1reg")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st1_2reg" 3 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1_2reg")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st1_3reg" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1_3reg")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st1_4reg" 5 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store1_4reg")) ++ "hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st34_lane" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store3and4_lane")) ++ "hip09_fsu0123+hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st3_mtp" 7 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store3_3reg")) ++ "hip09_fsu0123+hip09_st01+hip09_std01") ++ ++(define_insn_reservation "hip09_asimd_st4_mtp" 10 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "hip09_type" "hip09_neon_store4_4reg")) ++ "hip09_fsu0123+hip09_st01+hip09_std01") ++ ++;; Cryptography extensions ++ ++(define_insn_reservation "hip09_asimd_aes" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_aese,crypto_aesmc")) ++ "hip09_fsu02") ++ ++(define_insn_reservation "hip09_asimd_sha3" 1 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_sha3")) ++ "hip09_fsu2") ++ ++(define_insn_reservation "hip09_asimd_sha1" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,\ ++ crypto_sha256_fast,crypto_sha512,\ ++ crypto_sm3")) ++ "hip09_fsu2") ++ ++(define_insn_reservation "hip09_asimd_sha1_and256" 4 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow,\ ++ crypto_sm4")) ++ "hip09_fsu2") ++ ++;; CRC extension. ++ ++(define_insn_reservation "hip09_crc" 2 ++ (and (eq_attr "tune" "hip09") ++ (eq_attr "type" "crc")) ++ "hip09_alum01") +-- +2.33.0 + diff --git a/0101-Add-hip11-CPU-pipeline-scheduling.patch b/0101-Add-hip11-CPU-pipeline-scheduling.patch new file mode 100644 index 0000000000000000000000000000000000000000..7b89890555fa6f0ccb95e7d657c0ba87ebb745d2 --- /dev/null +++ b/0101-Add-hip11-CPU-pipeline-scheduling.patch @@ -0,0 +1,755 @@ +From 824fccdab1d3c5e87fb88b31f0eeb7abd1b35c1f Mon Sep 17 00:00:00 2001 +From: XingYuShuai <1150775134@qq.com> +Date: Mon, 26 Feb 2024 20:34:06 +0800 +Subject: [PATCH 002/157] Add hip11 CPU pipeline scheduling + +This patch adds an mcpu: hip11. It has been tested on aarch64 +and no regressions from this patch. 
+--- + gcc/config/aarch64/aarch64-cores.def | 1 + + gcc/config/aarch64/aarch64-cost-tables.h | 104 ++++++ + gcc/config/aarch64/aarch64-tune.md | 2 +- + gcc/config/aarch64/aarch64.cc | 108 ++++++ + gcc/config/aarch64/aarch64.md | 1 + + gcc/config/aarch64/hip11.md | 418 +++++++++++++++++++++++ + gcc/doc/invoke.texi | 2 +- + 7 files changed, 634 insertions(+), 2 deletions(-) + create mode 100644 gcc/config/aarch64/hip11.md + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index a854bdb24..601b72abb 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -173,6 +173,7 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | + AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) + + AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) ++AARCH64_CORE("hip11", hip11, hip11, 8_5A, AARCH64_FL_FOR_ARCH8_5| AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_F16, hip11, 0x48, 0xd22, -1) + + AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) + AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) +diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h +index fc5a3cbe4..0ee427b61 100644 +--- a/gcc/config/aarch64/aarch64-cost-tables.h ++++ b/gcc/config/aarch64/aarch64-cost-tables.h +@@ -561,6 +561,110 @@ const struct cpu_cost_table tsv110_extra_costs = + } + }; + ++const struct cpu_cost_table hip11_extra_costs = ++{ ++ /* ALU */ ++ { ++ 0, /* arith. */ ++ 0, /* logical. */ ++ 0, /* shift. */ ++ 0, /* shift_reg. */ ++ COSTS_N_INSNS (1), /* arith_shift. */ ++ COSTS_N_INSNS (1), /* arith_shift_reg. */ ++ COSTS_N_INSNS (1), /* log_shift. */ ++ COSTS_N_INSNS (1), /* log_shift_reg. */ ++ 0, /* extend. */ ++ COSTS_N_INSNS (1), /* extend_arith. */ ++ 0, /* bfi. */ ++ 0, /* bfx. */ ++ 0, /* clz. */ ++ 0, /* rev. */ ++ 0, /* non_exec. */ ++ true /* non_exec_costs_exec. */ ++ }, ++ ++ { ++ /* MULT SImode */ ++ { ++ COSTS_N_INSNS (2), /* simple. */ ++ COSTS_N_INSNS (2), /* flag_setting. */ ++ COSTS_N_INSNS (2), /* extend. */ ++ COSTS_N_INSNS (2), /* add. */ ++ COSTS_N_INSNS (2), /* extend_add. */ ++ COSTS_N_INSNS (11) /* idiv. */ ++ }, ++ /* MULT DImode */ ++ { ++ COSTS_N_INSNS (3), /* simple. */ ++ 0, /* flag_setting (N/A). */ ++ COSTS_N_INSNS (3), /* extend. */ ++ COSTS_N_INSNS (3), /* add. */ ++ COSTS_N_INSNS (3), /* extend_add. */ ++ COSTS_N_INSNS (19) /* idiv. */ ++ } ++ }, ++ /* LD/ST */ ++ { ++ COSTS_N_INSNS (3), /* load. */ ++ COSTS_N_INSNS (4), /* load_sign_extend. */ ++ COSTS_N_INSNS (3), /* ldrd. */ ++ COSTS_N_INSNS (3), /* ldm_1st. */ ++ 1, /* ldm_regs_per_insn_1st. */ ++ 2, /* ldm_regs_per_insn_subsequent. */ ++ COSTS_N_INSNS (4), /* loadf. */ ++ COSTS_N_INSNS (4), /* loadd. */ ++ COSTS_N_INSNS (4), /* load_unaligned. */ ++ 0, /* store. */ ++ 0, /* strd. */ ++ 0, /* stm_1st. */ ++ 1, /* stm_regs_per_insn_1st. 
*/ ++ 2, /* stm_regs_per_insn_subsequent. */ ++ 0, /* storef. */ ++ 0, /* stored. */ ++ COSTS_N_INSNS (1), /* store_unaligned. */ ++ COSTS_N_INSNS (4), /* loadv. */ ++ COSTS_N_INSNS (4) /* storev. */ ++ }, ++ { ++ /* FP SFmode */ ++ { ++ COSTS_N_INSNS (10), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (4), /* mult_addsub. */ ++ COSTS_N_INSNS (4), /* fma. */ ++ COSTS_N_INSNS (4), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ }, ++ /* FP DFmode */ ++ { ++ COSTS_N_INSNS (17), /* div. */ ++ COSTS_N_INSNS (4), /* mult. */ ++ COSTS_N_INSNS (6), /* mult_addsub. */ ++ COSTS_N_INSNS (6), /* fma. */ ++ COSTS_N_INSNS (3), /* addsub. */ ++ COSTS_N_INSNS (1), /* fpconst. */ ++ COSTS_N_INSNS (1), /* neg. */ ++ COSTS_N_INSNS (1), /* compare. */ ++ COSTS_N_INSNS (2), /* widen. */ ++ COSTS_N_INSNS (2), /* narrow. */ ++ COSTS_N_INSNS (2), /* toint. */ ++ COSTS_N_INSNS (1), /* fromint. */ ++ COSTS_N_INSNS (2) /* roundint. */ ++ } ++ }, ++ /* Vector */ ++ { ++ COSTS_N_INSNS (1) /* alu. */ ++ } ++}; ++ + const struct cpu_cost_table a64fx_extra_costs = + { + /* ALU */ +diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md +index 238bb6e31..511422081 100644 +--- a/gcc/config/aarch64/aarch64-tune.md ++++ b/gcc/config/aarch64/aarch64-tune.md +@@ -1,5 +1,5 @@ + ;; -*- buffer-read-only: t -*- + ;; Generated automatically by gentune.sh from aarch64-cores.def + (define_attr "tune" +- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,demeter,neoversev2" ++ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,neoverse512tvb,saphira,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa710,cortexx2,neoversen2,hip11,demeter,neoversev2" + (const (symbol_ref "((enum attr_tune) aarch64_tune)"))) +diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index e9b3980c4..7c62ddb2a 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -481,6 +481,22 @@ static const struct cpu_addrcost_table hip09_addrcost_table = + 0, /* imm_offset */ + }; + ++static const struct 
cpu_addrcost_table hip11_addrcost_table = ++{ ++ { ++ 1, /* hi */ ++ 0, /* si */ ++ 0, /* di */ ++ 1, /* ti */ ++ }, ++ 0, /* pre_modify */ ++ 0, /* post_modify */ ++ 0, /* register_offset */ ++ 1, /* register_sextend */ ++ 1, /* register_zextend */ ++ 0, /* imm_offset */ ++}; ++ + static const struct cpu_addrcost_table qdf24xx_addrcost_table = + { + { +@@ -666,6 +682,16 @@ static const struct cpu_regmove_cost tsv110_regmove_cost = + 2 /* FP2FP */ + }; + ++static const struct cpu_regmove_cost hip11_regmove_cost = ++{ ++ 1, /* GP2GP */ ++ /* Avoid the use of slow int<->fp moves for spilling by setting ++ their cost higher than memmov_cost. */ ++ 2, /* GP2FP */ ++ 3, /* FP2GP */ ++ 2 /* FP2FP */ ++}; ++ + static const struct cpu_regmove_cost a64fx_regmove_cost = + { + 1, /* GP2GP */ +@@ -1010,6 +1036,43 @@ static const struct cpu_vector_cost hip09_vector_cost = + nullptr /* issue_info */ + }; + ++static const advsimd_vec_cost hip11_advsimd_vector_cost = ++{ ++ 2, /* int_stmt_cost */ ++ 2, /* fp_stmt_cost */ ++ 0, /* ld2_st2_permute_cost */ ++ 0, /* ld3_st3_permute_cost */ ++ 0, /* ld4_st4_permute_cost */ ++ 2, /* permute_cost */ ++ 3, /* reduc_i8_cost */ ++ 3, /* reduc_i16_cost */ ++ 3, /* reduc_i32_cost */ ++ 3, /* reduc_i64_cost */ ++ 3, /* reduc_f16_cost */ ++ 3, /* reduc_f32_cost */ ++ 3, /* reduc_f64_cost */ ++ 3, /* store_elt_extra_cost */ ++ 5, /* vec_to_scalar_cost */ ++ 5, /* scalar_to_vec_cost */ ++ 5, /* align_load_cost */ ++ 5, /* unalign_load_cost */ ++ 1, /* unalign_store_cost */ ++ 1 /* store_cost */ ++}; ++ ++static const struct cpu_vector_cost hip11_vector_cost = ++{ ++ 1, /* scalar_int_stmt_cost */ ++ 1, /* scalar_fp_stmt_cost */ ++ 5, /* scalar_load_cost */ ++ 1, /* scalar_store_cost */ ++ 1, /* cond_taken_branch_cost */ ++ 1, /* cond_not_taken_branch_cost */ ++ &hip11_advsimd_vector_cost, /* advsimd */ ++ nullptr, /* sve */ ++ nullptr /* issue_info */ ++}; ++ + static const advsimd_vec_cost cortexa57_advsimd_vector_cost = + { + 2, /* int_stmt_cost */ +@@ -1368,6 +1431,17 @@ static const cpu_prefetch_tune hip09_prefetch_tune = + -1 /* default_opt_level */ + }; + ++static const cpu_prefetch_tune hip11_prefetch_tune = ++{ ++ 0, /* num_slots */ ++ 64, /* l1_cache_size */ ++ 64, /* l1_cache_line_size */ ++ 512, /* l2_cache_size */ ++ true, /* prefetch_dynamic_strides */ ++ -1, /* minimum_stride */ ++ -1 /* default_opt_level */ ++}; ++ + static const cpu_prefetch_tune xgene1_prefetch_tune = + { + 8, /* num_slots */ +@@ -1767,6 +1841,40 @@ static const struct tune_params hip09_tunings = + &hip09_prefetch_tune + }; + ++static const struct tune_params hip11_tunings = ++{ ++ &hip11_extra_costs, ++ &hip11_addrcost_table, ++ &hip11_regmove_cost, ++ &hip11_vector_cost, ++ &generic_branch_cost, ++ &generic_approx_modes, ++ SVE_512, /* sve_width */ ++ { 4, /* load_int. */ ++ 4, /* store_int. */ ++ 4, /* load_fp. */ ++ 4, /* store_fp. */ ++ 4, /* load_pred. */ ++ 4 /* store_pred. */ ++ }, /* memmov_cost. */ ++ 4, /* issue_rate */ ++ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH ++ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */ ++ "16", /* function_align. */ ++ "4", /* jump_align. */ ++ "8", /* loop_align. */ ++ 2, /* int_reassoc_width. */ ++ 4, /* fp_reassoc_width. */ ++ 1, /* vec_reassoc_width. */ ++ 2, /* min_div_recip_mul_sf. */ ++ 2, /* min_div_recip_mul_df. */ ++ 0, /* max_case_values. */ ++ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */ ++ (AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS ++ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. 
*/
++  &hip11_prefetch_tune
++};
++
+ static const struct tune_params xgene1_tunings =
+ {
+   &xgene1_extra_costs,
+diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
+index cf699e4c7..c0c64a798 100644
+--- a/gcc/config/aarch64/aarch64.md
++++ b/gcc/config/aarch64/aarch64.md
+@@ -478,6 +478,7 @@
+ (include "tsv110.md")
+ (include "thunderx3t110.md")
+ (include "hip09.md")
++(include "hip11.md")
+
+ ;; -------------------------------------------------------------------
+ ;; Jumps and other miscellaneous insns
+diff --git a/gcc/config/aarch64/hip11.md b/gcc/config/aarch64/hip11.md
+new file mode 100644
+index 000000000..45f91e65b
+--- /dev/null
++++ b/gcc/config/aarch64/hip11.md
+@@ -0,0 +1,418 @@
++;; hip11 pipeline description
++;; Copyright (C) 2018-2024 Free Software Foundation, Inc.
++;;
++;; This file is part of GCC.
++;;
++;; GCC is free software; you can redistribute it and/or modify it
++;; under the terms of the GNU General Public License as published by
++;; the Free Software Foundation; either version 3, or (at your option)
++;; any later version.
++;;
++;; GCC is distributed in the hope that it will be useful, but
++;; WITHOUT ANY WARRANTY; without even the implied warranty of
++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++;; General Public License for more details.
++;;
++;; You should have received a copy of the GNU General Public License
++;; along with GCC; see the file COPYING3.  If not see
++;; <http://www.gnu.org/licenses/>.
++
++(define_automaton "hip11")
++
++;; The hip11 core is modelled as an issue pipeline that has
++;; the following functional units.
++;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
++
++(define_cpu_unit "hip11_alu1_issue" "hip11")
++(define_reservation "hip11_alu1" "hip11_alu1_issue")
++
++(define_cpu_unit "hip11_alu2_issue" "hip11")
++(define_reservation "hip11_alu2" "hip11_alu2_issue")
++
++(define_cpu_unit "hip11_alu3_issue" "hip11")
++(define_reservation "hip11_alu3" "hip11_alu3_issue")
++
++(define_reservation "hip11alu" "hip11_alu1|hip11_alu2|hip11_alu3")
++
++;; 2. One pipeline for complex integer operations: MDU
++
++(define_cpu_unit "hip11_mdu_issue" "hip11")
++(define_reservation "hip11_mdu" "hip11_mdu_issue")
++
++;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
++(define_automaton "hip11_fsu")
++
++(define_cpu_unit "hip11_fsu1_issue"
++		 "hip11_fsu")
++(define_cpu_unit "hip11_fsu2_issue"
++		 "hip11_fsu")
++
++(define_reservation "hip11_fsu1" "hip11_fsu1_issue")
++(define_reservation "hip11_fsu2" "hip11_fsu2_issue")
++(define_reservation "hip11_fsu_pipe" "hip11_fsu1|hip11_fsu2")
++
++;; 4. Two pipelines for branch operations, shared with ALU2 and ALU3:
++;;    BRU1, BRU2
++
++;; 5. Two pipelines for load and store operations: LS1, LS2.
++
++(define_cpu_unit "hip11_ls1_issue" "hip11")
++(define_cpu_unit "hip11_ls2_issue" "hip11")
++(define_reservation "hip11_ls1" "hip11_ls1_issue")
++(define_reservation "hip11_ls2" "hip11_ls2_issue")
++
++;; Block all issue queues.
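++;; The "hip11_block" reservation defined below is used by "block"-type
++;; insns, which occupy every issue queue for one cycle.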
++ ++(define_reservation "hip11_block" "hip11_fsu1_issue + hip11_fsu2_issue ++ + hip11_mdu_issue + hip11_alu1_issue ++ + hip11_alu2_issue + hip11_alu3_issue + hip11_ls1_issue + hip11_ls2_issue") ++ ++;; Branch execution Unit ++;; ++(define_insn_reservation "hip11_branch" 1 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "branch")) ++ "hip11_alu2|hip11_alu3") ++ ++(define_insn_reservation "hip11_return_from_subroutine" 6 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "branch") ++ (eq_attr "sls_length" "retbr")) ++ "hip11_mdu,(hip11_alu2|hip11_alu3)") ++ ++ ;; Simple Execution Unit: ++;; ++;; Simple ALU without shift ++(define_insn_reservation "hip11_alu" 1 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "alu_imm,logic_imm,\ ++ alu_sreg,logic_reg,\ ++ adc_imm,adc_reg,\ ++ adr,bfm,clz,rbit,rev,\ ++ shift_imm,shift_reg,\ ++ mov_imm,mov_reg,\ ++ mvn_imm,mvn_reg,\ ++ mrs,multiple,csel,\ ++ rotate_imm")) ++ "hip11_alu1|hip11_alu2|hip11_alu3") ++ ++(define_insn_reservation "hip11_alus" 1 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "alus_imm,logics_imm,\ ++ alus_sreg,logics_reg,\ ++ adcs_imm,adcs_reg")) ++ "hip11_alu2|hip11_alu3") ++ ++;; ALU ops with shift ++(define_insn_reservation "hip11_alu_shift" 2 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "extend,\ ++ alu_shift_imm_lsl_1to4,alu_shift_imm_other,alu_shift_reg,\ ++ crc,logic_shift_imm,logic_shift_reg,\ ++ mov_shift,mvn_shift,\ ++ mov_shift_reg,mvn_shift_reg")) ++ "hip11_mdu") ++ ++(define_insn_reservation "hip11_alus_shift" 2 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\ ++ logics_shift_imm,logics_shift_reg")) ++ "hip11_alu2|hip11_alu3") ++ ++;; Multiplies instructions ++(define_insn_reservation "hip11_mult" 3 ++ (and (eq_attr "tune" "hip11") ++ (ior (eq_attr "mul32" "yes") ++ (eq_attr "widen_mul64" "yes"))) ++ "hip11_mdu") ++ ++;; Integer divide ++(define_insn_reservation "hip11_div" 10 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "udiv,sdiv")) ++ "hip11_mdu") ++ ++(define_insn_reservation "hip11_mla" 4 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "mla,smlal,umlal,smull,umull")) ++ "hip11_mdu") ++ ++;; Block all issue pipes for a cycle ++(define_insn_reservation "hip11_block" 1 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "block")) ++ "hip11_block") ++ ++;; Load-store execution Unit ++;; ++(define_insn_reservation "hip11_load1" 4 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "load_4,load_8,load_16")) ++ "hip11_ls1|hip11_ls2") ++ ++(define_insn_reservation "hip11_fp_load" 5 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "f_loads,f_loadd")) ++ "hip11_ls1|hip11_ls2") ++ ++(define_insn_reservation "hip11_neon_ld1_single" 7 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\ ++ neon_load1_all_lanes,neon_load1_all_lanes_q")) ++ "(hip11_ls1|hip11_ls2)+hip11_fsu1") ++ ++(define_insn_reservation "hip11_neon_ld1_1reg" 5 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q")) ++ "hip11_ls1|hip11_ls2") ++ ++(define_insn_reservation "hip11_neon_ld1_2reg" 6 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q")) ++ "hip11_ls1|hip11_ls2") ++ ++(define_insn_reservation "hip11_neon_ld1_3reg" 7 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q")) ++ "hip11_ls1|hip11_ls2") ++ ++(define_insn_reservation "hip11_neon_ld1_4reg" 8 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q")) 
++ "hip11_ls1|hip11_ls2") ++ ++(define_insn_reservation "hip11_neon_ld2" 8 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load2_one_lane,neon_load2_one_lane_q,\ ++ neon_load2_all_lanes,neon_load2_all_lanes_q,\ ++ neon_load2_2reg,neon_load2_2reg_q,\ ++ neon_load2_4reg,neon_load2_4reg_q")) ++ "(hip11_ls1|hip11_ls2)+hip11_fsu1") ++ ++(define_insn_reservation "hip11_neon_ld3_single" 9 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load3_one_lane,neon_load3_one_lane_q,\ ++ neon_load3_all_lanes,neon_load3_all_lanes_q")) ++ "(hip11_ls1|hip11_ls2)+hip11_fsu1") ++ ++(define_insn_reservation "hip11_neon_ld3_multiple" 13 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q")) ++ "(hip11_ls1|hip11_ls2)+hip11_fsu1") ++ ++(define_insn_reservation "hip11_neon_ld4_single" 10 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load4_one_lane,neon_load4_one_lane_q,\ ++ neon_load4_all_lanes,neon_load4_all_lanes_q")) ++ "(hip11_ls1|hip11_ls2)+hip11_fsu1") ++ ++(define_insn_reservation "hip11_neon_ld4_multiple" 11 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q")) ++ "(hip11_ls1|hip11_ls2)+hip11_fsu1") ++ ++;; Stores of up to two words. ++(define_insn_reservation "hip11_store1" 1 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "store_4,store_8,store_16,\ ++ f_stored,f_stores")) ++ "hip11_ls1|hip11_ls2") ++ ++;; Floating-Point Operations. ++(define_insn_reservation "hip11_fp_arith" 2 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "ffariths,ffarithd,f_minmaxs,\ ++ f_minmaxd,fadds,faddd,neon_fcadd")) ++ "hip11_fsu_pipe") ++ ++(define_insn_reservation "hip11_fp_mul" 3 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_fp_mul_d,neon_fp_mul_d_q,\ ++ neon_fp_mul_s_scalar,neon_fp_mul_s_scalar_q,\ ++ neon_fp_mul_d_scalar_q,fmuld,fmuls")) ++ "hip11_fsu_pipe") ++ ++(define_insn_reservation "hip11_fp_cmp" 2 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fccmpd,fccmps")) ++ "hip11alu,hip11_fsu_pipe") ++ ++(define_insn_reservation "hip11_fp_csel" 2 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fcsel")) ++ "hip11alu,hip11_fsu1") ++ ++(define_insn_reservation "hip11_fp_fcmp" 1 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fcmpd,fcmps")) ++ "hip11_fsu_pipe") ++ ++(define_insn_reservation "hip11_fp_divs" 7 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fdivs")) ++ "hip11_fsu1") ++ ++(define_insn_reservation "hip11_fp_divd" 10 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fdivd")) ++ "hip11_fsu1") ++ ++(define_insn_reservation "hip11_fp_sqrts" 9 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fsqrts")) ++ "hip11_fsu1") ++ ++(define_insn_reservation "hip11_fp_sqrtd" 15 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fsqrtd")) ++ "hip11_fsu1") ++ ++(define_insn_reservation "hip11_fp_mac" 4 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad")) ++ "hip11_fsu_pipe") ++ ++(define_insn_reservation "hip11_fp_mov" 1 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "fmov,neon_dup,neon_dup_q,\ ++ neon_from_gp,neon_from_gp_q,\ ++ neon_ins,neon_ins_q,\ ++ neon_to_gp,neon_to_gp_q,\ ++ neon_move,neon_move_q,\ ++ neon_rev,neon_rev_q,\ ++ neon_permute,neon_permute_q,\ ++ neon_shift_imm_narrow_q,\ ++ neon_ext,neon_ext_q,\ ++ neon_rbit,\ ++ crypto_sha3,neon_tbl1,neon_tbl1_q,\ ++ neon_tbl2_q,f_mcr,neon_tst,neon_tst_q,\ ++ neon_move_narrow_q")) ++ "hip11_fsu1") ++ ++;; ASIMD instructions ++(define_insn_reservation "hip11_asimd_simple_arithmetic" 2 ++ (and (eq_attr 
"tune" "hip11") ++ (eq_attr "type" "neon_abs,neon_abs_q,neon_neg,neon_neg_q,\ ++ neon_abd,neon_abd_q,\ ++ neon_add_long,neon_sub_long,neon_sub_widen,neon_add_widen,\ ++ neon_add_halve_narrow_q,neon_sub_halve_narrow_q,\ ++ neon_arith_acc,neon_arith_acc_q,\ ++ neon_compare,neon_compare_q,\ ++ neon_compare_zero,neon_compare_zero_q,\ ++ neon_minmax,neon_minmax_q,\ ++ neon_logic,neon_logic_q,\ ++ neon_reduc_add,neon_reduc_add_q,\ ++ neon_reduc_minmax,neon_reduc_minmax_q,\ ++ neon_fp_to_int_s,neon_fp_to_int_s_q,\ ++ neon_fp_to_int_d,neon_fp_to_int_d_q,\ ++ neon_fp_cvt_widen_s,\ ++ neon_fp_cvt_narrow_d_q,\ ++ neon_cls,neon_cls_q,\ ++ neon_cnt,neon_cnt_q,\ ++ f_rints,f_rintd,f_cvtf2i,f_cvt,\ ++ neon_tbl3,neon_fp_round_s,neon_fp_round_s_q,\ ++ neon_fp_round_d,neon_fp_round_d_q,\ ++ neon_int_to_fp_s,neon_fp_recpe_s,neon_fp_recpe_s_q,\ ++ neon_fp_recpe_d,neon_fp_recpe_d_q,\ ++ neon_fp_cvt_narrow_s_q,\ ++ crypto_aese,crypto_aesmc,\ ++ crypto_sha1_fast,crypto_sha1_xor,\ ++ crypto_sha1_slow,\ ++ crypto_sha256_fast,\ ++ crypto_sha512,crypto_sm3,\ ++ neon_qabs,neon_qabs_q,\ ++ neon_qneg,neon_qneg_q,\ ++ neon_qadd,neon_qadd_q,\ ++ neon_qsub,neon_qsub_q,\ ++ neon_add_halve,neon_add_halve_q,\ ++ neon_sub_halve,neon_sub_halve_q,\ ++ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_s_q,\ ++ neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_d_q,\ ++ neon_fp_rsqrte_s,neon_fp_rsqrte_s_q,\ ++ neon_fp_rsqrte_d,neon_fp_rsqrte_d_q")) ++ "hip11_fsu1") ++ ++(define_insn_reservation "hip11_asimd_complex_arithmetic" 4 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_mul_b,neon_mul_b_q,\ ++ neon_mul_h,neon_mul_h_q,\ ++ neon_mul_s,neon_mul_s_q,\ ++ neon_mla_b,neon_mla_b_q,\ ++ neon_mla_h,neon_mla_h_q,\ ++ neon_mla_s,\ ++ neon_mla_h_scalar,neon_mla_h_scalar_q,\ ++ neon_mla_s_scalar,neon_mla_s_scalar_q,\ ++ neon_sat_mul_h_scalar,neon_sat_mul_h_scalar_q,\ ++ neon_sat_mul_s_scalar,neon_sat_mul_s_scalar_q,\ ++ neon_sat_mul_b,neon_sat_mul_b_q,\ ++ neon_sat_mul_h,neon_sat_mul_h_q,\ ++ neon_sat_mul_s,neon_sat_mul_s_q,\ ++ neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,\ ++ neon_mul_b_long,neon_mul_h_long,neon_mul_s_long,\ ++ neon_sat_mla_b_long,neon_sat_mla_h_long,neon_sat_mla_s_long,\ ++ neon_sat_mla_h_scalar_long,neon_sat_mla_s_scalar_long,\ ++ neon_sat_mul_b_long,neon_sat_mul_h_long,neon_sat_mul_s_long,\ ++ neon_sat_mul_h_scalar_long,neon_sat_mul_s_scalar_long,\ ++ crypto_pmull,\ ++ neon_sat_shift_reg,neon_sat_shift_reg_q,\ ++ neon_shift_reg,neon_shift_reg_q,\ ++ neon_shift_imm,neon_shift_imm_q,\ ++ neon_shift_imm_long,\ ++ neon_sat_shift_imm,neon_sat_shift_imm_q,\ ++ neon_sat_shift_imm_narrow_q,\ ++ neon_shift_acc,neon_shift_acc_q,\ ++ crypto_sha256_slow")) ++ "hip11_fsu1") ++ ++(define_insn_reservation "hip11_asimd_fp_compare" 2 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_fp_abs_s,neon_fp_abs_s_q,\ ++ neon_fp_abs_d,neon_fp_abs_d_q,\ ++ neon_fp_neg_s,neon_fp_neg_s_q,\ ++ neon_fp_neg_d,neon_fp_neg_d_q,\ ++ neon_fp_compare_s,neon_fp_compare_s_q,\ ++ neon_fp_compare_d,neon_fp_compare_d_q,\ ++ neon_fp_minmax_s,neon_fp_minmax_s_q,\ ++ neon_fp_minmax_d,neon_fp_minmax_d_q,\ ++ neon_fp_addsub_s,neon_fp_addsub_s_q,\ ++ neon_fp_addsub_d,neon_fp_addsub_d_q,\ ++ neon_fp_reduc_add_s,neon_fp_reduc_add_s_q,\ ++ neon_fp_reduc_add_d,neon_fp_reduc_add_d_q,\ ++ neon_fp_abd_s,neon_fp_abd_s_q,\ ++ neon_fp_abd_d,neon_fp_abd_d_q")) ++ "hip11_fsu_pipe") ++ ++(define_insn_reservation "hip11_asimd_fdiv" 10 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q,\ ++ neon_fp_div_d,neon_fp_div_d_q")) ++ 
"hip11_fsu1") ++ ++(define_insn_reservation "hip11_asimd_fsqrt" 15 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_fp_sqrt_s,neon_fp_sqrt_s_q,\ ++ neon_fp_sqrt_d,neon_fp_sqrt_d_q")) ++ "hip11_fsu1") ++ ++(define_insn_reservation "hip11_asimd_fp_multiply_add" 4 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_fp_mla_s,neon_fp_mla_s_q,\ ++ neon_fp_mla_d,neon_fp_mla_d_q,\ ++ neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\ ++ neon_fp_mul_s,neon_fp_mul_s_q,neon_fcmla,\ ++ neon_fp_recps_s,neon_fp_recps_s_q,\ ++ neon_fp_recps_d,neon_fp_recps_d_q,\ ++ neon_fp_rsqrts_s,neon_fp_rsqrts_s_q,\ ++ neon_fp_rsqrts_d,neon_fp_rsqrts_d_q")) ++ "hip11_fsu_pipe") ++ ++(define_insn_reservation "hip11_asimd_frecpx" 3 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_fp_recpx_s,neon_fp_recpx_s_q,\ ++ neon_fp_recpx_d,neon_fp_recpx_d_q,neon_tbl4,\ ++ neon_dot,neon_dot_q")) ++ "hip11_fsu1") ++ ++(define_insn_reservation "hip11_asimd_mmla" 6 ++ (and (eq_attr "tune" "hip11") ++ (eq_attr "type" "neon_mla_s_q")) ++ "hip11_fsu1") +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 7ca60dd64..17d9e4126 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -19212,7 +19212,7 @@ performance of the code. Permissible values for this option are: + @samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96} + @samp{octeontx2t93}, @samp{octeontx2f95}, @samp{octeontx2f95n}, + @samp{octeontx2f95mm}, +-@samp{a64fx}, ++@samp{a64fx},@samp{hip11} + @samp{thunderx}, @samp{thunderxt88}, + @samp{thunderxt88p1}, @samp{thunderxt81}, @samp{tsv110}, + @samp{thunderxt83}, @samp{thunderx2t99}, @samp{thunderx3t110}, @samp{zeus}, +-- +2.33.0 + diff --git a/0102-Add-Crc32-Optimization-in-Gzip-For-crc32-algorithm-i.patch b/0102-Add-Crc32-Optimization-in-Gzip-For-crc32-algorithm-i.patch new file mode 100644 index 0000000000000000000000000000000000000000..6fb0cef7ac11ea7483c88f6e86c2936dd0dac366 --- /dev/null +++ b/0102-Add-Crc32-Optimization-in-Gzip-For-crc32-algorithm-i.patch @@ -0,0 +1,2164 @@ +From 8fa9788ac64a9ea5dc92c61c8f2ec11075cd17ec Mon Sep 17 00:00:00 2001 +From: XingYushuai +Date: Thu, 15 Dec 2022 14:34:16 +0800 +Subject: [PATCH 003/157] Add Crc32 Optimization in Gzip For crc32 algorithm in + APBC int_gzip. + +Match crc32 lookup table algorithm. 
An example of a crc32 lookup-table loop, e.g.:
+```c
+do {
+  c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8);
+} while (--n);
+```
+
+Usage: `gcc -O3 -march=armv8.1-a -floop-crc yourfile.c`
+Note: the CPU must support the CRC32 instructions.
+---
+ gcc/Makefile.in | 1 +
+ gcc/common.opt | 4 +
+ gcc/config/aarch64/aarch64-builtins.cc | 30 +
+ gcc/config/aarch64/aarch64-protos.h | 1 +
+ gcc/config/aarch64/aarch64.cc | 12 +
+ gcc/doc/invoke.texi | 6 +-
+ gcc/doc/tm.texi | 9 +
+ gcc/doc/tm.texi.in | 2 +
+ gcc/match.pd | 23 +
+ gcc/passes.def | 1 +
+ gcc/target.def | 14 +
+ .../tree-ssa/loop-crc-loop-condition-fail.c | 85 ++
+ .../tree-ssa/loop-crc-loop-form-fail-2.c | 90 ++
+ .../gcc.dg/tree-ssa/loop-crc-loop-form-fail.c | 112 ++
+ .../gcc.dg/tree-ssa/loop-crc-sucess.c | 83 +
+ .../tree-ssa/loop-crc-table-check-fail.c | 114 ++
+ gcc/timevar.def | 1 +
+ gcc/tree-pass.h | 1 +
+ gcc/tree-ssa-loop-crc.cc | 1333 +++++++++++++++++
+ 19 files changed, 1921 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-condition-fail.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail-2.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c
+ create mode 100644 gcc/tree-ssa-loop-crc.cc
+
+diff --git a/gcc/Makefile.in b/gcc/Makefile.in
+index 5cd838270..2b9f025dc 100644
+--- a/gcc/Makefile.in
++++ b/gcc/Makefile.in
+@@ -1649,6 +1649,7 @@ OBJS = \
+ tree-ssa-ifcombine.o \
+ tree-ssa-live.o \
+ tree-ssa-loop-ch.o \
++ tree-ssa-loop-crc.o \
+ tree-ssa-loop-im.o \
+ tree-ssa-loop-ivcanon.o \
+ tree-ssa-loop-ivopts.o \
+diff --git a/gcc/common.opt b/gcc/common.opt
+index b18f0b944..42fb2fc19 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1119,6 +1119,10 @@ fcrypto-accel-aes
+ Common Var(flag_crypto_accel_aes) Init(0) Optimization
+ Perform crypto acceleration AES pattern matching.
+
++floop-crc
++Common Var(flag_loop_crc) Optimization
++Convert matched CRC loops into hardware CRC32 instructions.
++
+ fauto-inc-dec
+ Common Var(flag_auto_inc_dec) Init(1) Optimization
+ Generate auto-inc/dec instructions.
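For context, the transformation this commit message describes replaces the byte-at-a-time table lookup with the AArch64 ACLE CRC32 intrinsics from `<arm_acle.h>`. Below is a minimal self-contained sketch of the before/after shapes, as an illustration only, not the pass's generated code: the function names are invented here, and loading through `__builtin_memcpy` is an assumption for alignment safety (the converted form shown later in this patch casts the pointer directly).

```c
#include <arm_acle.h>   /* __crc32b/__crc32h/__crc32w; needs a CRC-capable -march.  */
#include <stdint.h>

extern const uint32_t crc_32_tab[256];  /* the standard CRC-32 table, as in gzip */

/* Byte-at-a-time loop of the shape -floop-crc matches.  */
uint32_t crc_bytewise (uint32_t c, const unsigned char *s, unsigned n)
{
  if (n)
    do
      c = crc_32_tab[((int) c ^ (*s++)) & 0xff] ^ (c >> 8);
    while (--n);
  return c;
}

/* Word-at-a-time equivalent of what the conversion produces:
   4 bytes per __crc32w, then the 2- and 1-byte tails.  */
uint32_t crc_hw (uint32_t c, const unsigned char *s, unsigned n)
{
  for (unsigned nn = n / 4; nn; --nn, s += 4)
    {
      uint32_t w;
      __builtin_memcpy (&w, s, 4);  /* byte copy avoids unaligned loads */
      c = __crc32w (c, w);
    }
  if (n & 2)
    {
      uint16_t h;
      __builtin_memcpy (&h, s, 2);
      c = __crc32h (c, h);
      s += 2;
    }
  if (n & 1)
    c = __crc32b (c, *s);
  return c;
}
```

Compiling with `-march=armv8.1-a` (or any `-march` that includes the `crc` extension) makes these intrinsics available, which is why the usage line above requires it.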
+diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
+index 42276e7ca..3b952ef39 100644
+--- a/gcc/config/aarch64/aarch64-builtins.cc
++++ b/gcc/config/aarch64/aarch64-builtins.cc
+@@ -551,6 +551,12 @@ typedef struct
+ #define VAR1(T, N, MAP, FLAG, A) \
+ AARCH64_SIMD_BUILTIN_##T##_##N##A,
+
++enum aarch64_crc_builtins {
++  AARCH64_BUILTIN_CRC32B,
++  AARCH64_BUILTIN_CRC32H,
++  AARCH64_BUILTIN_CRC32W,
++};
++
+ enum aarch64_builtins
+ {
+ AARCH64_BUILTIN_MIN,
+@@ -1812,6 +1818,30 @@ aarch64_general_builtin_decl (unsigned code, bool)
+ return aarch64_builtin_decls[code];
+ }
+
++/* Implement TARGET_GET_CRC_BUILTIN_CODE.  */
++unsigned
++get_crc_builtin_code (unsigned code, bool)
++{
++  if (code > AARCH64_BUILTIN_CRC32W)
++    return AARCH64_BUILTIN_MIN;
++
++  unsigned res = AARCH64_BUILTIN_MIN;
++  switch (code) {
++    case AARCH64_BUILTIN_CRC32B:
++      res = AARCH64_BUILTIN_crc32b;
++      break;
++    case AARCH64_BUILTIN_CRC32H:
++      res = AARCH64_BUILTIN_crc32h;
++      break;
++    case AARCH64_BUILTIN_CRC32W:
++      res = AARCH64_BUILTIN_crc32w;
++      break;
++    default:
++      break;
++  }
++  return res;
++}
++
+ typedef enum
+ {
+ SIMD_ARG_COPY_TO_REG,
+diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
+index 475d174dd..853197ee9 100644
+--- a/gcc/config/aarch64/aarch64-protos.h
++++ b/gcc/config/aarch64/aarch64-protos.h
+@@ -994,6 +994,7 @@ gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *,
+ gimple_stmt_iterator *);
+ rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int);
+ tree aarch64_general_builtin_decl (unsigned, bool);
++unsigned get_crc_builtin_code (unsigned, bool);
+ tree aarch64_general_builtin_rsqrt (unsigned int);
+ tree aarch64_builtin_vectorized_function (unsigned int, tree, tree);
+ void handle_arm_acle_h (void);
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
+index 5537a537c..280e0b618 100644
+--- a/gcc/config/aarch64/aarch64.cc
++++ b/gcc/config/aarch64/aarch64.cc
+@@ -15210,6 +15210,15 @@ aarch64_builtin_decl (unsigned int code, bool initialize_p)
+ gcc_unreachable ();
+ }
+
++/* Implement TARGET_GET_CRC_BUILTIN_CODE.  */
++static unsigned
++aarch64_get_crc_builtin_code (unsigned code, bool initialize_p)
++{
++  unsigned subcode = get_crc_builtin_code (code, initialize_p);
++  unsigned res = subcode << AARCH64_BUILTIN_SHIFT;
++  return res;
++}
++
+ /* Return true if it is safe and beneficial to use the approximate rsqrt optabs
+ to optimize 1.0/sqrt. */
+
+@@ -27677,6 +27686,9 @@ aarch64_get_v16qi_mode ()
+ #undef TARGET_BUILTIN_DECL
+ #define TARGET_BUILTIN_DECL aarch64_builtin_decl
+
++#undef TARGET_GET_CRC_BUILTIN_CODE
++#define TARGET_GET_CRC_BUILTIN_CODE aarch64_get_crc_builtin_code
++
+ #undef TARGET_BUILTIN_RECIPROCAL
+ #define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal
+
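The backend changes above only translate a generic CRC builtin index into aarch64's real builtin function code (packed with `AARCH64_BUILTIN_SHIFT` into the general-builtin encoding). A hedged sketch of how target-independent code, such as the new pass, might consume the two hooks together; `crc_arg`, `data_arg`, `new_crc` and `gsi` are hypothetical placeholders, not names from this patch:

```c
/* Sketch (assumption, not patch code): resolve the generic index to the
   backend builtin decl and emit  new_crc = __crc32w (crc_arg, data_arg)
   before the statement at GSI.  */
unsigned fcode = targetm.get_crc_builtin_code (AARCH64_BUILTIN_CRC32W,
					       /*initialize_p=*/true);
tree fndecl = targetm.builtin_decl (fcode, /*initialize_p=*/true);
gcall *call = gimple_build_call (fndecl, 2, crc_arg, data_arg);
gimple_call_set_lhs (call, new_crc);
gsi_insert_before (&gsi, call, GSI_SAME_STMT);
```

Splitting the lookup this way keeps the pass free of a hard dependency on aarch64 internals: the pass only knows the small generic enum, and the backend decides which real builtin (if any) it maps to.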
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 7ca60dd64..c3ce148b0 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -537,7 +537,7 @@ Objective-C and Objective-C++ Dialects}.
+ -fisolate-erroneous-paths-dereference -fisolate-erroneous-paths-attribute @gol
+ -fivopts -fkeep-inline-functions -fkeep-static-functions @gol
+ -fkeep-static-consts -flimit-function-alignment -flive-range-shrinkage @gol
+--floop-block -floop-interchange -floop-strip-mine @gol
++-floop-block -floop-crc -floop-interchange -floop-strip-mine @gol
+ -floop-unroll-and-jam -floop-nest-optimize @gol
+ -floop-parallelize-all -flra-remat -flto -flto-compression-level @gol
+ -flto-partition=@var{alg} -fmerge-all-constants @gol
+@@ -12159,6 +12159,10 @@ GIMPLE -> GRAPHITE -> GIMPLE transformation. Some minimal optimizations
+ are also performed by the code generator isl, like index splitting and
+ dead code elimination in loops.
+
++@item -floop-crc
++@opindex floop-crc
++Convert matched CRC lookup-table loops into hardware CRC32 instructions.
++
+ @item -floop-nest-optimize
+ @opindex floop-nest-optimize
+ Enable the isl based loop nest optimizer. This is a generic loop nest
+diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
+index 851d31c18..5a1e0fe43 100644
+--- a/gcc/doc/tm.texi
++++ b/gcc/doc/tm.texi
+@@ -11658,6 +11658,15 @@ If @var{code} is out of range the function should return
+ @code{error_mark_node}.
+ @end deftypefn
+
++@deftypefn {Target Hook} unsigned TARGET_GET_CRC_BUILTIN_CODE (unsigned @var{code}, bool @var{initialize_p})
++Define this hook to map a generic CRC32 builtin index to the target's
++builtin function code.  It should return the target-specific code for
++the CRC32 builtin identified by @var{code}, creating the builtin first
++if @var{initialize_p} is true.  If @var{code} does not identify a CRC32
++builtin, the function should return a code that corresponds to no
++builtin, so that the caller can detect the failure.
++@end deftypefn
++
+ @deftypefn {Target Hook} rtx TARGET_EXPAND_BUILTIN (tree @var{exp}, rtx @var{target}, rtx @var{subtarget}, machine_mode @var{mode}, int @var{ignore})
+
+ Expand a call to a machine specific built-in function that was set up by
+diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
+index ac95cdf7a..6ff0eff66 100644
+--- a/gcc/doc/tm.texi.in
++++ b/gcc/doc/tm.texi.in
+@@ -7704,6 +7704,8 @@ to by @var{ce_info}.
+
+ @hook TARGET_BUILTIN_DECL
+
++@hook TARGET_GET_CRC_BUILTIN_CODE
++
+ @hook TARGET_EXPAND_BUILTIN
+
+ @hook TARGET_RESOLVE_OVERLOADED_BUILTIN
+diff --git a/gcc/match.pd b/gcc/match.pd
+index aee58e47b..1f42090a2 100644
+--- a/gcc/match.pd
++++ b/gcc/match.pd
+@@ -4409,6 +4409,29 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
+ )
+ #endif
+
++#if GIMPLE
++/* Try to match
++   _4 = (int) _3;    NOP_EXPR (SSA_NAME @2)
++   _5 = _4 ^ c_10;   BIT_XOR_EXPR (SSA_NAME@1, SSA_NAME)
++   _6 = _5 & 255;    BIT_AND_EXPR (SSA_NAME, INTEGER_CST@3)
++*/
++(match (crc_match_index @1 @2 @3)
++ (bit_and (bit_xor (nop SSA_NAME@2) SSA_NAME@1) INTEGER_CST@3)
++ (if (INTEGRAL_TYPE_P (type) && tree_to_uhwi (@3) == 255))
++)
++#endif
++
++#if GIMPLE
++/* Try to match
++   _8 = c_12 >> 8;   RSHIFT_EXPR (SSA_NAME @1, INTEGER_CST @2)
++   c_19 = _7 ^ _8;   BIT_XOR_EXPR (SSA_NAME@3, SSA_NAME)
++*/
++(match (crc_match_res @1 @2 @3)
++ (bit_xor SSA_NAME@3 (rshift SSA_NAME@1 INTEGER_CST@2))
++ (if (INTEGRAL_TYPE_P (type) && tree_to_uhwi (@2) == 8))
++)
++#endif
++
+ /* Simplification moved from fold_cond_expr_with_comparison. It may also
+ be extended. */
+ /* This pattern implements two kinds simplification:
+diff --git a/gcc/passes.def b/gcc/passes.def
+index cdc600298..89d6889e5 100644
+--- a/gcc/passes.def
++++ b/gcc/passes.def
+@@ -95,6 +95,7 @@ along with GCC; see the file COPYING3. If not see
+ NEXT_PASS (pass_cd_dce, false /* update_address_taken_p */);
+ NEXT_PASS (pass_phiopt, true /* early_p */);
+ NEXT_PASS (pass_array_widen_compare);
++ NEXT_PASS (pass_loop_crc);
+ NEXT_PASS (pass_tail_recursion);
+ NEXT_PASS (pass_if_to_switch);
+ NEXT_PASS (pass_convert_switch);
+diff --git a/gcc/target.def b/gcc/target.def
+index c9bb2b4c2..8abf49f0a 100644
+--- a/gcc/target.def
++++ b/gcc/target.def
+@@ -2413,6 +2413,20 @@ If @var{code} is out of range the function should return\n\
+ @code{error_mark_node}.",
+ tree, (unsigned code, bool initialize_p), NULL)
+
++/* Initialize (if INITIALIZE_P is true) and return the real code of
++   the target-specific CRC32 built-in function for the generic index
++   CODE.  Return a code that corresponds to no built-in if CODE is
++   outside of the range of valid crc32 codes.  */
++DEFHOOK
++(get_crc_builtin_code,
++ "Define this hook to map a generic CRC32 builtin index to the target's\n\
++builtin function code.  It should return the target-specific code for\n\
++the CRC32 builtin identified by @var{code}, creating the builtin first\n\
++if @var{initialize_p} is true.  If @var{code} does not identify a CRC32\n\
++builtin, the function should return a code that corresponds to no\n\
++builtin, so that the caller can detect the failure.",
++ unsigned, (unsigned code, bool initialize_p), NULL)
++
+ /* Expand a target-specific builtin. */
+ DEFHOOK
+ (expand_builtin,
+diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-condition-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-condition-fail.c
+new file mode 100644
+index 000000000..3620e92f7
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-condition-fail.c
+@@ -0,0 +1,85 @@
++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */
++/* { dg-options "-O3 -march=armv8.1-a -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */
++
++#include
++#include
++typedef unsigned long ulg;
++typedef unsigned char uch;
++
++static const ulg crc_32_tab[] = {
++  0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
++  0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
++  0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
++  0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
++  0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
++  0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
++  0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
++  0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
++  0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
++  0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
++  0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
++  0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
++  0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
++  0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
++  0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
++  0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
++  0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
++  0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
++  0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
++  0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
++  0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
++  0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
++  0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
++  0xc90c2086L, 0x5768b525L, 0x206f85b3L,
0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, ++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, ++ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, ++ 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, ++ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, ++ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, ++ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, ++ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, ++ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, ++ 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, ++ 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, ++ 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, ++ 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, ++ 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, ++ 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, ++ 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, ++ 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, ++ 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, ++ 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, ++ 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, ++ 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, ++ 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, ++ 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, ++ 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, ++ 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, ++ 0x2d02ef8dL ++}; ++ ++ulg updcrc (s, n) ++ uch *s; /* pointer to bytes to pump through */ ++ unsigned n; /* number of bytes in s[] */ ++{ ++ register ulg c; /* temporary variable */ ++ ++ static ulg crc = (ulg)0xffffffffL; /* shift register contents */ ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ if (n) do { ++ c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8); ++ } while (--n || c != 0) ; ++ } ++ crc = c; ++exit1: ++ return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ ++} ++/* { dg-final { scan-tree-dump-times "Wrong loop form for crc matching." 
1 "loop_crc"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail-2.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail-2.c +new file mode 100644 +index 000000000..fac759c67 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail-2.c +@@ -0,0 +1,90 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -march=armv8.1-a -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ ++ ++#include ++#include ++typedef unsigned long ulg; ++typedef unsigned char uch; ++ ++static const ulg crc_32_tab[] = { ++ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, ++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, ++ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, ++ 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, ++ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, ++ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, ++ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, ++ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, ++ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, ++ 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, ++ 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, ++ 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, ++ 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, ++ 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, ++ 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, ++ 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, ++ 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, ++ 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 
0x66063bcaL, 0x11010b5cL, ++ 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, ++ 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, ++ 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, ++ 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, ++ 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, ++ 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, ++ 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, ++ 0x2d02ef8dL ++}; ++int test[5] = {0}; ++ ++ulg updcrc (s, n) ++ uch *s; /* pointer to bytes to pump through */ ++ unsigned n; /* number of bytes in s[] */ ++{ ++ register ulg c; /* temporary variable */ ++ ++ static ulg crc = (ulg)0xffffffffL; /* shift register contents */ ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ if (n) do { ++ c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8); ++ } while (--n) ; ++ } ++ do { ++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); ++ test[c%5] = c; ++ } while (--n) ; ++ crc = c; ++ return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ ++} ++/* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 1 "loop_crc"} } */ ++/* { dg-final { scan-tree-dump-times "Wrong crc table for crc matching." 1 "loop_crc"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c +new file mode 100644 +index 000000000..ba9e5bb95 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c +@@ -0,0 +1,112 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -march=armv8.1-a -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ ++ ++#include ++#include ++typedef unsigned long ulg; ++typedef unsigned char uch; ++ ++static const ulg crc_32_tab[] = { ++ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 
0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L,
++  0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
++  0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
++  0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
++  0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
++  0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
++  0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
++  0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
++  0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
++  0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
++  0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
++  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
++  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
++  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
++  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
++  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
++  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
++  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
++  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
++  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
++  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
++  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
++  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
++  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
++  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
++  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
++  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
++  0x2d02ef8dL
++};
++
++/* check when the loop has an inner loop, should fail. */
++ulg updcrc (s, n)
++  uch *s;      /* pointer to bytes to pump through */
++  unsigned n;  /* number of bytes in s[] */
++{
++  register ulg c;  /* temporary variable */
++
++  static ulg crc = (ulg)0xffffffffL;  /* shift register contents */
++
++  if (s == NULL) {
++    c = 0xffffffffL;
++  } else {
++    c = crc;
++    if (n)
++      do {
++        c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8);
++        for (int i = 0; i < 5; i++) {
++          c++;
++        }
++
++      } while (--n);
++  }
++  crc = c;
++  return c ^ 0xffffffffL;  /* (instead of ~c for 64-bit machines) */
++}
++
++/* check when the loop has a second backedge, should fail. */
++ulg updcrc1(s, n)
++  uch *s;      /* pointer to bytes to pump through */
++  unsigned n;  /* number of bytes in s[] */
++{
++  register ulg c;  /* temporary variable */
++
++  static ulg crc = (ulg)0xffffffffL;  /* shift register contents */
++
++  if (s == NULL) {
++    c = 0xffffffffL;
++  } else {
++    c = crc;
++    if (n)
++      do {
++        c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8);
++      } while (--n || c != 0) ;
++  }
++  crc = c;
++  return c ^ 0xffffffffL;  /* (instead of ~c for 64-bit machines) */
++}
++/* { dg-final { scan-tree-dump-times "Wrong crc table for crc matching." 1 "loop_crc"} } */
++/* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read."
1 "loop_crc"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c +new file mode 100644 +index 000000000..dad7bdbfc +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c +@@ -0,0 +1,83 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -march=armv8.1-a -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ ++ ++#include ++#include ++typedef unsigned long ulg; ++typedef unsigned char uch; ++ ++static const ulg crc_32_tab[] = { ++ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, ++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, ++ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, ++ 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, ++ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, ++ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, ++ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, ++ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, ++ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, ++ 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, ++ 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, ++ 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, ++ 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, ++ 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, ++ 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, ++ 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, ++ 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, ++ 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, ++ 
0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, ++ 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, ++ 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, ++ 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, ++ 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, ++ 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, ++ 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, ++ 0x2d02ef8dL ++}; ++ ++ulg updcrc (s, n) ++ uch *s; /* pointer to bytes to pump through */ ++ unsigned n; /* number of bytes in s[] */ ++{ ++ register ulg c; /* temporary variable */ ++ ++ static ulg crc = (ulg)0xffffffffL; /* shift register contents */ ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) do { ++ c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8); ++ } while (--n); ++ } ++ crc = c; ++ return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ ++} ++/* { dg-final { scan-tree-dump-times "The 1th loop form is success matched,and the loop can be optimized." 1 "loop_crc"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c +new file mode 100644 +index 000000000..523a7740c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c +@@ -0,0 +1,114 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -march=armv8.1-a -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ ++ ++#include ++#include ++typedef unsigned long ulg; ++typedef unsigned char uch; ++ ++static const ulg crc_32_tab[] = { ++ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, ++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 
0x0d6d6a3eL, 0x7a6a5aa8L,
++  0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
++  0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
++  0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
++  0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
++  0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
++  0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
++  0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
++  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
++  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
++  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
++  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
++  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
++  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
++  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
++  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
++  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
++  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
++  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
++  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
++  0x37d83bf1L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
++  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
++  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
++  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
++  0x2d02ef8dL
++};
++int test[5] = {0};
++
++/* check when the loop is doing more than one array read or writing an array, both should fail. */
++ulg updcrc (s, n)
++  uch *s;      /* pointer to bytes to pump through */
++  unsigned n;  /* number of bytes in s[] */
++{
++  register ulg c;  /* temporary variable */
++
++  static ulg crc = (ulg)0xffffffffL;  /* shift register contents */
++
++  if (s == NULL) {
++    c = 0xffffffffL;
++  } else {
++    c = crc;
++    if (n)
++      do {
++        c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) * test[c%5];
++      } while (--n) ;
++  }
++  do {
++    c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8);
++    test[c%5] = c;
++  } while (--n) ;
++  crc = c;
++  return c ^ 0xffffffffL;  /* (instead of ~c for 64-bit machines) */
++}
++
++/* check when the loop is not working on a correct crc_table; should fail. */
++ulg updcrc1(s, n)
++  uch *s;      /* pointer to bytes to pump through */
++  unsigned n;  /* number of bytes in s[] */
++{
++  register ulg c;  /* temporary variable */
++
++  static ulg crc = (ulg)0xffffffffL;  /* shift register contents */
++
++  if (s == NULL) {
++    c = 0xffffffffL;
++  } else {
++    c = crc;
++    if (n)
++      do {
++        c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8);
++      } while (--n) ;
++  }
++  crc = c;
++  return c ^ 0xffffffffL;  /* (instead of ~c for 64-bit machines) */
++}
++/* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 2 "loop_crc"} } */
++/* { dg-final { scan-tree-dump-times "Wrong crc table for crc matching." 3 "loop_crc"} } */
++/* { dg-final { scan-tree-dump-times "Table check fail. Table not matching."
1 "loop_crc"} } */ +diff --git a/gcc/timevar.def b/gcc/timevar.def +index 8e7510eb3..8341b9ffd 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -220,6 +220,7 @@ DEFTIMEVAR (TV_TREE_COPY_RENAME , "tree rename SSA copies") + DEFTIMEVAR (TV_TREE_SSA_VERIFY , "tree SSA verifier") + DEFTIMEVAR (TV_TREE_STMT_VERIFY , "tree STMT verifier") + DEFTIMEVAR (TV_TREE_ARRAY_WIDEN_COMPARE, "tree array widen compare") ++DEFTIMEVAR (TV_TREE_LOOP_CRC , "tree loop crc") + DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch conversion") + DEFTIMEVAR (TV_TREE_SWITCH_LOWERING, "tree switch lowering") + DEFTIMEVAR (TV_TREE_RECIP , "gimple CSE reciprocals") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 34e60bc38..6cd679e10 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -454,6 +454,7 @@ extern gimple_opt_pass *make_pass_phiopt (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_forwprop (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_phiprop (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_array_widen_compare (gcc::context *ctxt); ++extern gimple_opt_pass *make_pass_loop_crc (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_tree_ifcombine (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_dse (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_nrv (gcc::context *ctxt); +diff --git a/gcc/tree-ssa-loop-crc.cc b/gcc/tree-ssa-loop-crc.cc +new file mode 100644 +index 000000000..b9c2f71ca +--- /dev/null ++++ b/gcc/tree-ssa-loop-crc.cc +@@ -0,0 +1,1333 @@ ++/* This pass converts special loops where do CRC algorithms to ++ simple CRC instructions in AArch64. ++ Copyright (C) 2023-2023 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. 
++
++#include "config.h"
++#include "system.h"
++#include "coretypes.h"
++#include "backend.h"
++#include "target.h"
++#include "tree.h"
++#include "gimple.h"
++#include "tree-pass.h"
++#include "gimple-ssa.h"
++#include "tree-pretty-print.h"
++#include "fold-const.h"
++#include "gimplify.h"
++#include "gimple-iterator.h"
++#include "tree-ssa-loop-manip.h"
++#include "tree-ssa-loop.h"
++#include "ssa.h"
++#include "tree-into-ssa.h"
++#include "cfganal.h"
++#include "cfgloop.h"
++#include "gimple-pretty-print.h"
++#include "tree-cfg.h"
++#include "cgraph.h"
++#include "print-tree.h"
++#include "cfghooks.h"
++#include "gimple-fold.h"
++#include "diagnostic-core.h"
++
++/* This pass handles scenarios similar to the following:
++ulg updcrc (s, n)
++  uch *s;
++  unsigned n;
++{
++  register ulg c;
++
++  static ulg crc = (ulg)0xffffffffL;
++
++  if (s == NULL)
++    {
++      c = 0xffffffffL;
++    }
++  else
++    {
++      c = crc;
++      if (n)
++	do
++	  {
++	    c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8);
++	  } while (--n);
++    }
++  crc = c;
++  return c ^ 0xffffffffL;
++}
++
++If the hardware supports the crc instruction, then the pass completes the
++conversion of the above scenario into:
++
++#define SIZE_U32 sizeof(uint32_t)
++unsigned long updcrc(s, n)
++  unsigned char *s;
++  unsigned n;
++{
++  register unsigned long c;
++
++  static unsigned long crc = (unsigned long)0xffffffffL;
++
++  if (s == NULL)
++    {
++      c = 0xffffffffL;
++    }
++  else
++    {
++      c = crc;
++      if (n)
++	{
++	  uint32_t nn = n/SIZE_U32;
++	  do
++	    {
++	      c = __crc32w (c,*((uint32_t *)s));
++	      s += SIZE_U32;
++	    } while(--nn);
++	}
++    }
++  if (n & sizeof (uint16_t))
++    {
++      c = __crc32h (c, *((uint16_t *)s));
++      s += sizeof (uint16_t);
++    }
++  if (n & sizeof (uint8_t))
++    c = __crc32b (c, *s);
++  crc = c;
++  return c ^ 0xffffffffL;
++}
++
++Inside the compiler, the pass completes the conversion of such
++scenarios in four parts:
++1) match_crc_loop: The function screens for such
++   scenarios;
++2) convert_to_new_loop: The function converts
++   origin_loop to the new loops, and removes origin_loop;
++3) origin_loop_info: The structure records important
++   information about origin_loop, such as the loop exit and the
++   initial values of the induction variables;
++4) create_new_loops: The function does the core work of the pass,
++   creating the new loops.  */
++
++extern bool gimple_crc_match_index (tree, tree *, tree (*)(tree));
++extern bool gimple_crc_match_res (tree, tree *, tree (*)(tree));
++
++static gimple *crc_table_read_stmt = NULL;
++
++static gphi *phi_s = NULL;
++static gphi *phi_c = NULL;
++static tree nn_tree = NULL;
++
++enum aarch64_crc_builtins
++{
++  AARCH64_BUILTIN_CRC32B,
++  AARCH64_BUILTIN_CRC32H,
++  AARCH64_BUILTIN_CRC32W,
++};
++
++/* Useful information about the origin loop. */
++struct origin_loop_info
++{
++  tree limit;		/* The limit index of the array in the old loop. */
++  tree base_n;		/* The initial value of n in the old loop. */
++  tree base_s;		/* The initial value of s in the old loop. */
++  tree base_c;		/* The initial value of c in the old loop. */
++  edge entry_edge;	/* The edge into the old loop. */
++  edge exit_edge;	/* The edge out of the old loop. */
++  basic_block exit_bb;
++};
++
++typedef struct origin_loop_info origin_loop_info;
++
++static origin_loop_info origin_loop;
++hash_map n_map;
++hash_map nn_map;
++hash_map s_map;
++hash_map c_map;
++hash_map crc_map;
++
++/* Initialize the origin_loop structure. */
++static void
++init_origin_loop_structure ()
++{
++  origin_loop.entry_edge = NULL;
++  origin_loop.exit_edge = NULL;
++  origin_loop.exit_bb = NULL;
++  origin_loop.limit = NULL;
++  origin_loop.base_n = NULL;
++  origin_loop.base_s = NULL;
++  origin_loop.base_c = NULL;
++}
++
++/* Get the edge through which the loop is first entered. */
++static edge
++get_loop_preheader_edge (class loop *loop)
++{
++  edge e;
++  edge_iterator ei;
++
++  FOR_EACH_EDGE (e, ei, loop->header->preds)
++    if (e->src != loop->latch)
++      break;
++
++  return e;
++}
++
++/* Return true if T is an SSA_NAME with an associated user variable. */
++
++static bool
++ssa_name_var_p (tree t)
++{
++  if (!t || TREE_CODE (t) != SSA_NAME)
++    return false;
++  if (SSA_NAME_VAR (t))
++    return true;
++  return false;
++}
++
++/* Return true if T1 and T2 are SSA_NAMEs that belong to the same variable. */
++
++static bool
++same_ssa_name_var_p (tree t1, tree t2)
++{
++  if (!ssa_name_var_p (t1) || !ssa_name_var_p (t2))
++    return false;
++  if (SSA_NAME_VAR (t1) == SSA_NAME_VAR (t2))
++    return true;
++  return false;
++}
++
++/* Get the origin loop induction variable upper bound. */
++
++static bool
++get_iv_upper_bound (gimple *stmt)
++{
++  if (origin_loop.limit != NULL || origin_loop.base_n != NULL)
++    return false;
++
++  tree lhs = gimple_cond_lhs (stmt);
++  tree rhs = gimple_cond_rhs (stmt);
++
++  if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE
++      || TREE_CODE (TREE_TYPE (rhs)) != INTEGER_TYPE)
++    return false;
++
++  /* TODO: Currently, the input restrictions on lhs and rhs are implemented
++     through PARM_DECL.  We may consider relaxing the restrictions later;
++     we would need to consider the overall adaptation scenario and add test
++     cases. */
++  if (ssa_name_var_p (lhs) && TREE_CODE (SSA_NAME_VAR (lhs)) == PARM_DECL)
++    {
++      origin_loop.limit = rhs;
++      origin_loop.base_n = lhs;
++    }
++  else
++    return false;
++
++  if (origin_loop.limit != NULL && origin_loop.base_n != NULL)
++    return true;
++
++  return false;
++}
++
++/* Get origin loop info. */
++static bool
++get_origin_loop_info (class loop *loop)
++{
++  auto_vec<edge> edges = get_loop_exit_edges (loop);
++  origin_loop.exit_edge = edges[0];
++  origin_loop.exit_bb = origin_loop.exit_edge->dest;
++  origin_loop.entry_edge = get_loop_preheader_edge (loop);
++  origin_loop.base_s = PHI_ARG_DEF_FROM_EDGE (phi_s, origin_loop.entry_edge);
++  origin_loop.base_c = PHI_ARG_DEF_FROM_EDGE (phi_c, origin_loop.entry_edge);
++
++  basic_block preheader_bb = origin_loop.entry_edge->src;
++
++  if (preheader_bb->preds->length () != 1)
++    return false;
++
++  edge entry_pre_bb_edge = EDGE_PRED (preheader_bb, 0);
++
++  basic_block pre_preheader_bb = entry_pre_bb_edge->src;
++
++  gimple_stmt_iterator gsi;
++  gimple *stmt;
++  bool get_upper_bound = false;
++  for (gsi = gsi_start_bb (pre_preheader_bb); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      stmt = gsi_stmt (gsi);
++      if (stmt && gimple_code (stmt) == GIMPLE_COND
++	  && get_iv_upper_bound (stmt))
++	{
++	  get_upper_bound = true;
++	  break;
++	}
++    }
++
++  return get_upper_bound;
++}
++
++/* The loop form check examines the entire loop control flow.
++   It should be a loop that:
++   1. is a do-while loop with header and latch only, with no other control
++      flow inside the loop;
++   2. has only one exiting edge;
++   3. has only one back edge and one entry edge.
++*/
++static bool
++crc_loop_form_check (class loop *loop)
++{
++  if (loop->num_nodes > 2 || loop->inner)
++    return false;
++  // Should only have 1 exit edge
++  auto_vec<edge> edges = get_loop_exit_edges (loop);
++  if (edges.length () != 1)
++    return false;
++
++  // The header should have only 2 incoming edges
++  // One of them is the preheader edge and the other is the backedge from the
++  // latch
++  if (EDGE_COUNT (loop->header->preds) != 2)
++    return false;
++  edge e1 = EDGE_PRED (loop->header, 0);
++  edge e2 = EDGE_PRED (loop->header, 1);
++
++  if ((e1->src == loop->latch && e2->src->loop_father != loop)
++      || (e2->src == loop->latch && e1->src->loop_father != loop))
++    return true;
++
++  return false;
++}
++
++/* Check that only one array is read in the loop.
++   Return the only array as CRC_TABLE. */
++static bool
++only_one_array_read (class loop *loop, tree &crc_table)
++{
++  gimple_stmt_iterator gsi;
++  gimple *stmt;
++  bool res = false;
++  for (gsi = gsi_start_bb (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      stmt = gsi_stmt (gsi);
++      if (stmt == NULL)
++	return false;
++
++      if (gimple_code (stmt) == GIMPLE_ASSIGN
++	  && TREE_CODE (gimple_assign_lhs (stmt)) == ARRAY_REF)
++	return false;
++
++      /* Only one-dimensional integer arrays meet the condition. */
++      if (gimple_code (stmt) == GIMPLE_ASSIGN
++	  && TREE_CODE (gimple_assign_rhs1 (stmt)) == ARRAY_REF
++	  && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (stmt), 0)) == VAR_DECL
++	  && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt))) == INTEGER_TYPE)
++	{
++	  if (crc_table == NULL
++	      && TREE_READONLY (gimple_assign_rhs1 (stmt)))
++	    {
++	      crc_table = gimple_assign_rhs1 (stmt);
++	      crc_table_read_stmt = stmt;
++	      res = true;
++	    }
++	  else
++	    return false;
++	}
++    }
++  return res;
++}
++
++static const unsigned HOST_WIDE_INT crc_32_tab[] = {
++  0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L,
++  0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L,
++  0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L,
++  0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL,
++  0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L,
++  0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L,
++  0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L,
++  0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL,
++  0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L,
++  0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL,
++  0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L,
++  0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L,
++  0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L,
++  0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL,
++  0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL,
++  0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L,
++  0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL,
++  0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L,
++  0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L,
++  0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L,
++  0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL,
++  0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L,
++  0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L,
++  0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL,
++  0x5edef90eL, 0x29d9c998L, 0xb0d09822L,
0xc7d7a8b4L, 0x59b33d17L,
++  0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L,
++  0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L,
++  0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L,
++  0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L,
++  0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL,
++  0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL,
++  0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L,
++  0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L,
++  0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL,
++  0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL,
++  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
++  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
++  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
++  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
++  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
++  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
++  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
++  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
++  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
++  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
++  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
++  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
++  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
++  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
++  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
++  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
++  0x2d02ef8dL
++};
++
++/* Check the content of the array. */
++static bool
++match_crc_table (tree crc_table)
++{
++  const unsigned LOW_BOUND = 0;
++  const unsigned UP_BOUND = 255;
++  const unsigned ELEMENT_SIZE = 8;
++  tree low_bound = array_ref_low_bound (crc_table);
++  tree up_bound = array_ref_up_bound (crc_table);
++  tree element_size = array_ref_element_size (crc_table);
++  if (!tree_fits_uhwi_p (low_bound) || !tree_fits_uhwi_p (up_bound)
++      || !tree_fits_uhwi_p (element_size))
++    return false;
++  unsigned HOST_WIDE_INT lb = tree_to_uhwi (low_bound);
++  unsigned HOST_WIDE_INT ub = tree_to_uhwi (up_bound);
++  unsigned HOST_WIDE_INT es = tree_to_uhwi (element_size);
++  if (lb != LOW_BOUND || ub != UP_BOUND || es != ELEMENT_SIZE)
++    return false;
++
++  tree decl = TREE_OPERAND (crc_table, 0);
++  tree ctor = ctor_for_folding (decl);
++  for (unsigned HOST_WIDE_INT i = lb; i <= ub; i++)
++    {
++      unsigned HOST_WIDE_INT val = tree_to_uhwi (CONSTRUCTOR_ELT (ctor,
++								  i)->value);
++      if (crc_32_tab[i] != val)
++	return false;
++    }
++  return true;
++}
++
++/* Check the crc table.  The loop should have only one data reference.
++   And match the data reference with the predefined array. */
++static bool
++crc_table_check (class loop *loop)
++{
++  tree crc_table = NULL;
++  if (!only_one_array_read (loop, crc_table))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "\nTable check fail. not only single array "
++			    "is read.\n");
++      return false;
++    }
++  if (!match_crc_table (crc_table))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "\nTable check fail. Table not matching.\n");
		 "\nTable check failed: table does not match.\n");
++      return false;
++    }
++  return true;
++}
++
++/* Check whether the evolution pattern of PHI is phi = SSA_NAME + TARGET.  */
++static bool
++evolution_pattern_plus_with_p (class loop *loop, gphi *phi,
++			       unsigned HOST_WIDE_INT target)
++{
++  edge backedge = find_edge (loop->latch, loop->header);
++  if (backedge == NULL)
++    return false;
++  tree evolution_node = PHI_ARG_DEF_FROM_EDGE (phi, backedge);
++  if (TREE_CODE (evolution_node) != SSA_NAME)
++    return false;
++  gimple *evolution_expr = SSA_NAME_DEF_STMT (evolution_node);
++
++  if (evolution_expr && is_gimple_assign (evolution_expr)
++      && (gimple_assign_rhs_code (evolution_expr) == PLUS_EXPR
++	  || gimple_assign_rhs_code (evolution_expr) == POINTER_PLUS_EXPR))
++    {
++      tree rhs1 = gimple_assign_rhs1 (evolution_expr);
++      tree rhs2 = gimple_assign_rhs2 (evolution_expr);
++      if (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == INTEGER_CST
++	  && tree_to_uhwi (rhs2) == target)
++	return true;
++    }
++  return false;
++}
++
++/* Check whether there are exactly 3 phi nodes in the header block.
++   Return the 3 phi nodes in CAPTURE.  */
++static bool
++check_num_of_phi (basic_block header, gphi *capture[])
++{
++  gphi *phi;
++  gphi_iterator gsi;
++  int num_of_phi = 0;
++
++  for (gsi = gsi_start_phis (header); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      phi = gsi.phi ();
++      if (phi)
++	num_of_phi++;
++      if (num_of_phi > 3)
++	return false;
++      capture[num_of_phi - 1] = phi;
++    }
++  /* There should be exactly 3 phi nodes.  */
++  return num_of_phi == 3;
++}
++
++/* Check the evolution pattern of the three phi nodes.  One of the nodes
++   should be incremented by 1 every iteration (s), one should be decremented
++   by 1 every iteration (n), and the third should be neither (c).  Return
++   the 3 phi nodes in CAPTURE in the order s, n, c.  */
++static bool
++check_evolution_pattern (class loop *loop, gphi *capture[])
++{
++  gphi *s = NULL;
++  gphi *n = NULL;
++  gphi *c = NULL;
++
++  for (int i = 0; i < 3; i++)
++    {
++      if (evolution_pattern_plus_with_p (loop, capture[i], 1))
++	{
++	  if (s != NULL)
++	    return false;
++	  s = capture[i];
++	  phi_s = s;
++	}
++      /* 4294967295 is (uint32_t) -1, i.e. a decrement by 1.  */
++      else if (evolution_pattern_plus_with_p (loop, capture[i], 4294967295))
++	{
++	  if (n != NULL)
++	    return false;
++	  n = capture[i];
++	}
++      else
++	{
++	  if (c != NULL)
++	    return false;
++	  c = capture[i];
++	  phi_c = c;
++	}
++    }
++
++  /* Some evolution pattern could not be found.  */
++  if (!n || !s || !c)
++    return false;
++
++  capture[0] = s;
++  capture[1] = n;
++  capture[2] = c;
++  return true;
++}
++
++/* Check the calculation pattern before and after the crc_table array read
++   stmt:
++     _7 = crc_32_tab[_6];
++   The calculation of the index _6 should be the result of a sequence of
++   calculations on s and c.
++   The result of the array read _7 should be used to calculate the new c.
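++
++   Taken together, the matched statements compute the scalar CRC step
++   shown in full further below (illustrative):
++     c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8);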
++   */
++static bool
++check_calculation_pattern (class loop *loop, gphi *capture[])
++{
++  gphi *s = capture[0];
++  gphi *c = capture[2];
++  tree res_ops[3];
++  tree index = TREE_OPERAND (gimple_assign_rhs1 (crc_table_read_stmt), 1);
++
++  /* Try to match
++       _4 = (int) _3;   // NOP_EXPR (SSA_NAME @2)
++       _5 = _4 ^ c_10;  // BIT_XOR_EXPR (SSA_NAME, PHI @1)
++       _6 = _5 & 255;   // BIT_AND_EXPR (SSA_NAME, INTEGER_CST @3)
++  */
++  if (!gimple_crc_match_index (index, res_ops, NULL))
++    return false;
++  gimple *s_res_stmt = SSA_NAME_DEF_STMT (res_ops[0]);
++  if (!s_res_stmt)
++    return false;
++  gimple *s_def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (s_res_stmt));
++  if (!s_def_stmt)
++    return false;
++  tree s_res = TREE_OPERAND (gimple_assign_rhs1 (s_def_stmt), 0);
++  if (res_ops[1] != gimple_phi_result (c) || s_res != gimple_phi_result (s))
++    return false;
++
++  /* Try to match
++       _8 = c_12 >> 8;  // RSHIFT_EXPR (SSA_NAME @1, INTEGER_CST @2)
++       c_19 = _7 ^ _8;  // BIT_XOR_EXPR (SSA_NAME @3, SSA_NAME)
++  */
++  edge backedge = find_edge (loop->latch, loop->header);
++  tree updated_c = PHI_ARG_DEF_FROM_EDGE (c, backedge);
++  if (!gimple_crc_match_res (updated_c, res_ops, NULL))
++    return false;
++  if (res_ops[0] != gimple_phi_result (c)
++      || res_ops[2] != gimple_assign_lhs (crc_table_read_stmt))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "\n gimple_crc_match_res pattern check failed.\n");
++      return false;
++    }
++
++  return true;
++}
++
++/* Check that the exit condition is n != 0.  */
++static bool
++check_exit_condition (class loop *loop, gphi *n)
++{
++  edge backedge = find_edge (loop->latch, loop->header);
++  gimple *cond_stmt = gsi_stmt (gsi_last_bb (loop->header));
++  if (!cond_stmt || gimple_code (cond_stmt) != GIMPLE_COND
++      || gimple_cond_code (cond_stmt) != NE_EXPR
++      || gimple_cond_lhs (cond_stmt) != PHI_ARG_DEF_FROM_EDGE (n, backedge)
++      || !integer_zerop (gimple_cond_rhs (cond_stmt)))
++    return false;
++
++  return true;
++}
++
++/* Check the loop body.  The loop body we are trying to match is
++
++# s_10 = PHI
++# n_11 = PHI
++# c_12 = PHI
++_1 = (int) c_12;
++s_18 = s_10 + 1;
++_3 = *s_10;
++_4 = (int) _3;
++_5 = _1 ^ _4;
++_6 = _5 & 255;
++_7 = crc_32_tab[_6];
++_8 = c_12 >> 8;
++c_19 = _7 ^ _8;
++n_20 = n_11 + 4294967295;
++if (n_20 != 0)
++  goto ; [INV]
++else
++  goto ; [INV]
++
++which is doing the very simple calculation
++do {
++  c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8);
++} while (--n);
++
++In this case, we don't want the loop to contain any other operations, so
++the matching conditions are:
++1. There are only 3 loop variants in each iteration, namely s, c and n,
++   which is enforced by requiring the loop to have exactly 3 phi nodes.
++2. The 3 loop variants should have an evolution pattern in which one node
++   is increased by 1 every iteration, one node is decreased by 1 every
++   iteration, and the third is neither.  These three SSA values are
++   captured for the later arithmetic pattern matching.
++3. Pattern matching for the index of crc_table.
++4. Pattern matching for the result of the c calculation after the read
++   from crc_table.
++5. The exit condition matching.
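++
++Once all five conditions hold, the loop is known to compute a CRC-32 one
++byte at a time, and (as sketched in the create_* functions below) it can
++be replaced by a word-at-a-time loop using the target CRC32 builtins.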
++*/
++static bool
++crc_loop_body_check (class loop *loop)
++{
++  basic_block header = loop->header;
++  gphi *capture[3];
++  if (!check_num_of_phi (header, capture))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "\n number of phi nodes check failed.\n");
++      return false;
++    }
++  if (!check_evolution_pattern (loop, capture))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "\n evolution pattern check failed.\n");
++      return false;
++    }
++  if (!check_calculation_pattern (loop, capture))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "\n calculation pattern check failed.\n");
++      return false;
++    }
++  if (!check_exit_condition (loop, capture[1]))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "\n exit condition check failed.\n");
++      return false;
++    }
++  return true;
++}
++
++static bool
++check_prev_bb (basic_block prev_bb, enum tree_code code)
++{
++  gimple_stmt_iterator gsi;
++  gimple *stmt;
++  for (gsi = gsi_start_bb (prev_bb); !gsi_end_p (gsi);
++       gsi_next (&gsi))
++    {
++      stmt = gsi_stmt (gsi);
++      if (stmt == NULL)
++	return false;
++
++      if (gimple_code (stmt) == GIMPLE_COND
++	  && gimple_cond_code (stmt) == code
++	  && TREE_CODE (gimple_cond_rhs (stmt)) == INTEGER_CST
++	  && tree_int_cst_sgn (gimple_cond_rhs (stmt)) == 0)
++	return true;
++    }
++  return false;
++}
++
++/* Check the basic blocks before the loop header.  The blocks we are
++   trying to match are
++
++c_15 = crc;
++if (n_16 (D) != 0)
++  goto ; [INV]
++else
++  goto ; [INV]
++
++   Here we must be sure that n is not zero, so the match condition is:
++   1. n is not zero.
++
++ :
++if (s_13 (D) == 0B)
++  goto ; [INV]
++else
++  goto ; [INV]
++
++   Here we must be sure that s is not NULL, so the match condition is:
++   1. s is not NULL.
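++
++   In C terms, the guards being matched look roughly like (illustrative):
++     if (s == NULL || n == 0)
++       return;  // the loop is never entered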
++*/
++static bool
++crc_prev_bb_of_loop_header_check (class loop *loop)
++{
++  basic_block header = loop->header;
++  basic_block prev_header_bb = header->prev_bb;
++  if (prev_header_bb == NULL)
++    return false;
++
++  basic_block prev_prev_header_bb = prev_header_bb->prev_bb;
++  if (prev_prev_header_bb == NULL)
++    return false;
++
++  if (!check_prev_bb (prev_prev_header_bb, NE_EXPR))
++    return false;
++
++  basic_block first_bb = prev_prev_header_bb->prev_bb;
++  if (first_bb == NULL)
++    return false;
++
++  if (!check_prev_bb (first_bb, EQ_EXPR))
++    return false;
++
++  return true;
++}
++
++static bool
++match_crc_loop (class loop *loop)
++{
++  if (!crc_loop_form_check (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "\nWrong loop form for crc matching.\n");
++      return false;
++    }
++  if (!crc_table_check (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "\nWrong crc table for crc matching.\n");
++      return false;
++    }
++  if (!crc_loop_body_check (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "\nWrong loop body for crc matching.\n");
++      return false;
++    }
++  if (!crc_prev_bb_of_loop_header_check (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "\nWrong preceding basic blocks of loop header"
++			    " for crc matching.\n");
++      return false;
++    }
++
++  init_origin_loop_structure ();
++  if (!get_origin_loop_info (loop))
++    return false;
++
++  return true;
++}
++
++static void
++create_new_bb (basic_block &new_bb, basic_block after_bb,
++	       basic_block dominator_bb, class loop *outer)
++{
++  new_bb = create_empty_bb (after_bb);
++  add_bb_to_loop (new_bb, outer);
++  set_immediate_dominator (CDI_DOMINATORS, new_bb, dominator_bb);
++}
++
++/* Emit nn = n >> 2 (the number of whole 4-byte words) at the end of the
++   preheader.  */
++static void
++change_preheader_bb (edge entry_edge)
++{
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gimple *g;
++  tree lhs1;
++
++  lhs1 = create_tmp_var (TREE_TYPE (origin_loop.base_n), "nn");
++  lhs1 = make_ssa_name (lhs1);
++  gsi = gsi_last_bb (entry_edge->src);
++  g = gimple_build_assign (lhs1, RSHIFT_EXPR, origin_loop.base_n,
++			   build_int_cst (TREE_TYPE (origin_loop.base_n), 2));
++  gimple_seq_add_stmt (&stmts, g);
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++  nn_tree = lhs1;
++  set_current_def (nn_tree, lhs1);
++  nn_map.put (entry_edge->src, lhs1);
++}
++
++static gphi *
++create_phi_node_for_bb (tree old_name, basic_block bb)
++{
++  gphi *phi = create_phi_node (NULL_TREE, bb);
++  create_new_def_for (old_name, phi, gimple_phi_result_ptr (phi));
++  return phi;
++}
++
++static gimple *
++call_builtin_fun (int code, tree &lhs, tree arg1, tree arg2)
++{
++  unsigned int builtin_code = targetm.get_crc_builtin_code (code, true);
++  // Get the decl of the CRC builtin, e.g. __builtin_aarch64_crc32w.
++  tree fn = targetm.builtin_decl (builtin_code, true);
++  if (!fn || fn == error_mark_node)
++    fatal_error (input_location,
++		 "target specific builtin not available");
++  gimple *call_builtin = gimple_build_call (fn, 2, arg1, arg2);
++  lhs = make_ssa_name (unsigned_type_node);
++  gimple_call_set_lhs (call_builtin, lhs);
++
++  return call_builtin;
++}
++
++/* Create loop_header and loop_latch for the new loop.
++ :
++  # s_14 = PHI
++  # c_16 = PHI
++  # nn_19 = PHI
++  _1 = (unsigned int) c_16;
++  _2 = MEM[(uint32_t *)s_14];
++  _40 = __builtin_aarch64_crc32w (_1, _2);
++  c_29 = (long unsigned int) _40;
++  s_30 = s_14 + 4;
++  nn_31 = nn_19 + 4294967295;
++  if (nn_31 != 0)
++  The IR of the bb is as above.
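++
++  Roughly, the block above corresponds to this C (illustrative; nn is
++  n >> 2, computed in the preheader):
++    do {
++      c = __builtin_aarch64_crc32w (c, *(uint32_t *) s);
++      s += 4;
++    } while (--nn != 0);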
++  */
++static void
++create_loop_bb (basic_block &loop_bb, basic_block after_bb,
++		basic_block dominator_bb, class loop *outer, edge entry_edge)
++{
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gimple *g;
++  gphi *phi_s_loop;
++  gphi *phi_c_loop;
++  gphi *phi_nn_loop;
++
++  create_new_bb (loop_bb, after_bb, dominator_bb, outer);
++  redirect_edge_and_branch (entry_edge, loop_bb);
++  gsi = gsi_last_bb (loop_bb);
++  tree entry_nn = get_current_def (nn_tree);
++  phi_s_loop = create_phi_node_for_bb (origin_loop.base_s, loop_bb);
++  phi_c_loop = create_phi_node_for_bb (origin_loop.base_c, loop_bb);
++  phi_nn_loop = create_phi_node_for_bb (entry_nn, loop_bb);
++
++  tree res_s = gimple_phi_result (phi_s_loop);
++  tree res_nn = gimple_phi_result (phi_nn_loop);
++  tree lhs1 = gimple_build (&stmts, NOP_EXPR, unsigned_type_node,
++			    gimple_phi_result (phi_c_loop));
++  g = gimple_build_assign (make_ssa_name (unsigned_type_node),
++			   fold_build2 (MEM_REF, unsigned_type_node, res_s,
++					build_int_cst (build_pointer_type
++						       (unsigned_type_node),
++						       0)));
++  gimple_seq_add_stmt (&stmts, g);
++  tree lhs2 = gimple_assign_lhs (g); // _2 = MEM[(uint32_t *)s_14];
++  unsigned int code = AARCH64_BUILTIN_CRC32W;
++  tree lhs3;
++  gimple *build_crc32w = call_builtin_fun (code, lhs3, lhs1, lhs2);
++  crc_map.put (loop_bb, lhs3);
++  gimple_seq_add_stmt (&stmts, build_crc32w);
++
++  tree lhs4 = copy_ssa_name (origin_loop.base_c);
++  g = gimple_build_assign (lhs4, NOP_EXPR, lhs3);
++  gimple_seq_add_stmt (&stmts, g);
++  c_map.put (loop_bb, lhs4);
++
++  tree lhs5 = copy_ssa_name (origin_loop.base_s);
++  g = gimple_build_assign (lhs5, POINTER_PLUS_EXPR, res_s,
++			   build_int_cst (sizetype, 4));
++  gimple_seq_add_stmt (&stmts, g);
++  s_map.put (loop_bb, lhs5);
++
++  /* nn_31 = nn_19 - 1, expressed as an unsigned addition.  */
++  tree lhs6 = copy_ssa_name (nn_tree);
++  g = gimple_build_assign (lhs6, PLUS_EXPR, res_nn,
++			   build_int_cst (TREE_TYPE (res_nn), 4294967295));
++  gimple_seq_add_stmt (&stmts, g);
++  nn_map.put (loop_bb, lhs6);
++
++  gcond *cond_stmt = gimple_build_cond (NE_EXPR, lhs6, origin_loop.limit,
++					NULL_TREE, NULL_TREE);
++  gimple_seq_add_stmt (&stmts, cond_stmt);
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++}
++
++/* :
++  # c_6 = PHI
++  # s_46 = PHI
++  _44 = n_26(D) & 2;
++  if (_44 != 0)
++  The IR of the bb is as above.
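++
++  That is, after the word loop this block tests n & 2 to see whether at
++  least two trailing bytes remain; if so they are handled by crc32h in
++  the cond_true block.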
++  */
++static void
++create_cond_bb (basic_block &cond_bb, basic_block after_bb,
++		basic_block dominator_bb, class loop *outer)
++{
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gphi *phi_s_loop;
++  gphi *phi_c_loop;
++
++  create_new_bb (cond_bb, after_bb, dominator_bb, outer);
++  gsi = gsi_last_bb (cond_bb);
++  tree entry_nn = get_current_def (nn_tree);
++  phi_s_loop = create_phi_node_for_bb (origin_loop.base_s, cond_bb);
++  phi_c_loop = create_phi_node_for_bb (origin_loop.base_c, cond_bb);
++  tree res_s = gimple_phi_result (phi_s_loop);
++  set_current_def (origin_loop.base_s, res_s);
++  s_map.put (cond_bb, res_s);
++  tree res_c = gimple_phi_result (phi_c_loop);
++  set_current_def (origin_loop.base_c, res_c);
++  c_map.put (cond_bb, res_c);
++
++  tree lhs1 = gimple_build (&stmts, BIT_AND_EXPR,
++			    TREE_TYPE (origin_loop.base_n),
++			    origin_loop.base_n,
++			    build_int_cst (TREE_TYPE (origin_loop.base_n), 2));
++  gcond *cond_stmt = gimple_build_cond (NE_EXPR, lhs1, origin_loop.limit,
++					NULL_TREE, NULL_TREE);
++  gimple_seq_add_stmt (&stmts, cond_stmt);
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++}
++
++/* :
++  _7 = MEM[(uint16_t *)s_46];
++  _41 = __builtin_aarch64_crc32h (_8, _7);
++  c_33 = (long unsigned int) _41;
++  s_34 = s_30 + 2;
++  The IR of the bb is as above.  */
++static void
++create_cond_true_bb (basic_block &cond_true_bb, basic_block after_bb,
++		     basic_block dominator_bb, class loop *outer)
++{
++  gimple_seq stmts = NULL;
++  gimple *g;
++  gimple_stmt_iterator gsi;
++
++  create_new_bb (cond_true_bb, after_bb, dominator_bb, outer);
++  gsi = gsi_last_bb (cond_true_bb);
++  tree s_46 = *(s_map.get (after_bb));
++  tree type = build_pointer_type (short_unsigned_type_node);
++  g = gimple_build_assign (make_ssa_name (short_unsigned_type_node),
++			   fold_build2 (MEM_REF, short_unsigned_type_node,
++					s_46, build_int_cst (type, 0)));
++  gimple_seq_add_stmt (&stmts, g);
++  tree lhs1 = gimple_assign_lhs (g); // _7 = MEM[(uint16_t *)s_46];
++  unsigned int code = AARCH64_BUILTIN_CRC32H;
++  tree lhs2;
++  gimple *call_builtin
++    = call_builtin_fun (code, lhs2,
++			*(crc_map.get (cond_true_bb->prev_bb->prev_bb)),
++			lhs1);
++  crc_map.put (cond_true_bb, lhs2);
++  gimple_seq_add_stmt (&stmts, call_builtin);
++
++  tree lhs3 = copy_ssa_name (origin_loop.base_c);
++  g = gimple_build_assign (lhs3, NOP_EXPR, lhs2);
++  gimple_seq_add_stmt (&stmts, g);
++  c_map.put (cond_true_bb, lhs3);
++
++  tree lhs5 = copy_ssa_name (s_46);
++  g = gimple_build_assign (lhs5, POINTER_PLUS_EXPR, s_46,
++			   build_int_cst (sizetype, 2)); // s_30 + 2;
++  gimple_seq_add_stmt (&stmts, g);
++  s_map.put (cond_true_bb, lhs5);
++
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++}
++
++/* :
++  # s_15 = PHI
++  # c_17 = PHI
++  _3 = n_26(D) & 1;
++  if (_3 != 0)
++  The IR of the bb is as above.
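++
++  That is, this block tests n & 1 to see whether one final byte remains;
++  if so it is handled by crc32b in the last block.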
++  */
++static void
++create_cond_false_bb (basic_block &cond_false_bb, basic_block after_bb,
++		      basic_block dominator_bb, class loop *outer)
++{
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gphi *phi_s_cond_false_bb;
++  gphi *phi_c_cond_false_bb;
++
++  create_new_bb (cond_false_bb, after_bb, dominator_bb, outer);
++  make_single_succ_edge (after_bb, cond_false_bb, EDGE_FALLTHRU);
++
++  tree entry_s = get_current_def (origin_loop.base_s);
++  phi_s_cond_false_bb = create_phi_node_for_bb (entry_s, cond_false_bb);
++  tree entry_c = get_current_def (origin_loop.base_c);
++  phi_c_cond_false_bb = create_phi_node_for_bb (entry_c, cond_false_bb);
++  tree res_s = gimple_phi_result (phi_s_cond_false_bb);
++  set_current_def (origin_loop.base_s, res_s);
++  s_map.put (cond_false_bb, res_s);
++  tree res_c = gimple_phi_result (phi_c_cond_false_bb);
++  set_current_def (origin_loop.base_c, res_c);
++  c_map.put (cond_false_bb, res_c);
++
++  gsi = gsi_last_bb (cond_false_bb);
++  tree lhs1 = gimple_build (&stmts, BIT_AND_EXPR,
++			    TREE_TYPE (origin_loop.base_n),
++			    origin_loop.base_n,
++			    build_int_cst (TREE_TYPE (origin_loop.base_n), 1));
++  gcond *cond_stmt = gimple_build_cond (NE_EXPR, lhs1, origin_loop.limit,
++					NULL_TREE, NULL_TREE);
++  gimple_seq_add_stmt (&stmts, cond_stmt);
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++}
++
++/* :
++  _11 = (unsigned int) c_17;
++  _12 = *s_15;
++  _42 = __builtin_aarch64_crc32b (_11, _12);
++  c_36 = (long unsigned int) _42;
++  The IR of the bb is as above.  */
++static void
++create_lastcond_true_bb (basic_block &new_bb, basic_block after_bb,
++			 basic_block dominator_bb, class loop *outer)
++{
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gimple *g;
++
++  create_new_bb (new_bb, after_bb, dominator_bb, outer);
++  gsi = gsi_last_bb (new_bb);
++
++  tree lhs1 = gimple_build (&stmts, NOP_EXPR, unsigned_type_node,
++			    get_current_def (origin_loop.base_c));
++  tree lhs2;
++  tree s_15 = get_current_def (origin_loop.base_s);
++  g = gimple_build_assign (make_ssa_name (unsigned_char_type_node),
++			   fold_build2 (MEM_REF, unsigned_char_type_node,
++					s_15,
++					build_int_cst (TREE_TYPE (s_15), 0)));
++  gimple_seq_add_stmt (&stmts, g);
++  lhs2 = gimple_assign_lhs (g);
++
++  unsigned int code = AARCH64_BUILTIN_CRC32B;
++  tree lhs3;
++  gimple *call_builtin = call_builtin_fun (code, lhs3, lhs1, lhs2);
++  crc_map.put (new_bb, lhs3);
++  gimple_seq_add_stmt (&stmts, call_builtin);
++
++  tree lhs4 = copy_ssa_name (origin_loop.base_c);
++  g = gimple_build_assign (lhs4, NOP_EXPR, lhs3);
++  gimple_seq_add_stmt (&stmts, g);
++  c_map.put (new_bb, lhs4);
++
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++}
++
++static bool
++optional_add_phi_arg (gphi *phi, tree phi_res, tree phi_arg, edge e)
++{
++  location_t loc;
++  if (same_ssa_name_var_p (phi_arg, phi_res))
++    {
++      if (virtual_operand_p (phi_arg))
++	loc = UNKNOWN_LOCATION;
++      else
++	loc = gimple_location (SSA_NAME_DEF_STMT (phi_arg));
++      add_phi_arg (phi, phi_arg, e, loc);
++
++      return true;
++    }
++
++  return false;
++}
++
++/* Add phi args for the phi nodes in BB.
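++   For each incoming edge that does not yet have an argument, use the
++   latest definition of c, nn or s recorded for the source block, falling
++   back to the original base values when nothing was recorded.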
++   */
++static void
++update_phi_nodes (basic_block bb)
++{
++  edge e;
++  edge_iterator ei;
++  gphi *phi;
++  gphi_iterator gsi;
++  tree res;
++
++  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      phi = gsi.phi ();
++      res = gimple_phi_result (phi);
++
++      FOR_EACH_EDGE (e, ei, bb->preds)
++	{
++	  if (PHI_ARG_DEF_FROM_EDGE (phi, e))
++	    continue;
++	  tree var_c;
++	  tree *ptr_var_c = c_map.get (e->src);
++	  if (ptr_var_c == NULL)
++	    var_c = origin_loop.base_c;
++	  else
++	    var_c = *ptr_var_c;
++	  if (optional_add_phi_arg (phi, res, var_c, e))
++	    continue;
++
++	  tree var_nn;
++	  tree *ptr_var_nn = nn_map.get (e->src);
++	  if (ptr_var_nn == NULL)
++	    var_nn = nn_tree;
++	  else
++	    var_nn = *ptr_var_nn;
++	  if (optional_add_phi_arg (phi, res, var_nn, e))
++	    continue;
++
++	  tree var_s;
++	  tree *ptr_var_s = s_map.get (e->src);
++	  if (ptr_var_s == NULL)
++	    var_s = origin_loop.base_s;
++	  else
++	    var_s = *ptr_var_s;
++	  if (optional_add_phi_arg (phi, res, var_s, e))
++	    continue;
++	}
++    }
++}
++
++static void
++create_new_loops (edge entry_edge)
++{
++  class loop *new_loop = NULL;
++  basic_block loop_bb, cond_bb, cond_true_bb, cond_false_bb, lastcond_true_bb;
++  class loop *outer = entry_edge->src->loop_father;
++  change_preheader_bb (entry_edge);
++
++  create_loop_bb (loop_bb, entry_edge->src, entry_edge->src, outer,
++		  entry_edge);
++  create_cond_bb (cond_bb, loop_bb, loop_bb, outer);
++  make_edge (loop_bb, loop_bb, EDGE_TRUE_VALUE);
++  make_edge (loop_bb, cond_bb, EDGE_FALSE_VALUE);
++  update_phi_nodes (loop_bb);
++
++  new_loop = alloc_loop ();
++  new_loop->header = loop_bb;
++  new_loop->latch = loop_bb;
++  add_loop (new_loop, outer);
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "\nPrint new loop %d:\n", new_loop->num);
++      flow_loop_dump (new_loop, dump_file, NULL, 1);
++      fprintf (dump_file, "\n\n");
++    }
++
++  create_cond_true_bb (cond_true_bb, cond_bb, cond_bb, outer);
++  make_edge (cond_bb, cond_true_bb, EDGE_TRUE_VALUE);
++  create_cond_false_bb (cond_false_bb, cond_true_bb, cond_bb, outer);
++  make_edge (cond_bb, cond_false_bb, EDGE_FALSE_VALUE);
++  update_phi_nodes (cond_bb);
++  update_phi_nodes (cond_false_bb);
++  create_lastcond_true_bb (lastcond_true_bb, cond_false_bb,
++			   cond_false_bb, outer);
++  make_edge (cond_false_bb, lastcond_true_bb, EDGE_TRUE_VALUE);
++  make_edge (cond_false_bb, origin_loop.exit_bb, EDGE_FALSE_VALUE);
++  make_single_succ_edge (lastcond_true_bb, origin_loop.exit_bb,
++			 EDGE_FALLTHRU);
++
++  update_phi_nodes (origin_loop.exit_bb);
++  remove_edge (origin_loop.exit_edge);
++}
++
++/* Delete the original loop and its basic blocks.  */
++static void
++remove_origin_loop (class loop *loop)
++{
++  basic_block *body = get_loop_body_in_dom_order (loop);
++  unsigned n = loop->num_nodes;
++  for (unsigned i = 0; i < n; ++i)
++    delete_basic_block (body[i]);
++  free (body);
++  delete_loop (loop);
++}
++
++/* Make sure that the dominance information for the newly inserted cfg
++   nodes is up to date.  */
++static void
++update_loop_dominator (cdi_direction dir)
++{
++  gcc_assert (dom_info_available_p (dir));
++
++  basic_block bb;
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      basic_block imm_bb = get_immediate_dominator (dir, bb);
++      if (!imm_bb || bb == origin_loop.exit_bb)
++	{
++	  set_immediate_dominator (dir, bb,
++				   recompute_dominator (dir, bb));
++	  continue;
++	}
++    }
++}
++
++/* Perform the conversion of origin_loop to the new loop.
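++   This creates the new loop nest, deletes the original loop, repairs the
++   dominator tree and finally rewrites the IL into valid SSA form.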
++   */
++static void
++convert_to_new_loop (class loop *loop)
++{
++  create_new_loops (origin_loop.entry_edge);
++  remove_origin_loop (loop);
++  update_loop_dominator (CDI_DOMINATORS);
++  update_ssa (TODO_update_ssa);
++}
++
++/* The main entry of the loop crc optimization pass.  */
++static unsigned int
++tree_ssa_loop_crc ()
++{
++  if (!TARGET_CRC32)
++    {
++      warning (OPT____, "The loop-crc optimization is not working. "
++	       "You should make sure that the specified architecture "
++	       "supports crc: -march=armv8.1-a");
++      return 0;
++    }
++  unsigned int todo = 0;
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      flow_loops_dump (dump_file, NULL, 1);
++      fprintf (dump_file, "\nStarting the loop_crc pass\n");
++    }
++
++  enum li_flags LI = LI_FROM_INNERMOST;
++  for (auto loop : loops_list (cfun, LI))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "======================================\n");
++	  fprintf (dump_file, "Processing loop %d:\n", loop->num);
++	  fprintf (dump_file, "======================================\n");
++	  flow_loop_dump (loop, dump_file, NULL, 1);
++	  fprintf (dump_file, "\n\n");
++	}
++
++      if (match_crc_loop (loop))
++	{
++	  if (dump_file && (dump_flags & TDF_DETAILS))
++	    fprintf (dump_file, "The form of loop %d was successfully "
++		     "matched, and the loop can be optimized.\n",
++		     loop->num);
++
++	  convert_to_new_loop (loop);
++	  todo |= (TODO_update_ssa);
++	}
++    }
++  return todo;
++}
++
++/* Loop crc.  */
++
++namespace {
++const pass_data pass_data_tree_loop_crc =
++{
++  GIMPLE_PASS,
++  "loop_crc",
++  OPTGROUP_LOOP,
++  TV_TREE_LOOP_CRC,
++  (PROP_cfg | PROP_ssa),
++  0,
++  0,
++  0,
++  (TODO_update_ssa | TODO_verify_all)
++};
++
++class pass_loop_crc : public gimple_opt_pass
++{
++public:
++  pass_loop_crc (gcc::context *ctxt)
++    : gimple_opt_pass (pass_data_tree_loop_crc, ctxt)
++  {}
++
++  /* Opt_pass methods: */
++  virtual bool gate (function *);
++  virtual unsigned int execute (function *);
++}; // class pass_loop_crc
++
++bool
++pass_loop_crc::gate (function *)
++{
++  return (flag_loop_crc > 0 && optimize >= 3);
++}
++
++unsigned int
++pass_loop_crc::execute (function *fun)
++{
++  if (number_of_loops (fun) <= 1)
++    return 0;
++
++  /* Only the LP64 data model is supported.  */
++  if (TYPE_PRECISION (long_integer_type_node) != 64
++      || POINTER_SIZE != 64 || TYPE_PRECISION (integer_type_node) != 32)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	fprintf (dump_file, "The current data model is not supported; "
++		 "only the LP64 data model is supported.\n");
++      return 0;
++    }
++
++  return tree_ssa_loop_crc ();
++}
++
++} // anon namespace
++
++gimple_opt_pass *
++make_pass_loop_crc (gcc::context *ctxt)
++{
++  return new pass_loop_crc (ctxt);
++}
+-- 
+2.33.0
+
diff --git a/0103-SME-Remove-hip09-and-hip11-in-aarch64-cores.def-to-b.patch b/0103-SME-Remove-hip09-and-hip11-in-aarch64-cores.def-to-b.patch
new file mode 100644
index 0000000000000000000000000000000000000000..5589f82dbb1c1b19cf8b1a12816e2f7e0e2b5cf3
--- /dev/null
+++ b/0103-SME-Remove-hip09-and-hip11-in-aarch64-cores.def-to-b.patch
@@ -0,0 +1,34 @@
+From 72c48ade495ef99ef032a6c44365eb102b74888e Mon Sep 17 00:00:00 2001
+From: xiezhiheng
+Date: Fri, 23 Aug 2024 15:14:04 +0800
+Subject: [PATCH 004/157] [SME] Remove hip09 and hip11 in aarch64-cores.def to
+ backport SME
+
+Will apply it in the end.
+--- + gcc/config/aarch64/aarch64-cores.def | 2 -- + 1 file changed, 2 deletions(-) + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index 601b72abb..70b11eb80 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -130,7 +130,6 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F + + /* HiSilicon ('H') cores. */ + AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1) +-AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0xd02, 0x0) + + /* ARMv8.3-A Architecture Processors. */ + +@@ -173,7 +172,6 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | + AARCH64_CORE("cortex-x2", cortexx2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_MEMTAG | AARCH64_FL_I8MM | AARCH64_FL_BF16, neoversen2, 0x41, 0xd48, -1) + + AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversen2, 0x41, 0xd49, -1) +-AARCH64_CORE("hip11", hip11, hip11, 8_5A, AARCH64_FL_FOR_ARCH8_5| AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_F16, hip11, 0x48, 0xd22, -1) + + AARCH64_CORE("demeter", demeter, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) + AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, 9A, AARCH64_FL_FOR_ARCH9 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG | AARCH64_FL_PROFILE, neoversev2, 0x41, 0xd4f, -1) +-- +2.33.0 + diff --git a/0097-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch b/0104-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch similarity index 100% rename from 0097-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch rename to 0104-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch diff --git a/0098-Backport-SME-AArch64-Cleanup-option-processing-code.patch b/0105-Backport-SME-AArch64-Cleanup-option-processing-code.patch similarity index 100% rename from 0098-Backport-SME-AArch64-Cleanup-option-processing-code.patch rename to 0105-Backport-SME-AArch64-Cleanup-option-processing-code.patch diff --git a/0099-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch b/0106-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch similarity index 100% rename from 0099-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch rename to 0106-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch diff --git a/0100-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch b/0107-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch similarity index 100% rename from 0100-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch rename to 0107-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch diff --git a/0101-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch b/0108-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch similarity index 100% rename from 0101-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch rename to 0108-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch diff 
--git a/0102-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch b/0109-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch similarity index 100% rename from 0102-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch rename to 0109-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch diff --git a/0103-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch b/0110-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch similarity index 100% rename from 0103-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch rename to 0110-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch diff --git a/0104-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch b/0111-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch similarity index 100% rename from 0104-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch rename to 0111-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch diff --git a/0105-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch b/0112-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch similarity index 100% rename from 0105-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch rename to 0112-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch diff --git a/0106-Backport-SME-aarch64-Small-config.gcc-cleanups.patch b/0113-Backport-SME-aarch64-Small-config.gcc-cleanups.patch similarity index 100% rename from 0106-Backport-SME-aarch64-Small-config.gcc-cleanups.patch rename to 0113-Backport-SME-aarch64-Small-config.gcc-cleanups.patch diff --git a/0107-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch b/0114-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch similarity index 100% rename from 0107-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch rename to 0114-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch diff --git a/0108-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch b/0115-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch similarity index 100% rename from 0108-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch rename to 0115-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch diff --git a/0109-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch b/0116-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch similarity index 100% rename from 0109-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch rename to 0116-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch diff --git a/0110-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch b/0117-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch similarity index 100% rename from 0110-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch rename to 0117-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch diff --git a/0111-Backport-SME-aarch64-Simplify-feature-definitions.patch b/0118-Backport-SME-aarch64-Simplify-feature-definitions.patch similarity index 100% rename from 0111-Backport-SME-aarch64-Simplify-feature-definitions.patch rename to 0118-Backport-SME-aarch64-Simplify-feature-definitions.patch diff --git a/0112-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch b/0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch similarity index 100% rename from 0112-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch rename to 0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch diff --git a/0113-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch 
b/0120-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch similarity index 100% rename from 0113-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch rename to 0120-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch diff --git a/0114-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch b/0121-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch similarity index 100% rename from 0114-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch rename to 0121-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch diff --git a/0115-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch b/0122-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch similarity index 100% rename from 0115-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch rename to 0122-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch diff --git a/0116-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch b/0123-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch similarity index 100% rename from 0116-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch rename to 0123-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch diff --git a/0117-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch b/0124-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch similarity index 100% rename from 0117-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch rename to 0124-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch diff --git a/0118-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch b/0125-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch similarity index 100% rename from 0118-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch rename to 0125-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch diff --git a/0119-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch b/0126-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch similarity index 100% rename from 0119-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch rename to 0126-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch diff --git a/0120-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch b/0127-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch similarity index 100% rename from 0120-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch rename to 0127-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch diff --git a/0121-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch b/0128-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch similarity index 100% rename from 0121-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch rename to 0128-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch diff --git a/0122-Backport-SME-aarch64-Commonise-some-folding-code.patch b/0129-Backport-SME-aarch64-Commonise-some-folding-code.patch similarity index 100% rename from 0122-Backport-SME-aarch64-Commonise-some-folding-code.patch rename to 0129-Backport-SME-aarch64-Commonise-some-folding-code.patch diff --git a/0123-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch b/0130-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch similarity index 100% rename from 0123-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch rename to 0130-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch diff --git a/0124-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch b/0131-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch similarity index 100% rename from 
0124-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch rename to 0131-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch diff --git a/0125-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch b/0132-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch similarity index 100% rename from 0125-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch rename to 0132-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch diff --git a/0126-Backport-SME-mode-switching-Add-note-problem.patch b/0133-Backport-SME-mode-switching-Add-note-problem.patch similarity index 100% rename from 0126-Backport-SME-mode-switching-Add-note-problem.patch rename to 0133-Backport-SME-mode-switching-Add-note-problem.patch diff --git a/0127-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch b/0134-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch similarity index 100% rename from 0127-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch rename to 0134-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch diff --git a/0128-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch b/0135-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch similarity index 100% rename from 0128-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch rename to 0135-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch diff --git a/0129-Backport-SME-mode-switching-Simplify-recording-of-tr.patch b/0136-Backport-SME-mode-switching-Simplify-recording-of-tr.patch similarity index 100% rename from 0129-Backport-SME-mode-switching-Simplify-recording-of-tr.patch rename to 0136-Backport-SME-mode-switching-Simplify-recording-of-tr.patch diff --git a/0130-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch b/0137-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch similarity index 100% rename from 0130-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch rename to 0137-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch diff --git a/0131-Backport-SME-mode-switching-Allow-targets-to-set-the.patch b/0138-Backport-SME-mode-switching-Allow-targets-to-set-the.patch similarity index 100% rename from 0131-Backport-SME-mode-switching-Allow-targets-to-set-the.patch rename to 0138-Backport-SME-mode-switching-Allow-targets-to-set-the.patch diff --git a/0132-Backport-SME-mode-switching-Pass-set-of-live-registe.patch b/0139-Backport-SME-mode-switching-Pass-set-of-live-registe.patch similarity index 100% rename from 0132-Backport-SME-mode-switching-Pass-set-of-live-registe.patch rename to 0139-Backport-SME-mode-switching-Pass-set-of-live-registe.patch diff --git a/0133-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch b/0140-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch similarity index 100% rename from 0133-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch rename to 0140-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch diff --git a/0134-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch b/0141-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch similarity index 100% rename from 0134-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch rename to 0141-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch diff --git a/0135-Backport-SME-mode-switching-Add-a-target-configurabl.patch b/0142-Backport-SME-mode-switching-Add-a-target-configurabl.patch similarity index 100% rename from 0135-Backport-SME-mode-switching-Add-a-target-configurabl.patch rename to 
0142-Backport-SME-mode-switching-Add-a-target-configurabl.patch diff --git a/0136-Backport-SME-mode-switching-Add-a-backprop-hook.patch b/0143-Backport-SME-mode-switching-Add-a-backprop-hook.patch similarity index 100% rename from 0136-Backport-SME-mode-switching-Add-a-backprop-hook.patch rename to 0143-Backport-SME-mode-switching-Add-a-backprop-hook.patch diff --git a/0137-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch b/0144-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch similarity index 100% rename from 0137-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch rename to 0144-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch diff --git a/0138-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch b/0145-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch similarity index 100% rename from 0138-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch rename to 0145-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch diff --git a/0139-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch b/0146-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch similarity index 100% rename from 0139-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch rename to 0146-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch diff --git a/0140-Backport-SME-function-Change-return-type-of-predicat.patch b/0147-Backport-SME-function-Change-return-type-of-predicat.patch similarity index 100% rename from 0140-Backport-SME-function-Change-return-type-of-predicat.patch rename to 0147-Backport-SME-function-Change-return-type-of-predicat.patch diff --git a/0141-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch b/0148-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch similarity index 100% rename from 0141-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch rename to 0148-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch diff --git a/0142-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch b/0149-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch similarity index 100% rename from 0142-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch rename to 0149-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch diff --git a/0143-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch b/0150-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch similarity index 100% rename from 0143-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch rename to 0150-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch diff --git a/0144-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch b/0151-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch similarity index 100% rename from 0144-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch rename to 0151-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch diff --git a/0145-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch b/0152-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch similarity index 100% rename from 0145-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch rename to 0152-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch diff --git a/0146-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch b/0153-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch similarity index 100% rename from 0146-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch rename to 0153-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch diff --git a/0147-Backport-SME-recog-Support-space-in-cons.patch 
b/0154-Backport-SME-recog-Support-space-in-cons.patch similarity index 100% rename from 0147-Backport-SME-recog-Support-space-in-cons.patch rename to 0154-Backport-SME-recog-Support-space-in-cons.patch diff --git a/0148-Backport-SME-aarch64-Generalise-require_immediate_la.patch b/0155-Backport-SME-aarch64-Generalise-require_immediate_la.patch similarity index 100% rename from 0148-Backport-SME-aarch64-Generalise-require_immediate_la.patch rename to 0155-Backport-SME-aarch64-Generalise-require_immediate_la.patch diff --git a/0149-Backport-SME-aarch64-Add-backend-support-for-DFP.patch b/0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch similarity index 100% rename from 0149-Backport-SME-aarch64-Add-backend-support-for-DFP.patch rename to 0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch diff --git a/0150-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch b/0157-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch similarity index 100% rename from 0150-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch rename to 0157-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch diff --git a/0151-Backport-SME-aarch64-Simplify-output-template-emissi.patch b/0158-Backport-SME-aarch64-Simplify-output-template-emissi.patch similarity index 100% rename from 0151-Backport-SME-aarch64-Simplify-output-template-emissi.patch rename to 0158-Backport-SME-aarch64-Simplify-output-template-emissi.patch diff --git a/0152-Backport-SME-Improve-immediate-expansion-PR106583.patch b/0159-Backport-SME-Improve-immediate-expansion-PR106583.patch similarity index 100% rename from 0152-Backport-SME-Improve-immediate-expansion-PR106583.patch rename to 0159-Backport-SME-Improve-immediate-expansion-PR106583.patch diff --git a/0153-Backport-SME-AArch64-Cleanup-move-immediate-code.patch b/0160-Backport-SME-AArch64-Cleanup-move-immediate-code.patch similarity index 100% rename from 0153-Backport-SME-AArch64-Cleanup-move-immediate-code.patch rename to 0160-Backport-SME-AArch64-Cleanup-move-immediate-code.patch diff --git a/0154-Backport-SME-AArch64-convert-some-patterns-to-compac.patch b/0161-Backport-SME-AArch64-convert-some-patterns-to-compac.patch similarity index 100% rename from 0154-Backport-SME-AArch64-convert-some-patterns-to-compac.patch rename to 0161-Backport-SME-AArch64-convert-some-patterns-to-compac.patch diff --git a/0155-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch b/0162-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch similarity index 100% rename from 0155-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch rename to 0162-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch diff --git a/0156-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch b/0163-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch similarity index 100% rename from 0156-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch rename to 0163-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch diff --git a/0157-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch b/0164-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch similarity index 100% rename from 0157-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch rename to 0164-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch diff --git a/0158-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch b/0165-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch similarity index 100% rename from 0158-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch rename to 
0165-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch diff --git a/0159-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch b/0166-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch similarity index 100% rename from 0159-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch rename to 0166-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch diff --git a/0160-Backport-SME-aarch64-Replace-vague-previous-argument.patch b/0167-Backport-SME-aarch64-Replace-vague-previous-argument.patch similarity index 100% rename from 0160-Backport-SME-aarch64-Replace-vague-previous-argument.patch rename to 0167-Backport-SME-aarch64-Replace-vague-previous-argument.patch diff --git a/0161-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch b/0168-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch similarity index 100% rename from 0161-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch rename to 0168-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch diff --git a/0162-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch b/0169-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch similarity index 100% rename from 0162-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch rename to 0169-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch diff --git a/0163-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch b/0170-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch similarity index 100% rename from 0163-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch rename to 0170-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch diff --git a/0164-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch b/0171-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch similarity index 100% rename from 0164-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch rename to 0171-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch diff --git a/0165-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch b/0172-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch similarity index 100% rename from 0165-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch rename to 0172-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch diff --git a/0166-Backport-SME-aarch64-Fix-plugin-header-install.patch b/0173-Backport-SME-aarch64-Fix-plugin-header-install.patch similarity index 100% rename from 0166-Backport-SME-aarch64-Fix-plugin-header-install.patch rename to 0173-Backport-SME-aarch64-Fix-plugin-header-install.patch diff --git a/0167-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch b/0174-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch similarity index 100% rename from 0167-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch rename to 0174-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch diff --git a/0168-Backport-SME-aarch64-Add-sme.patch b/0175-Backport-SME-aarch64-Add-sme.patch similarity index 100% rename from 0168-Backport-SME-aarch64-Add-sme.patch rename to 0175-Backport-SME-aarch64-Add-sme.patch diff --git a/0169-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch b/0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch similarity index 100% rename from 0169-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch rename to 0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch diff --git a/0170-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch b/0177-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch 
similarity index 100% rename from 0170-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch rename to 0177-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch diff --git a/0171-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch b/0178-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch similarity index 100% rename from 0171-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch rename to 0178-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch diff --git a/0172-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch b/0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch similarity index 100% rename from 0172-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch rename to 0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch diff --git a/0173-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch b/0180-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch similarity index 100% rename from 0173-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch rename to 0180-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch diff --git a/0174-Backport-SME-AArch64-Support-new-tbranch-optab.patch b/0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch similarity index 100% rename from 0174-Backport-SME-AArch64-Support-new-tbranch-optab.patch rename to 0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch diff --git a/0175-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch b/0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch similarity index 100% rename from 0175-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch rename to 0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch diff --git a/0176-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch b/0183-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch similarity index 100% rename from 0176-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch rename to 0183-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch diff --git a/0177-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch b/0184-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch similarity index 100% rename from 0177-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch rename to 0184-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch diff --git a/0178-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch b/0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch similarity index 100% rename from 0178-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch rename to 0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch diff --git a/0179-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch b/0186-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch similarity index 100% rename from 0179-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch rename to 0186-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch diff --git a/0180-Backport-SME-aarch64-Robustify-stack-tie-handling.patch b/0187-Backport-SME-aarch64-Robustify-stack-tie-handling.patch similarity index 100% rename from 0180-Backport-SME-aarch64-Robustify-stack-tie-handling.patch rename to 0187-Backport-SME-aarch64-Robustify-stack-tie-handling.patch diff --git a/0181-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch b/0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch similarity index 100% rename from 0181-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch rename to 
0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch diff --git a/0182-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch b/0189-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch similarity index 100% rename from 0182-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch rename to 0189-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch diff --git a/0183-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch b/0190-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch similarity index 100% rename from 0183-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch rename to 0190-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch diff --git a/0184-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch b/0191-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch similarity index 100% rename from 0184-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch rename to 0191-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch diff --git a/0185-Backport-SME-aarch64-Tweak-frame_size-comment.patch b/0192-Backport-SME-aarch64-Tweak-frame_size-comment.patch similarity index 100% rename from 0185-Backport-SME-aarch64-Tweak-frame_size-comment.patch rename to 0192-Backport-SME-aarch64-Tweak-frame_size-comment.patch diff --git a/0186-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch b/0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch similarity index 100% rename from 0186-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch rename to 0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch diff --git a/0187-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch b/0194-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch similarity index 100% rename from 0187-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch rename to 0194-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch diff --git a/0188-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch b/0195-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch similarity index 100% rename from 0188-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch rename to 0195-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch diff --git a/0189-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch b/0196-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch similarity index 100% rename from 0189-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch rename to 0196-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch diff --git a/0190-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch b/0197-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch similarity index 100% rename from 0190-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch rename to 0197-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch diff --git a/0191-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch b/0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch similarity index 100% rename from 0191-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch rename to 0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch diff --git a/0192-Backport-SME-aarch64-Explicitly-record-probe-registe.patch b/0199-Backport-SME-aarch64-Explicitly-record-probe-registe.patch similarity index 100% rename from 0192-Backport-SME-aarch64-Explicitly-record-probe-registe.patch rename to 0199-Backport-SME-aarch64-Explicitly-record-probe-registe.patch diff --git 
a/0193-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch b/0200-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch similarity index 100% rename from 0193-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch rename to 0200-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch diff --git a/0194-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch b/0201-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch similarity index 100% rename from 0194-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch rename to 0201-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch diff --git a/0195-Backport-SME-Handle-epilogues-that-contain-jumps.patch b/0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch similarity index 100% rename from 0195-Backport-SME-Handle-epilogues-that-contain-jumps.patch rename to 0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch diff --git a/0196-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch b/0203-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch similarity index 100% rename from 0196-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch rename to 0203-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch diff --git a/0197-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch b/0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch similarity index 100% rename from 0197-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch rename to 0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch diff --git a/0198-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch b/0205-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch similarity index 100% rename from 0198-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch rename to 0205-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch diff --git a/0199-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch b/0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch similarity index 100% rename from 0199-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch rename to 0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch diff --git a/0200-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch b/0207-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch similarity index 100% rename from 0200-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch rename to 0207-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch diff --git a/0201-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch b/0208-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch similarity index 100% rename from 0201-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch rename to 0208-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch diff --git a/0202-Backport-SME-aarch64-Generalise-unspec_based_functio.patch b/0209-Backport-SME-aarch64-Generalise-unspec_based_functio.patch similarity index 100% rename from 0202-Backport-SME-aarch64-Generalise-unspec_based_functio.patch rename to 0209-Backport-SME-aarch64-Generalise-unspec_based_functio.patch diff --git a/0203-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch b/0210-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch similarity index 100% rename from 0203-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch rename to 0210-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch diff --git a/0204-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch b/0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch similarity index 100% rename from 
0204-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch rename to 0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch diff --git a/0205-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch b/0212-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch similarity index 100% rename from 0205-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch rename to 0212-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch diff --git a/0206-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch b/0213-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch similarity index 100% rename from 0206-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch rename to 0213-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch diff --git a/0207-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch b/0214-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch similarity index 100% rename from 0207-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch rename to 0214-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch diff --git a/0208-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch b/0215-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch similarity index 100% rename from 0208-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch rename to 0215-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch diff --git a/0209-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch b/0216-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch similarity index 100% rename from 0209-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch rename to 0216-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch diff --git a/0210-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch b/0217-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch similarity index 100% rename from 0210-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch rename to 0217-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch diff --git a/0211-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch b/0218-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch similarity index 100% rename from 0211-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch rename to 0218-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch diff --git a/0212-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch b/0219-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch similarity index 100% rename from 0212-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch rename to 0219-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch diff --git a/0213-Backport-SME-libgcc-Fix-config.in.patch b/0220-Backport-SME-libgcc-Fix-config.in.patch similarity index 100% rename from 0213-Backport-SME-libgcc-Fix-config.in.patch rename to 0220-Backport-SME-libgcc-Fix-config.in.patch diff --git a/0214-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch b/0221-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch similarity index 100% rename from 0214-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch rename to 0221-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch diff --git a/0215-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch b/0222-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch similarity index 100% rename from 0215-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch rename to 0222-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch diff --git 
a/0216-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch b/0223-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch similarity index 100% rename from 0216-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch rename to 0223-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch diff --git a/0217-Backport-SME-aarch64-Add-V1DI-mode.patch b/0224-Backport-SME-aarch64-Add-V1DI-mode.patch similarity index 100% rename from 0217-Backport-SME-aarch64-Add-V1DI-mode.patch rename to 0224-Backport-SME-aarch64-Add-V1DI-mode.patch diff --git a/0218-Backport-SME-Allow-md-iterators-to-include-other-ite.patch b/0225-Backport-SME-Allow-md-iterators-to-include-other-ite.patch similarity index 100% rename from 0218-Backport-SME-Allow-md-iterators-to-include-other-ite.patch rename to 0225-Backport-SME-Allow-md-iterators-to-include-other-ite.patch diff --git a/0219-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch b/0226-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch similarity index 100% rename from 0219-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch rename to 0226-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch diff --git a/0220-Backport-SME-attribs-Add-overloads-with-namespace-na.patch b/0227-Backport-SME-attribs-Add-overloads-with-namespace-na.patch similarity index 100% rename from 0220-Backport-SME-attribs-Add-overloads-with-namespace-na.patch rename to 0227-Backport-SME-attribs-Add-overloads-with-namespace-na.patch diff --git a/0221-Backport-SME-vec-Add-array_slice-constructors-from-n.patch b/0228-Backport-SME-vec-Add-array_slice-constructors-from-n.patch similarity index 100% rename from 0221-Backport-SME-vec-Add-array_slice-constructors-from-n.patch rename to 0228-Backport-SME-vec-Add-array_slice-constructors-from-n.patch diff --git a/0222-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch b/0229-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch similarity index 100% rename from 0222-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch rename to 0229-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch diff --git a/0223-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch b/0230-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch similarity index 100% rename from 0223-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch rename to 0230-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch diff --git a/0224-SME-Add-missing-header-file-in-aarch64.cc.patch b/0231-SME-Add-missing-header-file-in-aarch64.cc.patch similarity index 100% rename from 0224-SME-Add-missing-header-file-in-aarch64.cc.patch rename to 0231-SME-Add-missing-header-file-in-aarch64.cc.patch diff --git a/0225-Backport-SME-c-Add-support-for-__extension__.patch b/0232-Backport-SME-c-Add-support-for-__extension__.patch similarity index 100% rename from 0225-Backport-SME-c-Add-support-for-__extension__.patch rename to 0232-Backport-SME-c-Add-support-for-__extension__.patch diff --git a/0226-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch b/0233-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch similarity index 100% rename from 0226-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch rename to 0233-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch diff --git a/0227-Backport-SME-c-Support-C2x-empty-initializer-braces.patch b/0234-Backport-SME-c-Support-C2x-empty-initializer-braces.patch similarity index 100% rename from 0227-Backport-SME-c-Support-C2x-empty-initializer-braces.patch rename to 0234-Backport-SME-c-Support-C2x-empty-initializer-braces.patch diff 
--git a/0228-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch b/0235-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch similarity index 100% rename from 0228-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch rename to 0235-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch diff --git a/0229-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch b/0236-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch similarity index 100% rename from 0229-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch rename to 0236-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch diff --git a/0230-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch b/0237-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch similarity index 100% rename from 0230-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch rename to 0237-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch diff --git a/0231-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch b/0238-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch similarity index 100% rename from 0231-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch rename to 0238-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch diff --git a/0232-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch b/0239-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch similarity index 100% rename from 0232-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch rename to 0239-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch diff --git a/0233-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch b/0240-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch similarity index 100% rename from 0233-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch rename to 0240-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch diff --git a/0234-Backport-SME-aarch64-Remove-expected-error-for-compo.patch b/0241-Backport-SME-aarch64-Remove-expected-error-for-compo.patch similarity index 100% rename from 0234-Backport-SME-aarch64-Remove-expected-error-for-compo.patch rename to 0241-Backport-SME-aarch64-Remove-expected-error-for-compo.patch diff --git a/0235-Backport-SME-aarch64-Remove-redundant-builtins-code.patch b/0242-Backport-SME-aarch64-Remove-redundant-builtins-code.patch similarity index 100% rename from 0235-Backport-SME-aarch64-Remove-redundant-builtins-code.patch rename to 0242-Backport-SME-aarch64-Remove-redundant-builtins-code.patch diff --git a/0236-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch b/0243-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch similarity index 100% rename from 0236-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch rename to 0243-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch diff --git a/0237-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch b/0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch similarity index 100% rename from 0237-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch rename to 0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch diff --git a/0238-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch b/0245-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch similarity index 100% rename from 0238-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch rename to 0245-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch diff --git a/0239-Backport-SME-explow-Allow-dynamic-allocations-after-.patch 
b/0246-Backport-SME-explow-Allow-dynamic-allocations-after-.patch similarity index 100% rename from 0239-Backport-SME-explow-Allow-dynamic-allocations-after-.patch rename to 0246-Backport-SME-explow-Allow-dynamic-allocations-after-.patch diff --git a/0240-Backport-SME-PR105169-Fix-references-to-discarded-se.patch b/0247-Backport-SME-PR105169-Fix-references-to-discarded-se.patch similarity index 100% rename from 0240-Backport-SME-PR105169-Fix-references-to-discarded-se.patch rename to 0247-Backport-SME-PR105169-Fix-references-to-discarded-se.patch diff --git a/0241-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch b/0248-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch similarity index 100% rename from 0241-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch rename to 0248-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch diff --git a/0242-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch b/0249-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch similarity index 100% rename from 0242-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch rename to 0249-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch diff --git a/0243-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch b/0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch similarity index 100% rename from 0243-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch rename to 0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch diff --git a/0244-SME-Adapt-some-testsuites.patch b/0251-SME-Adapt-some-testsuites.patch similarity index 100% rename from 0244-SME-Adapt-some-testsuites.patch rename to 0251-SME-Adapt-some-testsuites.patch diff --git a/0245-SME-Fix-error-by-backported-patches-and-IPA-prefetch.patch b/0252-SME-Fix-error-by-backported-patches-and-IPA-prefetch.patch similarity index 100% rename from 0245-SME-Fix-error-by-backported-patches-and-IPA-prefetch.patch rename to 0252-SME-Fix-error-by-backported-patches-and-IPA-prefetch.patch diff --git a/0246-aarch64-Fix-return-register-handling-in-untyped_call.patch b/0253-aarch64-Fix-return-register-handling-in-untyped_call.patch similarity index 100% rename from 0246-aarch64-Fix-return-register-handling-in-untyped_call.patch rename to 0253-aarch64-Fix-return-register-handling-in-untyped_call.patch diff --git a/0247-aarch64-Fix-loose-ldpstp-check.patch b/0254-aarch64-Fix-loose-ldpstp-check.patch similarity index 100% rename from 0247-aarch64-Fix-loose-ldpstp-check.patch rename to 0254-aarch64-Fix-loose-ldpstp-check.patch diff --git a/0248-x86-Add-a-new-option-mdaz-ftz-to-enable-FTZ-and-DAZ-.patch b/0255-x86-Add-a-new-option-mdaz-ftz-to-enable-FTZ-and-DAZ-.patch similarity index 100% rename from 0248-x86-Add-a-new-option-mdaz-ftz-to-enable-FTZ-and-DAZ-.patch rename to 0255-x86-Add-a-new-option-mdaz-ftz-to-enable-FTZ-and-DAZ-.patch diff --git a/0249-Explicitly-view_convert_expr-mask-to-signed-type-whe.patch b/0256-Explicitly-view_convert_expr-mask-to-signed-type-whe.patch similarity index 100% rename from 0249-Explicitly-view_convert_expr-mask-to-signed-type-whe.patch rename to 0256-Explicitly-view_convert_expr-mask-to-signed-type-whe.patch diff --git a/0250-Make-option-mvzeroupper-independent-of-optimization-.patch b/0257-Make-option-mvzeroupper-independent-of-optimization-.patch similarity index 100% rename from 0250-Make-option-mvzeroupper-independent-of-optimization-.patch rename to 0257-Make-option-mvzeroupper-independent-of-optimization-.patch diff --git 
a/0251-i386-Sync-tune_string-with-arch_string-for-target-at.patch b/0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch similarity index 100% rename from 0251-i386-Sync-tune_string-with-arch_string-for-target-at.patch rename to 0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch diff --git a/0252-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch b/0259-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch similarity index 100% rename from 0252-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch rename to 0259-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch diff --git a/0253-Refine-maskstore-patterns-with-UNSPEC_MASKMOV.patch b/0260-Refine-maskstore-patterns-with-UNSPEC_MASKMOV.patch similarity index 100% rename from 0253-Refine-maskstore-patterns-with-UNSPEC_MASKMOV.patch rename to 0260-Refine-maskstore-patterns-with-UNSPEC_MASKMOV.patch diff --git a/0254-x86-Update-model-values-for-Alderlake-and-Rocketlake.patch b/0261-x86-Update-model-values-for-Alderlake-and-Rocketlake.patch similarity index 100% rename from 0254-x86-Update-model-values-for-Alderlake-and-Rocketlake.patch rename to 0261-x86-Update-model-values-for-Alderlake-and-Rocketlake.patch diff --git a/0255-Workaround-possible-CPUID-bug-in-Sandy-Bridge.patch b/0262-Workaround-possible-CPUID-bug-in-Sandy-Bridge.patch similarity index 100% rename from 0255-Workaround-possible-CPUID-bug-in-Sandy-Bridge.patch rename to 0262-Workaround-possible-CPUID-bug-in-Sandy-Bridge.patch diff --git a/0256-Software-mitigation-Disable-gather-generation-in-vec.patch b/0263-Software-mitigation-Disable-gather-generation-in-vec.patch similarity index 100% rename from 0256-Software-mitigation-Disable-gather-generation-in-vec.patch rename to 0263-Software-mitigation-Disable-gather-generation-in-vec.patch diff --git a/0257-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch b/0264-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch similarity index 100% rename from 0257-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch rename to 0264-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch diff --git a/0258-Remove-constraint-modifier-for-fcmaddcph-fmaddcph-fc.patch b/0265-Remove-constraint-modifier-for-fcmaddcph-fmaddcph-fc.patch similarity index 100% rename from 0258-Remove-constraint-modifier-for-fcmaddcph-fmaddcph-fc.patch rename to 0265-Remove-constraint-modifier-for-fcmaddcph-fmaddcph-fc.patch diff --git a/0259-Disparage-slightly-for-the-alternative-which-move-DF.patch b/0266-Disparage-slightly-for-the-alternative-which-move-DF.patch similarity index 100% rename from 0259-Disparage-slightly-for-the-alternative-which-move-DF.patch rename to 0266-Disparage-slightly-for-the-alternative-which-move-DF.patch diff --git a/0260-Fix-wrong-code-due-to-vec_merge-pcmp-to-blendvb-spli.patch b/0267-Fix-wrong-code-due-to-vec_merge-pcmp-to-blendvb-spli.patch similarity index 100% rename from 0260-Fix-wrong-code-due-to-vec_merge-pcmp-to-blendvb-spli.patch rename to 0267-Fix-wrong-code-due-to-vec_merge-pcmp-to-blendvb-spli.patch diff --git a/0261-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch b/0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch similarity index 100% rename from 0261-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch rename to 0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch diff --git a/0262-Disable-FMADD-in-chains-for-Zen4-and-generic.patch b/0269-Disable-FMADD-in-chains-for-Zen4-and-generic.patch similarity index 100% rename from 
0262-Disable-FMADD-in-chains-for-Zen4-and-generic.patch rename to 0269-Disable-FMADD-in-chains-for-Zen4-and-generic.patch diff --git a/0263-Initial-Raptorlake-Support.patch b/0270-Initial-Raptorlake-Support.patch similarity index 100% rename from 0263-Initial-Raptorlake-Support.patch rename to 0270-Initial-Raptorlake-Support.patch diff --git a/0264-Initial-Meteorlake-Support.patch b/0271-Initial-Meteorlake-Support.patch similarity index 100% rename from 0264-Initial-Meteorlake-Support.patch rename to 0271-Initial-Meteorlake-Support.patch diff --git a/0265-Support-Intel-AMX-FP16-ISA.patch b/0272-Support-Intel-AMX-FP16-ISA.patch similarity index 100% rename from 0265-Support-Intel-AMX-FP16-ISA.patch rename to 0272-Support-Intel-AMX-FP16-ISA.patch diff --git a/0266-Support-Intel-prefetchit0-t1.patch b/0273-Support-Intel-prefetchit0-t1.patch similarity index 100% rename from 0266-Support-Intel-prefetchit0-t1.patch rename to 0273-Support-Intel-prefetchit0-t1.patch diff --git a/0267-Initial-Granite-Rapids-Support.patch b/0274-Initial-Granite-Rapids-Support.patch similarity index 100% rename from 0267-Initial-Granite-Rapids-Support.patch rename to 0274-Initial-Granite-Rapids-Support.patch diff --git a/0268-Support-Intel-AMX-COMPLEX.patch b/0275-Support-Intel-AMX-COMPLEX.patch similarity index 100% rename from 0268-Support-Intel-AMX-COMPLEX.patch rename to 0275-Support-Intel-AMX-COMPLEX.patch diff --git a/0269-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch b/0276-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch similarity index 100% rename from 0269-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch rename to 0276-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch diff --git a/0270-Initial-Granite-Rapids-D-Support.patch b/0277-Initial-Granite-Rapids-D-Support.patch similarity index 100% rename from 0270-Initial-Granite-Rapids-D-Support.patch rename to 0277-Initial-Granite-Rapids-D-Support.patch diff --git a/0271-Correct-Granite-Rapids-D-documentation.patch b/0278-Correct-Granite-Rapids-D-documentation.patch similarity index 100% rename from 0271-Correct-Granite-Rapids-D-documentation.patch rename to 0278-Correct-Granite-Rapids-D-documentation.patch diff --git a/0272-i386-Remove-Meteorlake-s-family_model.patch b/0279-i386-Remove-Meteorlake-s-family_model.patch similarity index 100% rename from 0272-i386-Remove-Meteorlake-s-family_model.patch rename to 0279-i386-Remove-Meteorlake-s-family_model.patch diff --git a/0273-x86-Update-model-values-for-Alderlake-Rocketlake-and.patch b/0280-x86-Update-model-values-for-Alderlake-Rocketlake-and.patch similarity index 100% rename from 0273-x86-Update-model-values-for-Alderlake-Rocketlake-and.patch rename to 0280-x86-Update-model-values-for-Alderlake-Rocketlake-and.patch diff --git a/0274-x86-Update-model-values-for-Raptorlake.patch b/0281-x86-Update-model-values-for-Raptorlake.patch similarity index 100% rename from 0274-x86-Update-model-values-for-Raptorlake.patch rename to 0281-x86-Update-model-values-for-Raptorlake.patch diff --git a/0275-Fix-target_clone-arch-graniterapids-d.patch b/0282-Fix-target_clone-arch-graniterapids-d.patch similarity index 100% rename from 0275-Fix-target_clone-arch-graniterapids-d.patch rename to 0282-Fix-target_clone-arch-graniterapids-d.patch diff --git a/0276-i386-Change-prefetchi-output-template.patch b/0283-i386-Change-prefetchi-output-template.patch similarity index 100% rename from 0276-i386-Change-prefetchi-output-template.patch rename to 0283-i386-Change-prefetchi-output-template.patch diff --git 
a/0277-i386-Add-non-optimize-prefetchi-intrins.patch b/0284-i386-Add-non-optimize-prefetchi-intrins.patch similarity index 100% rename from 0277-i386-Add-non-optimize-prefetchi-intrins.patch rename to 0284-i386-Add-non-optimize-prefetchi-intrins.patch diff --git a/0285-SME-Recover-hip09-and-hip11-in-aarch64-cores.def.patch b/0285-SME-Recover-hip09-and-hip11-in-aarch64-cores.def.patch new file mode 100644 index 0000000000000000000000000000000000000000..24dacd1eb01c3309f74cfa17af6c94d333dcbcef --- /dev/null +++ b/0285-SME-Recover-hip09-and-hip11-in-aarch64-cores.def.patch @@ -0,0 +1,32 @@ +From 239f0637307ff2f6afb1473e99d0bb0eaf8946b2 Mon Sep 17 00:00:00 2001 +From: xiezhiheng +Date: Fri, 23 Aug 2024 15:37:17 +0800 +Subject: [PATCH 154/157] [SME] Recover hip09 and hip11 in aarch64-cores.def + +--- + gcc/config/aarch64/aarch64-cores.def | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def +index f069c81cf..3337fd1a0 100644 +--- a/gcc/config/aarch64/aarch64-cores.def ++++ b/gcc/config/aarch64/aarch64-cores.def +@@ -130,6 +130,7 @@ AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, (F16, SVE), a64fx, 0x46, 0x001, -1) + + /* HiSilicon ('H') cores. */ + AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0xd01, -1) ++AARCH64_CORE("hip09", hip09, hip09, V8_5A, (SVE, I8MM, F32MM, F64MM, PROFILE, PREDRES), hip09, 0x48, 0xd02, 0x0) + + /* ARMv8.3-A Architecture Processors. */ + +@@ -171,6 +172,7 @@ AARCH64_CORE("cortex-a710", cortexa710, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, + AARCH64_CORE("cortex-x2", cortexx2, cortexa57, V9A, (SVE2_BITPERM, MEMTAG, I8MM, BF16), neoversen2, 0x41, 0xd48, -1) + + AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversen2, 0x41, 0xd49, -1) ++AARCH64_CORE("hip11", hip11, hip11, V8_5A, (SVE, SVE2, F16), hip11, 0x48, 0xd22, -1) + + AARCH64_CORE("demeter", demeter, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) + AARCH64_CORE("neoverse-v2", neoversev2, cortexa57, V9A, (I8MM, BF16, SVE2_BITPERM, RNG, MEMTAG, PROFILE), neoversev2, 0x41, 0xd4f, -1) +-- +2.33.0 + diff --git a/0286-Try-to-use-AI-model-to-guide-optimization.patch b/0286-Try-to-use-AI-model-to-guide-optimization.patch new file mode 100644 index 0000000000000000000000000000000000000000..a697dcc7815d3697b9a09a95881de29f6e1ae30e --- /dev/null +++ b/0286-Try-to-use-AI-model-to-guide-optimization.patch @@ -0,0 +1,671 @@ +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index fcfa54697..f42aeb8e8 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1449,6 +1449,7 @@ OBJS = \ + inchash.o \ + incpath.o \ + init-regs.o \ ++ ipa-hardware-detection.o \ + internal-fn.o \ + ipa-struct-reorg/ipa-struct-reorg.o \ + ipa-cp.o \ +diff --git a/gcc/common.opt b/gcc/common.opt +index fd98382fa..99e626641 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -185,6 +185,9 @@ const char *main_input_basename + Variable + int main_input_baselength + ++Variable ++bool optimize_maximum ++ + ; The base name used for auxiliary output files. + ; dump_base_name minus dump_base_ext. + +@@ -469,6 +472,10 @@ Ofast + Common Optimization + Optimize for speed disregarding exact standards compliance. + ++Om ++Common Optimization ++Optimize for maximizing radical optimization. ++ + Og + Common Optimization + Optimize for debugging experience rather than speed or size. 
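The -Om option added above is consumed in two places later in this patch: handle_machine_option in opts-common.cc only consults the AI model when both -Om and -mcpu=hip09 are given (on a positive prediction it appends -flto=8, plus -flto-partition=one for the gcc driver), and pass_ipa_hardware_detection then guards the specialised code at run time. A rough C rendition of the GIMPLE that the pass emits at the start of main() (a sketch for orientation only, not code from the patch):

    /* Sketch of the runtime CPU check built by create_detection_bb ().
       MIDR_EL1 holds the implementer in bits [31:24] and the part
       number in bits [15:4]; 0x48 is the HiSilicon implementer code.  */
    unsigned long midr;
    __asm__ ("mrs %0, MIDR_EL1" : "=r" (midr));
    unsigned cpuid = (unsigned) midr;
    unsigned implementer = cpuid >> 24;
    unsigned part = (cpuid >> 4) & 0xfff;
    /* 4294963967 is -0xd01 modulo 2^32, so the unsigned comparison
       accepts part numbers 0xd01..0xd03 only.  */
    if (implementer != 0x48 || part + 4294963967u > 2u)
      __builtin_abort ();
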
+diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc +index 309ecc3d9..ad853af9a 100644 +--- a/gcc/config/aarch64/aarch64.cc ++++ b/gcc/config/aarch64/aarch64.cc +@@ -18637,6 +18637,134 @@ aarch64_sve_adjust_stmt_cost (class vec_info *vinfo, vect_cost_for_stmt kind, + return stmt_cost; + } + ++/* Check whether in C language or LTO with only C language. */ ++extern bool lang_c_p (void); ++ ++static void ++override_C_optimize_options (struct gcc_options *opts) ++{ ++ opts->x_flag_ipa_reorder_fields = 1; ++ opts->x_flag_ipa_struct_reorg = 6; ++ opts->x_struct_layout_optimize_level = 6; ++ opts->x_flag_gnu89_inline = 1; ++ opts->x_flag_ccmp2 = 1; ++ opts->x_flag_array_widen_compare = 1; ++ opts->x_flag_convert_minmax = 1; ++ opts->x_flag_tree_slp_transpose_vectorize = 1; ++ opts->x_param_max_inline_insns_auto = 64; ++ opts->x_param_inline_unit_growth = 96; ++} ++ ++/* Check whether in CPP language or LTO with only CPP language. */ ++static bool ++lang_cpp_p (void) ++{ ++ const char *language_string = lang_hooks.name; ++ if (!language_string) ++ { ++ return false; ++ } ++ if (lang_GNU_CXX ()) ++ { ++ return true; ++ } ++ else if (strcmp (language_string, "GNU GIMPLE") == 0) // for LTO check ++ { ++ unsigned i = 0; ++ tree t = NULL_TREE; ++ FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t) ++ { ++ language_string = TRANSLATION_UNIT_LANGUAGE (t); ++ if (language_string == NULL ++ || strncmp (lang_hooks.name, "GNU C++", 7)) ++ { ++ return false; ++ } ++ } ++ return true; ++ } ++ return false; ++} ++ ++static void ++override_CPP_optimize_options (struct gcc_options *opts) ++{ ++ opts->x_flag_finite_loops = 1; ++ opts->x_flag_omit_frame_pointer = 1; ++ opts->x_flag_sized_deallocation = 0; ++ opts->x_flag_loop_elim = 1; ++ opts->x_flag_convert_minmax = 1; ++ opts->x_param_early_inlining_insns = 256; ++ opts->x_param_max_inline_insns_auto = 128; ++ opts->x_param_inline_unit_growth = 256; ++ opts->x_flag_cmlt_arith = 1; ++} ++ ++static void ++override_optimize_options_1 (struct gcc_options *opts) ++{ ++ opts->x_flag_split_ldp_stp = 1; ++ opts->x_flag_if_conversion_gimple = 1; ++ opts->x_flag_ifcvt_allow_complicated_cmps = 1; ++ opts->x_param_ifcvt_allow_register_renaming = 2; ++ opts->x_param_max_rtl_if_conversion_unpredictable_cost = 48; ++ opts->x_param_max_rtl_if_conversion_predictable_cost = 48; ++} ++ ++static void ++override_Fortran_optimize_options (struct gcc_options *opts) ++{ ++ opts->x_flag_unroll_loops = 1; ++ opts->x_flag_unconstrained_commons = 1; ++ opts->x_param_ipa_cp_eval_threshold = 1; ++ opts->x_param_ipa_cp_unit_growth = 80; ++ opts->x_param_ipa_cp_max_recursive_depth = 8; ++ opts->x_param_large_unit_insns = 30000; ++ opts->x_flag_ira_loop_pressure = 1; ++ opts->x_flag_inline_functions_called_once = 0; ++ opts->x_flag_ira_algorithm = IRA_ALGORITHM_PRIORITY; ++ opts->x_flag_delayed_branch = 1; ++ opts->x_flag_gcse_las = 1; ++ opts->x_flag_gcse_sm = 1; ++ opts->x_flag_ipa_pta = 1; ++ opts->x_flag_reorder_blocks_and_partition = 1; ++ opts->x_flag_reorder_blocks = 1; ++ opts->x_flag_crypto_accel_aes = 1; ++ opts->x_param_flexible_seg_len = 1; ++} ++ ++/* Reset the optimize option. ++ After checking the model result, this function can ++ reset the more appropriate options. 
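++ The reset fires only when -Om has set optimize_maximum, the tune string selects hip09 and the driver-side model has exported AI_INFER_LEVEL; the language checks below then apply the C, C++ or Fortran override set on top of the common one.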
*/ ++static void ++reset_machine_option (struct gcc_options *opts) ++{ ++ if (!(opts->x_optimize_maximum) ++ || strstr (opts->x_aarch64_tune_string, "hip09") == NULL) ++ { ++ return; ++ } ++ ++ const char *ai_infer_level = getenv ("AI_INFER_LEVEL"); ++ if (ai_infer_level) ++ { ++ override_optimize_options_1 (opts); ++ if (lang_c_p ()) ++ { ++ override_C_optimize_options (opts); ++ } ++ else if (lang_cpp_p ()) ++ { ++ override_CPP_optimize_options (opts); ++ } ++ else if (lang_GNU_Fortran ()) ++ { ++ override_Fortran_optimize_options (opts); ++ } ++ } ++} ++ ++ + /* STMT_COST is the cost calculated for STMT_INFO, which has cost kind KIND + and which when vectorized would operate on vector type VECTYPE. Add the + cost of any embedded operations. */ +@@ -20089,6 +20217,7 @@ aarch64_override_options_internal (struct gcc_options *opts) + && opts->x_optimize >= aarch64_tune_params.prefetch->default_opt_level) + opts->x_flag_prefetch_loop_arrays = 1; + ++ reset_machine_option (opts); + aarch64_override_options_after_change_1 (opts); + } + +diff --git a/gcc/ipa-hardware-detection.cc b/gcc/ipa-hardware-detection.cc +new file mode 100644 +index 000000000..8085a8c65 +--- /dev/null ++++ b/gcc/ipa-hardware-detection.cc +@@ -0,0 +1,243 @@ ++/* Hardware Detection. ++ Copyright (C) 2024-2024 Free Software Foundation, Inc. ++This file is part of GCC. ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "target.h" ++#include "tree.h" ++#include "gimple.h" ++#include "tree-pass.h" ++#include "gimple-ssa.h" ++#include "tree-pretty-print.h" ++#include "fold-const.h" ++#include "gimplify.h" ++#include "gimple-iterator.h" ++#include "tree-ssa-loop-manip.h" ++#include "tree-ssa-loop.h" ++#include "ssa.h" ++#include "tree-into-ssa.h" ++#include "cfganal.h" ++#include "cfgloop.h" ++#include "gimple-pretty-print.h" ++#include "tree-cfg.h" ++#include "cgraph.h" ++#include "print-tree.h" ++#include "cfghooks.h" ++#include "gimple-fold.h" ++#include "gimplify-me.h" ++ ++namespace { ++ ++/* Build a binary operation and gimplify it. Emit code before GSI. ++ Return the gimple_val holding the result. 
*/ ++ ++static tree ++gimplify_build2 (gimple_stmt_iterator *gsi, enum tree_code code, ++ tree type, tree a, tree b) ++{ ++ tree ret; ++ ++ ret = fold_build2_loc (gimple_location (gsi_stmt (*gsi)), code, type, a, b); ++ return force_gimple_operand_gsi (gsi, ret, true, NULL, true, ++ GSI_SAME_STMT); ++} ++ ++static basic_block ++create_abort_bb (basic_block last_bb) ++{ ++ basic_block bb = create_empty_bb (last_bb); ++ if (last_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (bb, last_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ gimple_stmt_iterator gsi = gsi_last_bb (bb); ++ tree fn = builtin_decl_implicit (BUILT_IN_ABORT); ++ gimple *g = gimple_build_call (fn, 0); ++ gsi_insert_after (&gsi, g, GSI_NEW_STMT); ++ return bb; ++} ++ ++static basic_block ++create_part_bb (basic_block last_bb, tree part_base) ++{ ++ basic_block bb = create_empty_bb (last_bb); ++ if (last_bb->loop_father != NULL) ++ { ++ add_bb_to_loop (bb, last_bb->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ gimple_stmt_iterator gsi = gsi_last_bb (bb); ++ gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT); ++ /* This number is used to efficiently identify the supported part range. */ ++ tree part_cond = gimplify_build2 ( ++ &gsi, PLUS_EXPR, unsigned_type_node, part_base, ++ build_int_cst (unsigned_type_node, 4294963967)); ++ gcond *cond = gimple_build_cond (LE_EXPR, part_cond, ++ build_int_cst (unsigned_type_node, 2), ++ NULL_TREE, NULL_TREE); ++ gimple_set_location (cond, input_location); ++ gsi_insert_before (&gsi, cond, GSI_SAME_STMT); ++ gsi_remove (&gsi, true); ++ return bb; ++} ++ ++static void ++create_detection_bb () ++{ ++ edge old_e = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); ++ basic_block ret_bb = old_e->dest; ++ ++ basic_block detection_bb = create_empty_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun)); ++ if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father != NULL) ++ { ++ add_bb_to_loop (detection_bb, ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father); ++ loops_state_set (LOOPS_NEED_FIXUP); ++ } ++ tree cpuid_decl = build_decl (input_location, VAR_DECL, ++ get_identifier ("cpuid"), unsigned_type_node); ++ add_local_decl (cfun, cpuid_decl); ++ ++ gimple_stmt_iterator gsi = gsi_last_bb (detection_bb); ++ vec *outputs = NULL; ++ tree purpose = build_string (strlen ("=r"), "=r"); ++ tree output = build_tree_list ( ++ build_tree_list (NULL_TREE, purpose), cpuid_decl); ++ vec_safe_push (outputs, output); ++ gasm *asm_stmt = gimple_build_asm_vec ( ++ "mrs %0, MIDR_EL1", NULL, outputs, NULL, NULL); ++ gsi_insert_after (&gsi, asm_stmt, GSI_NEW_STMT); ++ gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT); ++ ++ tree implementer = gimplify_build2 ( ++ &gsi, RSHIFT_EXPR, unsigned_type_node, cpuid_decl, ++ build_int_cst (unsigned_type_node, 24)); ++ tree part_base = gimplify_build2 ( ++ &gsi, RSHIFT_EXPR, unsigned_type_node, cpuid_decl, ++ build_int_cst (unsigned_type_node, 4)); ++ tree part = gimplify_build2 ( ++ &gsi, BIT_AND_EXPR, unsigned_type_node, part_base, ++ build_int_cst (unsigned_type_node, 4095)); ++ gcond *implementer_cond = gimple_build_cond ( ++ EQ_EXPR, implementer, ++ build_int_cst (unsigned_type_node, 72), ++ NULL_TREE, NULL_TREE); ++ gimple_set_location (implementer_cond, input_location); ++ gsi_insert_before (&gsi, implementer_cond, GSI_SAME_STMT); ++ gsi_remove (&gsi, true); ++ ++ basic_block part_bb = create_part_bb (detection_bb, part); ++ basic_block abort_bb = create_abort_bb (part_bb); ++ ++ remove_edge_raw (old_e); ++ make_single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN 
(cfun), ++ detection_bb, EDGE_FALLTHRU); ++ edge etrue = make_edge (detection_bb, part_bb, EDGE_TRUE_VALUE); ++ etrue->probability = profile_probability::likely (); ++ edge efalse = make_edge (detection_bb, abort_bb, EDGE_FALSE_VALUE); ++ efalse->probability = profile_probability::unlikely (); ++ edge part_true = make_edge (part_bb, ret_bb, EDGE_TRUE_VALUE); ++ part_true->probability = profile_probability::likely (); ++ edge part_false = make_edge (part_bb, abort_bb, EDGE_FALSE_VALUE); ++ part_false->probability = profile_probability::unlikely (); ++ make_single_succ_edge (abort_bb, ret_bb, EDGE_FALLTHRU); ++ if (dom_info_available_p (CDI_DOMINATORS)) ++ { ++ set_immediate_dominator (CDI_DOMINATORS, part_bb, detection_bb); ++ set_immediate_dominator (CDI_DOMINATORS, ret_bb, detection_bb); ++ set_immediate_dominator (CDI_DOMINATORS, abort_bb, detection_bb); ++ } ++} ++ ++const pass_data pass_data_ipa_hardware_detection = ++{ ++ SIMPLE_IPA_PASS, ++ "hardware_detection", ++ OPTGROUP_NONE, ++ TV_IPA_HARDWARE_DETECTION, ++ (PROP_cfg | PROP_ssa), ++ 0, ++ 0, ++ 0, ++ (TODO_update_ssa | TODO_verify_all) ++}; ++ ++class pass_ipa_hardware_detection : public simple_ipa_opt_pass ++{ ++public: ++ pass_ipa_hardware_detection (gcc::context *ctxt) ++ : simple_ipa_opt_pass (pass_data_ipa_hardware_detection, ctxt) ++ {} ++ ++ virtual bool gate (function *); ++ virtual unsigned int execute (function *); ++}; // class pass_ipa_hardware_detection ++ ++bool ++pass_ipa_hardware_detection::gate (function *) ++{ ++ const char *ai_infer_level = getenv ("AI_INFER_LEVEL"); ++ return (ai_infer_level ++ && optimize_maximum > 0 ++ /* Only enable in lto or whole_program. */ ++ && (in_lto_p || flag_whole_program)); ++} ++ ++unsigned int ++pass_ipa_hardware_detection::execute (function *) ++{ ++ unsigned int ret = 0; ++ cgraph_node *cnode; ++ FOR_EACH_FUNCTION (cnode) ++ { ++ if (!cnode->real_symbol_p ()) ++ { ++ continue; ++ } ++ if (cnode->definition) ++ { ++ if (!cnode->has_gimple_body_p () || cnode->inlined_to) ++ continue; ++ ++ cnode->get_body (); ++ function *fn = DECL_STRUCT_FUNCTION (cnode->decl); ++ if (!fn) ++ continue; ++ ++ if (DECL_NAME (cnode->decl) ++ && MAIN_NAME_P (DECL_NAME (cnode->decl))) ++ { ++ push_cfun (fn); ++ calculate_dominance_info (CDI_DOMINATORS); ++ ++ create_detection_bb (); ++ ++ cgraph_edge::rebuild_edges (); ++ free_dominance_info (CDI_DOMINATORS); ++ pop_cfun (); ++ } ++ } ++ } ++ return ret; ++} ++} // anon namespace ++ ++simple_ipa_opt_pass * ++make_pass_ipa_hardware_detection (gcc::context *ctxt) ++{ ++ return new pass_ipa_hardware_detection (ctxt); ++} +diff --git a/gcc/opts-common.cc b/gcc/opts-common.cc +index 489a6e02a..12c3f7299 100644 +--- a/gcc/opts-common.cc ++++ b/gcc/opts-common.cc +@@ -992,6 +992,158 @@ opts_concat (const char *first, ...) + return newstr; + } + ++typedef int64_t (*run_ai_model_func)(int, const char **, ++ const char *, int, int64_t *); ++#define PTR_UNION_TYPE(TOTYPE) union { void *_q; TOTYPE _nq; } ++#define PTR_UNION_AS_VOID_PTR(NAME) (NAME._q) ++#define PTR_UNION_AS_CAST_PTR(NAME) (NAME._nq) ++ ++static int64_t ++ai_infer_optimization (int argc, const char **argv, ++ const char *mcpu_option, ++ int argc_hw, int64_t *argv_hw) ++{ ++ /* Load dependent AI-framework libraries. 
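++ libonnxruntime.so and libONNXRunner.so are loaded with dlopen, the runONNXModelOptimizer entry point is resolved with dlsym, and a prediction of 1 exports AI_INFER_LEVEL=1 for the rest of the compilation; any failure along the way returns -1 and leaves the command line unchanged.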
*/ ++ void *onnxruntime_lib_handle = NULL; ++ const char *onnxruntime_lib_path = "libonnxruntime.so"; ++ ++ onnxruntime_lib_handle = dlopen (onnxruntime_lib_path, ++ RTLD_LAZY | RTLD_GLOBAL); ++ if (!onnxruntime_lib_handle) ++ { ++ return -1; ++ } ++ ++ void *ai4c_lib_handle = NULL; ++ const char *ai4c_lib_path = "libONNXRunner.so"; ++ ++ ai4c_lib_handle = dlopen (ai4c_lib_path, RTLD_LAZY | RTLD_GLOBAL); ++ if (!ai4c_lib_handle) ++ { ++ return -1; ++ } ++ ++ /* Clear any existing error. */ ++ dlerror (); ++ ++ /* Run AI4Compiler model. */ ++ if (ai4c_lib_handle == NULL || onnxruntime_lib_handle == NULL) ++ { ++ return -1; ++ } ++ ++ run_ai_model_func run_ai_model; ++ PTR_UNION_TYPE (run_ai_model_func) run_ai_model_func_union; ++ PTR_UNION_AS_VOID_PTR (run_ai_model_func_union) ++ = dlsym (ai4c_lib_handle, "runONNXModelOptimizer"); ++ run_ai_model = PTR_UNION_AS_CAST_PTR (run_ai_model_func_union); ++ if (!run_ai_model) ++ { ++ dlclose (ai4c_lib_handle); ++ dlclose (onnxruntime_lib_handle); ++ return -1; ++ } ++ int64_t model_pred = (*run_ai_model) (argc, argv, ++ mcpu_option, argc_hw, argv_hw); ++ ++ if (ai4c_lib_handle) ++ dlclose (ai4c_lib_handle); ++ ++ if (onnxruntime_lib_handle) ++ dlclose (onnxruntime_lib_handle); ++ ++ if (model_pred == 1) ++ putenv ("AI_INFER_LEVEL=1"); ++ return model_pred; ++} ++ ++static int ++handle_lto_option (unsigned int lang_mask, ++ unsigned int num_decoded_options, ++ unsigned int argc, ++ const char **argv, ++ struct cl_decoded_option *&opt_array) ++{ ++ int ret = 0; ++ char *lan = ""; ++ char *compiler = xstrdup (argv[0]); ++ lan = strrchr (compiler, '/'); ++ if (lan != NULL) ++ lan ++; ++ else ++ lan = compiler; ++ if (strstr (lan, "gcc") != NULL) ++ { ++ opt_array = XRESIZEVEC (struct cl_decoded_option, opt_array, argc + 2); ++ const char* lto_flag = "-flto=8"; ++ decode_cmdline_option (<o_flag, lang_mask, ++ &opt_array[num_decoded_options]); ++ ret++; ++ const char* ltopartition_flag = "-flto-partition=one"; ++ decode_cmdline_option (<opartition_flag, lang_mask, ++ &opt_array[num_decoded_options + 1]); ++ ret++; ++ } ++ else if (strstr (lan, "g++") != NULL ++ || strstr (lan, "gfortran") != NULL) ++ { ++ opt_array = XRESIZEVEC (struct cl_decoded_option, opt_array, argc + 1); ++ const char* lto_flag = "-flto=8"; ++ decode_cmdline_option (<o_flag, lang_mask, ++ &opt_array[num_decoded_options]); ++ ret++; ++ } ++ if (compiler) ++ free (compiler); ++ return ret; ++} ++ ++static int ++handle_machine_option (unsigned int lang_mask, ++ unsigned int num_decoded_options, ++ unsigned int argc, ++ const char **argv, ++ struct cl_decoded_option *&opt_array) ++{ ++ int ret = 0; ++ bool flag_Om = false; ++ bool flag_hip09 = false; ++ for (unsigned i = 1; i < argc; i ++) ++ { ++ if (strcmp (argv[i], "-Om") == 0) ++ flag_Om = true; ++ if (strstr (argv[i], "mcpu=hip09") != NULL) ++ flag_hip09 = true; ++ } ++ if (!flag_hip09 || !flag_Om) ++ { ++ return ret; ++ } ++ ++ const char *ai_infer_level = getenv ("AI_INFER_LEVEL"); ++ if (ai_infer_level) ++ { ++ return ret; ++ } ++ int argc_hw = 6; ++ int64_t argv_hw[argc_hw] = { ++ global_options.x_param_simultaneous_prefetches, ++ global_options.x_param_l1_cache_size, ++ global_options.x_param_l1_cache_line_size, ++ global_options.x_param_l2_cache_size, ++ global_options.x_param_prefetch_latency, ++ global_options.x_param_ipa_prefetch_distance_factor}; ++ int64_t output_pred = ai_infer_optimization ( ++ argc, argv, "hip09", argc_hw, argv_hw); ++ if (output_pred != 1) ++ { ++ return ret; ++ } ++ ++ return handle_lto_option 
(lang_mask, num_decoded_options, ++ argc, argv, opt_array); ++} ++ + /* Decode command-line options (ARGC and ARGV being the arguments of + main) into an array, setting *DECODED_OPTIONS to a pointer to that + array and *DECODED_OPTIONS_COUNT to the number of entries in the +@@ -1090,6 +1242,9 @@ decode_cmdline_options_to_array (unsigned int argc, const char **argv, + num_decoded_options++; + } + ++ num_decoded_options += handle_machine_option (lang_mask, num_decoded_options, ++ argc, argv, opt_array); ++ + *decoded_options = opt_array; + *decoded_options_count = num_decoded_options; + prune_options (decoded_options, decoded_options_count, lang_mask); +diff --git a/gcc/opts.cc b/gcc/opts.cc +index e34e5ee8e..d97f6079f 100644 +--- a/gcc/opts.cc ++++ b/gcc/opts.cc +@@ -780,6 +780,14 @@ default_options_optimization (struct gcc_options *opts, + opts->x_optimize_debug = 1; + break; + ++ case OPT_Om: ++ /* -Om adds flags to -O3. */ ++ opts->x_optimize_size = 0; ++ opts->x_optimize = 3; ++ opts->x_optimize_maximum = true; ++ opts->x_optimize_debug = 0; ++ break; ++ + case OPT_fopenacc: + if (opt->value) + openacc_mode = true; +@@ -2733,6 +2741,8 @@ common_handle_option (struct gcc_options *opts, + &= ~(SANITIZE_UNDEFINED | SANITIZE_UNDEFINED_NONDEFAULT); + break; + ++ case OPT_Om: ++ break; + case OPT_O: + case OPT_Os: + case OPT_Ofast: +diff --git a/gcc/passes.def b/gcc/passes.def +index 8797f166f..690d344c0 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -179,6 +179,7 @@ along with GCC; see the file COPYING3. If not see + passes are executed after partitioning and thus see just parts of the + compiled unit. */ + INSERT_PASSES_AFTER (all_late_ipa_passes) ++ NEXT_PASS (pass_ipa_hardware_detection); + NEXT_PASS (pass_ipa_pta); + /* FIXME: this should be a normal IP pass. 
*/ + NEXT_PASS (pass_ipa_struct_reorg); +diff --git a/gcc/timevar.def b/gcc/timevar.def +index 8e7510eb3..bd8c9a4f7 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -81,6 +81,7 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , "ipa cp") + DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics") + DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting") + DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats") ++DEFTIMEVAR (TV_IPA_HARDWARE_DETECTION, "ipa detection") + DEFTIMEVAR (TV_IPA_PREFETCH , "ipa prefetch") + DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization") + DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 1c983ef71..ee873f0b2 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -528,6 +528,8 @@ extern ipa_opt_pass_d *make_pass_ipa_icp (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); ++extern simple_ipa_opt_pass *make_pass_ipa_hardware_detection (gcc::context * ++ ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_prefetch (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt); diff --git a/0287-Add-dynamic-memory-access-checks.patch b/0287-Add-dynamic-memory-access-checks.patch new file mode 100644 index 0000000000000000000000000000000000000000..e23d8f64c0d87f6c3d65f9d5ead4bdace2fdca5d --- /dev/null +++ b/0287-Add-dynamic-memory-access-checks.patch @@ -0,0 +1,774 @@ +From 08fb60d0a0707af4004b20358f4a921e4ae6cca6 Mon Sep 17 00:00:00 2001 +From: Diachkov Ilia +Date: Thu, 22 Aug 2024 15:23:36 +0800 +Subject: [PATCH 156/157] Add dynamic memory access checks + +Signed-off-by: Diachkov Ilia +--- + gcc/ipa-prefetch.cc | 622 +++++++++++++++++++++++++++++++++++++------- + gcc/params.opt | 4 + + 2 files changed, 525 insertions(+), 101 deletions(-) + +diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc +index 94290ea9c..b000d4d75 100644 +--- a/gcc/ipa-prefetch.cc ++++ b/gcc/ipa-prefetch.cc +@@ -368,6 +368,7 @@ typedef std::map memref_tree_map; + typedef std::set stmt_set; + typedef std::set tree_set; + typedef std::map tree_map; ++typedef std::map tree_poly_offset_map; + + tree_memref_map *tm_map; + funct_mrs_map *fmrs_map; +@@ -710,6 +711,20 @@ get_mem_ref_address_ssa_name (tree mem, tree base) + return NULL_TREE; + } + ++static void ++dump_base_addr (tree base_addr) ++{ ++ if (base_addr) ++ { ++ fprintf (dump_file, "Base addr (%s): ", ++ get_tree_code_name (TREE_CODE (base_addr))); ++ print_generic_expr (dump_file, base_addr); ++ } ++ else ++ fprintf (dump_file, "Base addr (%s): ", "null"); ++ fprintf (dump_file, "\n"); ++} ++ + static void + analyse_mem_ref (gimple *stmt, tree mem, memref_t* mr) + { +@@ -736,14 +751,7 @@ analyse_mem_ref (gimple *stmt, tree mem, memref_t* mr) + { + tree base_addr = get_mem_ref_address_ssa_name (mem, base); + if (dump_file) +- { +- fprintf (dump_file, "Base addr (%s): ", +- base_addr ? 
get_tree_code_name (TREE_CODE (base_addr)) +- : "null"); +- if (base_addr) +- print_generic_expr (dump_file, base_addr); +- fprintf (dump_file, "\n"); +- } ++ dump_base_addr (base_addr); + if (base_addr) + { + mr->base = analyse_addr_eval (base_addr, mr); +@@ -1187,7 +1195,7 @@ reduce_memref_set (memref_set *set, vec &vec) + } + + static void +-find_nearest_common_dominator (memref_t *mr, basic_block &dom) ++find_nearest_common_post_dominator (memref_t *mr, basic_block &dom) + { + for (unsigned int i = 0; i < mr->stmts.length (); i++) + { +@@ -1196,7 +1204,7 @@ find_nearest_common_dominator (memref_t *mr, basic_block &dom) + if (dom == bb) + continue; + if (dom) +- dom = nearest_common_dominator (CDI_DOMINATORS, dom, bb); ++ dom = nearest_common_dominator (CDI_POST_DOMINATORS, dom, bb); + else + dom = bb; + } +@@ -1495,10 +1503,13 @@ gimple_copy_and_remap (gimple *stmt) + + static gimple * + gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts, +- int last_idx, stmt_set &processed) ++ int first_idx, int last_idx, ++ stmt_set &processed) + { + gimple *last_stmt = NULL; +- for (int i = mr->stmts.length () - 1; i >= last_idx ; i--) ++ if (first_idx == 0) ++ first_idx = mr->stmts.length () - 1; ++ for (int i = first_idx; i >= last_idx; i--) + { + if (processed.count (mr->stmts[i])) + continue; +@@ -1515,6 +1526,436 @@ gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts, + return last_stmt; + } + ++/* Check if prefetch insertion may be always unsafe in this case. For now ++ reject cases with access to arrays with no domain or with no elements. */ ++ ++static bool ++check_prefetch_safety (vec &mrs, memref_t *cmr) ++{ ++ for (unsigned int i = 0; i < mrs.length (); i++) ++ { ++ memref_t *mr = mrs[i]; ++ if (mr == cmr || mr->used_mrs.empty ()) ++ continue; ++ bool is_store; ++ tree *mem = simple_mem_ref_in_stmt (mr->stmts[0], &is_store); ++ if (mem == NULL || TREE_CODE (*mem) != ARRAY_REF) ++ continue; ++ tree array = TREE_OPERAND (*mem, 0); ++ tree atype = TREE_TYPE (array); ++ gcc_assert (atype); ++ tree domain = TYPE_DOMAIN (atype); ++ if (!domain || !tree_fits_uhwi_p (TYPE_MIN_VALUE (domain)) ++ || !tree_fits_uhwi_p (TYPE_MAX_VALUE (domain))) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "Unsupported array type: "); ++ print_generic_expr (dump_file, atype); ++ fprintf (dump_file, "\n"); ++ } ++ return false; ++ } ++ unsigned HOST_WIDE_INT min_val = tree_to_uhwi (TYPE_MIN_VALUE (domain)); ++ unsigned HOST_WIDE_INT max_val = tree_to_uhwi (TYPE_MAX_VALUE (domain)); ++ if (min_val == 0 && max_val == 0) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "Unsupported array type's bounds: "); ++ print_generic_expr (dump_file, atype); ++ fprintf (dump_file, "\n"); ++ } ++ return false; ++ } ++ } ++ return true; ++} ++ ++/* Collect base addresses which we need to check. 
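++ For every memref except the incrementing candidate, record the MEM_REF base SSA name together with the most extreme constant offset seen in the direction of the step, so that a single page check per base address covers all of the accesses copied for it.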
*/ ++ ++static void ++collect_base_addresses (vec &used_mr_vec, HOST_WIDE_INT dist_val, ++ memref_t *comp_mr, tree_poly_offset_map &offset_map) ++{ ++ if (dump_file) ++ fprintf (dump_file, "Collect base addresses which we need to check.\n"); ++ for (unsigned int i = 0; i < used_mr_vec.length (); i++) ++ { ++ memref_t *mr = used_mr_vec[i]; ++ if (mr == comp_mr || mr->used_mrs.empty ()) ++ continue; ++ bool is_store; ++ tree *mem = simple_mem_ref_in_stmt (mr->stmts[0], &is_store); ++ if (mem == NULL || TREE_CODE (*mem) != MEM_REF) ++ continue; ++ tree base = get_base_address (*mem); ++ tree base_addr = get_mem_ref_address_ssa_name (*mem, base); ++ if (!base_addr) ++ continue; ++ if (dump_file) ++ { ++ dump_base_addr (base_addr); ++ if (base) ++ { ++ fprintf (dump_file, "Base:"); ++ print_generic_expr (dump_file, base); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ if (!TREE_OPERAND (base, 1)) ++ continue; ++ poly_offset_int curr_offset = mem_ref_offset (base); ++ poly_offset_int saved_offset = 0; ++ if (offset_map.count (base_addr)) ++ { ++ saved_offset = offset_map[base_addr]; ++ if ((dist_val > 0 && known_gt (curr_offset, saved_offset)) ++ || (dist_val < 0 && known_lt (curr_offset, saved_offset))) ++ offset_map[base_addr] = curr_offset; ++ else if (dump_file) ++ fprintf (dump_file, "Off: step=%ld gt=%d lt=%d\n", dist_val, ++ known_gt (curr_offset, saved_offset), ++ known_lt (curr_offset, saved_offset)); ++ } ++ else ++ offset_map[base_addr] = curr_offset; ++ } ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Final list of base addresses:\n"); ++ for (tree_poly_offset_map::iterator it1 = offset_map.begin (); ++ it1 != offset_map.end (); ++it1) ++ { ++ tree base_addr = it1->first; ++ poly_offset_int off = it1->second; ++ fprintf (dump_file, "Base:"); ++ print_generic_expr (dump_file, base_addr); ++ HOST_WIDE_INT val = estimated_poly_value (off.force_shwi (), ++ POLY_VALUE_LIKELY); ++ fprintf (dump_file, "\nOff: %ld\n", val); ++ } ++ fprintf (dump_file, "Finish collecting base addresses.\n"); ++ } ++} ++ ++/* Return true if we need page check to access memory at this address. */ ++ ++static bool ++need_page_check (tree base_addr, tree_set &checked_base_addrs) ++{ ++ if (dump_file) ++ dump_base_addr (base_addr); ++ if (base_addr == NULL) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Base address not found\n"); ++ return false; ++ } ++ if (checked_base_addrs.count (base_addr)) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Base address is already checked\n"); ++ return false; ++ } ++ return true; ++} ++ ++/* Insert instructions to check the original address and newly evaluated ++ adress for prefetch correspond the same page. 
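++ Both addresses are masked with ~(param_ipa_prefetch_pagesize - 1) and compared for equality, so the prefetch is only issued when it stays on the same page as the original access.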
*/ ++ ++static gimple * ++insert_page_check (tree addr, tree_poly_offset_map &offset_map, ++ gimple_seq &stmts) ++{ ++ poly_offset_int offset = 0; ++ if (offset_map.count (addr)) ++ offset = offset_map[addr]; ++ tree addr_type = TREE_TYPE (addr); ++ tree utype = unsigned_type_for (addr_type); ++ tree new_addr = build_int_cst (addr_type, 0); ++ if (decl_map->count (addr)) ++ new_addr = (*decl_map)[addr]; ++ tree t1 = make_ssa_name (utype); ++ tree t2 = make_ssa_name (utype); ++ unsigned long long pmask = ~(param_ipa_prefetch_pagesize - 1); ++ tree pmask_cst = build_int_cst (utype, pmask); ++ tree off_tree = wide_int_to_tree (sizetype, offset); ++ gcc_assert (TREE_CODE (addr_type) == POINTER_TYPE); ++ tree addr_with_offset = gimple_build (&stmts, POINTER_PLUS_EXPR, ++ addr_type, addr, off_tree); ++ tree conv_addr = make_ssa_name (utype); ++ tree conv_new_addr = make_ssa_name (utype); ++ gimple *conv1 = gimple_build_assign (conv_addr, ++ fold_convert (utype, addr_with_offset)); ++ gimple *conv2 = gimple_build_assign (conv_new_addr, ++ fold_convert (utype, new_addr)); ++ gimple *paddr = gimple_build_assign (t1, BIT_AND_EXPR, ++ conv_addr, pmask_cst); ++ gimple *new_paddr = gimple_build_assign (t2, BIT_AND_EXPR, ++ conv_new_addr, pmask_cst); ++ gcond *cond = gimple_build_cond (EQ_EXPR, t1, t2, NULL, NULL); ++ gimple_seq_add_stmt (&stmts, conv1); ++ gimple_seq_add_stmt (&stmts, paddr); ++ gimple_seq_add_stmt (&stmts, conv2); ++ gimple_seq_add_stmt (&stmts, new_paddr); ++ gimple_seq_add_stmt (&stmts, cond); ++ return cond; ++} ++ ++/* Check if this array access needs dynamic address verification. Support only ++ arrays with 1-d indexing. */ ++ ++static bool ++need_array_index_check (tree mem) ++{ ++ /* Check pattern: t1 = (type) t0; ld/st array[t1]. If any index of type (t0) ++ does not go beyond the bounds of the array, we don't need the check. */ ++ tree array = TREE_OPERAND (mem, 0); ++ tree atype = TREE_TYPE (array); ++ tree index = TREE_OPERAND (mem, 1); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Array ind: "); ++ print_generic_expr (dump_file, index); ++ fprintf (dump_file, "\nMem: "); ++ print_generic_expr (dump_file, array); ++ fprintf (dump_file, "\nInd type: "); ++ print_generic_expr (dump_file, TREE_TYPE (index)); ++ fprintf (dump_file, "\nMem type: "); ++ print_generic_expr (dump_file, atype); ++ fprintf (dump_file, "\n"); ++ } ++ tree domain = TYPE_DOMAIN (atype); ++ if (!domain || !tree_fits_uhwi_p (TYPE_MIN_VALUE (domain)) ++ || !tree_fits_uhwi_p (TYPE_MAX_VALUE (domain))) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Unsupported array type domain.\n"); ++ return true; ++ } ++ unsigned HOST_WIDE_INT min_val = tree_to_uhwi (TYPE_MIN_VALUE (domain)); ++ unsigned HOST_WIDE_INT max_val = tree_to_uhwi (TYPE_MAX_VALUE (domain)); ++ if (dump_file) ++ fprintf (dump_file, "Array bounds (%ld, %ld)\n", min_val, max_val); ++ if (TREE_CODE (index) != SSA_NAME) ++ return true; ++ ++ gimple *stmt = SSA_NAME_DEF_STMT (index); ++ if (!is_gimple_assign (stmt)) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "Is not assign, stop analysis: "); ++ print_gimple_stmt (dump_file, stmt, 3, TDF_DETAILS); ++ } ++ return true; ++ } ++ tree *lhs = gimple_assign_lhs_ptr (stmt); ++ tree *rhs = gimple_assign_rhs1_ptr (stmt); ++ tree lhs_type = TREE_TYPE (*lhs); ++ tree rhs_type = TREE_TYPE (*rhs); ++ tree ind_type = (TYPE_PRECISION (lhs_type) < TYPE_PRECISION (rhs_type)) ++ ? 
lhs_type : rhs_type; ++ if (!ind_type || !tree_fits_uhwi_p (TYPE_MIN_VALUE (ind_type)) ++ || !tree_fits_uhwi_p (TYPE_MAX_VALUE (ind_type))) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Unsupported index type.\n"); ++ return true; ++ } ++ int prec = tree_to_uhwi (TYPE_SIZE (ind_type)); ++ unsigned HOST_WIDE_INT t_max_val = tree_to_uhwi (TYPE_MAX_VALUE (ind_type)); ++ unsigned HOST_WIDE_INT t_min_val = tree_to_uhwi (TYPE_MIN_VALUE (ind_type)); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Index type (%d, %ld, %ld): ", prec, ++ t_min_val, t_max_val); ++ print_generic_expr (dump_file, ind_type); ++ fprintf (dump_file, "\n"); ++ } ++ return !((t_max_val <= max_val) && (t_min_val >= min_val)); ++} ++ ++/* Insert instructions to check that the new index is within the array bounds. */ ++ ++static gimple * ++insert_index_check (tree mem, gimple_seq &stmts) ++{ ++ if (dump_file) ++ fprintf (dump_file, "Insert array index check\n"); ++ tree atype = TREE_TYPE (TREE_OPERAND (mem, 0)); ++ tree ind = TREE_OPERAND (mem, 1); ++ if (decl_map->count (ind)) ++ ind = (*decl_map)[ind]; ++ tree domain = TYPE_DOMAIN (atype); ++ gcc_assert (domain && tree_fits_uhwi_p (TYPE_MIN_VALUE (domain)) ++ && tree_fits_uhwi_p (TYPE_MAX_VALUE (domain))); ++ ++ tree ind_min_val = TYPE_MIN_VALUE (domain); ++ tree ind_max_val = TYPE_MAX_VALUE (domain); ++ tree t1 = make_ssa_name (boolean_type_node); ++ tree t2 = make_ssa_name (boolean_type_node); ++ tree t3 = make_ssa_name (boolean_type_node); ++ t1 = fold_build2 (LE_EXPR, boolean_type_node, ind, ind_max_val); ++ t2 = fold_build2 (GE_EXPR, boolean_type_node, ind, ind_min_val); ++ t3 = fold_build2 (TRUTH_ANDIF_EXPR, boolean_type_node, t1, t2); ++ gcond *cond = gimple_build_cond (EQ_EXPR, t3, boolean_true_node, NULL, NULL); ++ gimple_seq_add_stmt (&stmts, cond); ++ return cond; ++} ++ ++/* Insert safety checks for memory access stmts newly created to evaluate ++ prefetch addresses. */ ++ ++static void ++process_used_mr (memref_t *mr, tree_poly_offset_map &offset_map, ++ tree_set &checked_base_addrs, gimple_seq &stmts, ++ vec<gimple *> &bbends) ++{ ++ bool is_store; ++ tree *mem = simple_mem_ref_in_stmt (mr->stmts[0], &is_store); ++ if (mem == NULL) ++ return; ++ if (dump_file) ++ { ++ fprintf (dump_file, "MR (%d) may need an address check: ", ++ mr->mr_id); ++ print_generic_expr (dump_file, *mem); ++ fprintf (dump_file, "\n"); ++ } ++ gimple *bbend = NULL; ++ if (TREE_CODE (*mem) == MEM_REF) ++ { ++ tree base = get_base_address (*mem); ++ tree base_addr = get_mem_ref_address_ssa_name (*mem, base); ++ if (!need_page_check (base_addr, checked_base_addrs)) ++ return; ++ bbend = insert_page_check (base_addr, offset_map, stmts); ++ checked_base_addrs.insert (base_addr); ++ } ++ else if (TREE_CODE (*mem) == ARRAY_REF && need_array_index_check (*mem)) ++ bbend = insert_index_check (*mem, stmts); ++ if (bbend) ++ bbends.safe_push (bbend); ++} ++ ++/* Create new variables and insert new stmts to evaluate prefetch addresses. */ ++ ++static void ++create_stmts_for_used_mrs (vec<memref_t *> &used_mr_vec, vec<gimple *> &bbends, ++ gimple_seq &stmts, stmt_set &processed_stmts, ++ HOST_WIDE_INT dist_val, memref_t *comp_mr) ++{ ++ tree_poly_offset_map offset_map; ++ collect_base_addresses (used_mr_vec, dist_val, comp_mr, offset_map); ++ ++ /* Insert stmts to evaluate prefetch addresses.
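++ Roughly, for each used MR except comp_mr itself: first remap its address ++ computation up to the last stmt, then, if other MRs hang off it, guard the ++ newly built access with a page or array-index check (process_used_mr), and ++ only then copy the remaining stmt.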
*/ ++ tree_set checked_base_addrs; ++ for (unsigned int i = 0; i < used_mr_vec.length (); i++) ++ { ++ memref_t *mr = used_mr_vec[i]; ++ if (mr == comp_mr) ++ continue; ++ gimple *last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0, 1, ++ processed_stmts); ++ if (last_stmt && dump_file) ++ { ++ fprintf (dump_file, "MR (%d) new mem: ", mr->mr_id); ++ print_generic_expr (dump_file, gimple_assign_lhs (last_stmt)); ++ fprintf (dump_file, "\n"); ++ } ++ if (!mr->used_mrs.empty ()) ++ process_used_mr (mr, offset_map, checked_base_addrs, stmts, bbends); ++ last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0, 0, ++ processed_stmts); ++ } ++} ++ ++/* Insert prefetch instructions. */ ++ ++static void ++insert_prefetch_stmts (vec<gimple *> &pcalls, gimple_seq &stmts, ++ gimple *&last_pref, vec<memref_t *> &vmrs, ++ stmt_set &processed_stmts) ++{ ++ if (dump_file) ++ fprintf (dump_file, "Evaluate addresses and insert prefetch insns.\n"); ++ ++ tree local; ++ switch (param_ipa_prefetch_locality) ++ { ++ case 0: ++ local = integer_zero_node; ++ break; ++ case 1: ++ local = integer_one_node; ++ break; ++ case 2: ++ local = build_int_cst (integer_type_node, 2); ++ break; ++ default: ++ case 3: ++ local = integer_three_node; ++ break; ++ } ++ tree_set prefetched_addrs; ++ for (unsigned int i = 0; i < vmrs.length (); i++) ++ { ++ memref_t *mr = vmrs[i]; ++ /* Don't need to copy the last stmt, since we insert prefetch insn ++ instead of it. */ ++ gimple_copy_and_remap_memref_stmts (mr, stmts, 0, 1, processed_stmts); ++ gimple *last_stmt = mr->stmts[0]; ++ gcc_assert (last_stmt); ++ ++ tree old_addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE); ++ tree new_addr = old_addr; ++ if (decl_map->count (old_addr)) ++ new_addr = (*decl_map)[old_addr]; ++ if (prefetched_addrs.count (new_addr)) ++ continue; ++ /* Insert prefetch intrinsic call. */ ++ tree write_p = mr->is_store ? integer_one_node : integer_zero_node; ++ last_pref = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH), ++ 3, new_addr, write_p, local); ++ pcalls.safe_push (last_pref); ++ gimple_seq_add_stmt (&stmts, last_pref); ++ prefetched_addrs.insert (new_addr); ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "Insert %d prefetch stmt:\n", i); ++ print_gimple_stmt (dump_file, last_pref, 0); ++ } ++ } ++} ++ ++/* Split bbs after condition stmts and fix control flow graph. */ ++ ++static void ++correct_cfg (vec<gimple *> &bbends, gimple *last_pref, basic_block &dom_bb) ++{ ++ edge e_last = split_block (dom_bb, last_pref); ++ if (!bbends.length () || last_pref == NULL) ++ return; ++ for (int i = bbends.length () - 1; i >= 0; i--) ++ { ++ gimple *bbend = bbends[i]; ++ if (dump_file) ++ { ++ fprintf (dump_file, "Split dom_bb after condition stmts:\n"); ++ print_gimple_stmt (dump_file, bbend, 0); ++ } ++ basic_block last_bb = e_last->dest; ++ edge e = split_block (dom_bb, bbend); ++ e->flags &= ~EDGE_FALLTHRU; ++ e->flags |= EDGE_TRUE_VALUE; ++ edge e_false = make_edge (dom_bb, last_bb, EDGE_FALSE_VALUE); ++ e_false->probability = profile_probability::never (); ++ } ++} ++ + static void + create_cgraph_edge (cgraph_node *n, gimple *stmt) + { +@@ -1529,6 +1970,17 @@ create_cgraph_edge (cgraph_node *n, gimple *stmt) + ipa_call_summaries->get_create (e); + } + ++/* Modify cgraph inserting calls to prefetch intrinsics.
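++ (A sketch, assuming the usual cgraph API: this amounts to ++ n->create_edge (cgraph_node::get_create (callee_decl), call, count) ++ for each generated call, followed by ipa_update_overall_fn_summary (n), ++ so the new __builtin_prefetch calls stay visible to later IPA passes.)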
*/ ++ ++static void ++modify_ipa_info (cgraph_node *n, vec<gimple *> &pcalls) ++{ ++ for (unsigned i = 0; i < pcalls.length (); i++) ++ create_cgraph_edge (n, pcalls[i]); ++ ipa_update_overall_fn_summary (n); ++ renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (n->decl)); ++} ++ + /* Insert prefetch intrinsics in this function, return nonzero on success. */ + + static int +@@ -1607,6 +2059,18 @@ optimize_function (cgraph_node *n, function *fn) + return 0; + } + ++ vec<memref_t *> used_mr_vec = vNULL; ++ for (memref_set::const_iterator it = used_mrs.begin (); ++ it != used_mrs.end (); it++) ++ used_mr_vec.safe_push (*it); ++ used_mr_vec.qsort (memref_id_cmp); ++ if (!check_prefetch_safety (used_mr_vec, comp_mr)) ++ { ++ if (dump_file) ++ fprintf (dump_file, "Prefetching may be unsafe. Skip the case.\n"); ++ return 0; ++ } ++ + /* Filter out memrefs with the same memory references. + TODO: maybe do the same with used mrs. */ + vec<memref_t *> vmrs = vNULL; +@@ -1616,18 +2080,18 @@ optimize_function (cgraph_node *n, function *fn) + /* TODO: maybe it is useful to process also used_mrs. */ + basic_block dom_bb = NULL; + for (unsigned int i = 0; i < vmrs.length (); i++) +- find_nearest_common_dominator (vmrs[i], dom_bb); ++ find_nearest_common_post_dominator (vmrs[i], dom_bb); + + if (!dom_bb) + { + if (dump_file) +- fprintf (dump_file, "Dominator bb for MRs is not found. " ++ fprintf (dump_file, "Post dominator bb for MRs is not found. " + "Skip the case.\n"); + return 0; + } + else if (dump_file) + { +- fprintf (dump_file, "Dominator bb %d for MRs:\n", dom_bb->index); ++ fprintf (dump_file, "Post dominator bb %d for MRs:\n", dom_bb->index); + gimple_dump_bb (dump_file, dom_bb, 0, dump_flags); + fprintf (dump_file, "\n"); + } +@@ -1636,19 +2100,33 @@ optimize_function (cgraph_node *n, function *fn) + gimple *last_used = NULL; + for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si); + gsi_prev (&si)) +- if (comp_mr->stmts[0] == gsi_stmt (si)) +- { +- last_used = gsi_stmt (si); +- if (dump_file) ++ { ++ bool found = false; ++ for (unsigned int i = 0; i < vmrs.length (); i++) ++ /* TODO: take into account only those MRs whose memory should be ++ checked. */ ++ if (vmrs[i]->stmts[0] == gsi_stmt (si)) + { +- fprintf (dump_file, "Last used stmt in dominator bb:\n"); +- print_gimple_stmt (dump_file, last_used, 0); ++ found = true; ++ break; + } +- break; +- } ++ if (found || comp_mr->stmts[0] == gsi_stmt (si)) ++ { ++ last_used = gsi_stmt (si); ++ if (dump_file) ++ { ++ fprintf (dump_file, "Last used stmt in post dominator bb:\n"); ++ print_gimple_stmt (dump_file, last_used, 0); ++ } ++ break; ++ } ++ } + +- split_block (dom_bb, last_used); +- gimple_stmt_iterator gsi = gsi_last_bb (dom_bb); ++ gimple_stmt_iterator gsi; ++ if (last_used) ++ gsi = gsi_for_stmt (last_used); ++ else ++ gsi = gsi_last_bb (dom_bb); + + /* Create new inc var. Insert new_var = old_var + step * factor. */ + decl_map = new tree_map; +@@ -1660,7 +2138,7 @@ optimize_function (cgraph_node *n, function *fn) + stmt_set processed_stmts; + if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (comp_mr->stmts[0]))) + { +- gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0, ++ gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0, 0, + processed_stmts); + inc_var = gimple_assign_lhs (tmp); + } +@@ -1683,86 +2161,26 @@ optimize_function (cgraph_node *n, function *fn) + fprintf (dump_file, "\n"); + } + +- /* Create other new vars. Insert new stmts.
*/ +- vec<memref_t *> used_mr_vec = vNULL; +- for (memref_set::const_iterator it = used_mrs.begin (); +- it != used_mrs.end (); it++) +- used_mr_vec.safe_push (*it); +- used_mr_vec.qsort (memref_id_cmp); +- +- for (unsigned int j = 0; j < used_mr_vec.length (); j++) +- { +- memref_t *mr = used_mr_vec[j]; +- if (mr == comp_mr) +- continue; +- gimple *last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0, +- processed_stmts); +- gcc_assert (last_stmt); +- if (dump_file) +- { +- fprintf (dump_file, "MR (%d) new mem: ", mr->mr_id); +- print_generic_expr (dump_file, gimple_assign_lhs (last_stmt)); +- fprintf (dump_file, "\n"); +- } +- } +- /* On new load check page fault. */ +- /* Insert prefetch instructions. */ +- if (dump_file) +- fprintf (dump_file, "Evaluate addresses and insert prefetch insn.\n"); ++ vec<gimple *> bbends = vNULL; ++ create_stmts_for_used_mrs (used_mr_vec, bbends, stmts, processed_stmts, ++ dist_val, comp_mr); + + vec<gimple *> pcalls = vNULL; +- tree local; +- switch (param_ipa_prefetch_locality) +- { +- case 0: +- local = integer_zero_node; +- break; +- case 1: +- local = integer_one_node; +- break; +- case 2: +- local = build_int_cst (integer_type_node, 2); +- break; +- default: +- case 3: +- local = integer_three_node; +- break; +- } +- tree_set prefetched_addrs; +- for (unsigned int j = 0; j < vmrs.length (); j++) +- { +- memref_t *mr = vmrs[j]; +- /* Don't need to copy the last stmt, since we insert prefetch insn +- instead of it. */ +- gimple_copy_and_remap_memref_stmts (mr, stmts, 1, processed_stmts); +- gimple *last_stmt = mr->stmts[0]; +- gcc_assert (last_stmt); +- tree write_p = mr->is_store ? integer_one_node : integer_zero_node; +- tree addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE); +- if (decl_map->count (addr)) +- addr = (*decl_map)[addr]; +- if (prefetched_addrs.count (addr)) +- continue; +- last_stmt = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH), +- 3, addr, write_p, local); +- pcalls.safe_push (last_stmt); +- gimple_seq_add_stmt (&stmts, last_stmt); +- prefetched_addrs.insert (addr); +- if (dump_file) +- { +- fprintf (dump_file, "Insert %d prefetch stmt:\n", j); +- print_gimple_stmt (dump_file, last_stmt, 0); +- } +- } +- ++ gimple *last_pref = NULL; ++ insert_prefetch_stmts (pcalls, stmts, last_pref, vmrs, processed_stmts); + gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ ++ correct_cfg (bbends, last_pref, dom_bb); ++ + delete decl_map; + +- /* Modify cgraph inserting calls to prefetch intrinsics.
*/ +- for (unsigned i = 0; i < pcalls.length (); i++) +- create_cgraph_edge (n, pcalls[i]); +- ipa_update_overall_fn_summary (n); +- renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (n->decl)); ++ modify_ipa_info (n, pcalls); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "After optimization:\n"); ++ dump_function_to_file (cfun->decl, dump_file, (dump_flags_t)0); ++ } + + return 1; + } +@@ -1781,8 +2199,10 @@ insert_prefetch () + fprintf (dump_file, "Optimize function %s\n", n->dump_name ()); + push_cfun (DECL_STRUCT_FUNCTION (n->decl)); + calculate_dominance_info (CDI_DOMINATORS); ++ calculate_dominance_info (CDI_POST_DOMINATORS); + res |= optimize_function (n, fn); + free_dominance_info (CDI_DOMINATORS); ++ free_dominance_info (CDI_POST_DOMINATORS); + pop_cfun (); + } + return res; +diff --git a/gcc/params.opt b/gcc/params.opt +index 747d0f829..fc700ab79 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -317,6 +317,10 @@ The factor represents the number of inductive variable incrementations to evalua + Common Joined UInteger Var(param_ipa_prefetch_locality) Init(3) IntegerRange(0, 3) Param Optimization + The flag represents temporal locality value between 0 and 3, the higher value means the higher temporal locality in the data. + ++-param=ipa-prefetch-pagesize= ++Common Joined UInteger Var(param_ipa_prefetch_pagesize) Init(4096) Param Optimization ++The flag represents the current page size for runtime checks of memory access addresses. ++ + -param=ira-loop-reserved-regs= + Common Joined UInteger Var(param_ira_loop_reserved_regs) Init(2) Param Optimization + The number of registers in each class kept unused by loop invariant motion. +-- +2.33.0 + diff --git a/0288-Enable-macro-use-commandline.patch b/0288-Enable-macro-use-commandline.patch new file mode 100644 index 0000000000000000000000000000000000000000..cafe01b5c6851ce46b31cb2e97b778be3ddb029e --- /dev/null +++ b/0288-Enable-macro-use-commandline.patch @@ -0,0 +1,207 @@ +From 7a578a8725f8fd7d92fcbbac14841ea7e8d0870f Mon Sep 17 00:00:00 2001 +From: zhangxiaohua +Date: Sun, 25 Aug 2024 23:08:53 +0800 +Subject: [PATCH 157/157] Enable macro-use-commandline + +Signed-off-by: zhangxiaohua +--- + gcc/c-family/c-opts.cc | 4 +++ + gcc/c-family/c.opt | 4 +++ + gcc/doc/cppopts.texi | 4 +++ + gcc/doc/invoke.texi | 1 + + .../gcc.dg/cpp/macro-use-cmdline-1.c | 26 ++++++++++++++ + .../gcc.dg/cpp/macro-use-cmdline-2.c | 34 +++++++++++++++++++ + libcpp/include/cpplib.h | 3 ++ + libcpp/init.cc | 1 + + libcpp/macro.cc | 16 ++++++++- + 9 files changed, 92 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-1.c + create mode 100644 gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-2.c + +diff --git a/gcc/c-family/c-opts.cc b/gcc/c-family/c-opts.cc +index 5134f6128..744b54dc3 100644 +--- a/gcc/c-family/c-opts.cc ++++ b/gcc/c-family/c-opts.cc +@@ -527,6 +527,10 @@ c_common_handle_option (size_t scode, const char *arg, HOST_WIDE_INT value, + cpp_opts->track_macro_expansion = 2; + break; + ++ case OPT_fmacro_use_commandline: ++ cpp_opts->macro_use_commandline = 1; ++ break; ++ + case OPT_fexec_charset_: + cpp_opts->narrow_charset = arg; + break; +diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt +index 07da40ef4..a36c27f07 100644 +--- a/gcc/c-family/c.opt ++++ b/gcc/c-family/c.opt +@@ -2012,6 +2012,10 @@ ftrack-macro-expansion= + C ObjC C++ ObjC++ JoinedOrMissing RejectNegative UInteger + -ftrack-macro-expansion=<0|1|2> Track locations of tokens coming from macro expansion and display them
in error messages. + ++fmacro-use-commandline ++C ObjC C++ ObjC++ JoinedOrMissing RejectNegative UInteger ++Preferentially use options from the commandline. ++ + fpretty-templates + C++ ObjC++ Var(flag_pretty_templates) Init(1) + Do not pretty-print template specializations as the template signature followed by the arguments. +diff --git a/gcc/doc/cppopts.texi b/gcc/doc/cppopts.texi +index c0a92b370..8c8a81eac 100644 +--- a/gcc/doc/cppopts.texi ++++ b/gcc/doc/cppopts.texi +@@ -277,6 +277,10 @@ correct column numbers in warnings or errors, even if tabs appear on the + line. If the value is less than 1 or greater than 100, the option is + ignored. The default is 8. + ++@item -fmacro-use-commandline ++@opindex fmacro-use-commandline ++Preferentially use options from the command line. ++ + @item -ftrack-macro-expansion@r{[}=@var{level}@r{]} + @opindex ftrack-macro-expansion + Track locations of tokens across macro expansions. This allows the +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index bdd8b9429..2ff7d860d 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -630,6 +630,7 @@ Objective-C and Objective-C++ Dialects}. + -fexec-charset=@var{charset} -fextended-identifiers @gol + -finput-charset=@var{charset} -flarge-source-files @gol + -fmacro-prefix-map=@var{old}=@var{new} -fmax-include-depth=@var{depth} @gol ++-fmacro-use-commandline @gol + -fno-canonical-system-headers -fpch-deps -fpch-preprocess @gol + -fpreprocessed -ftabstop=@var{width} -ftrack-macro-expansion @gol + -fwide-exec-charset=@var{charset} -fworking-directory @gol +diff --git a/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-1.c b/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-1.c +new file mode 100644 +index 000000000..f85d9c268 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-1.c +@@ -0,0 +1,26 @@ ++/* ++ { dg-options "-fmacro-use-commandline -DTEST_MACRO=1 -DTEST_MACRO=20" } ++ { dg-do compile } ++ { dg-do run } ++*/ ++ ++/* { dg-warning "-:redefined" "redef TEST_MACRO" { target *-*-* } 0 } ++ { dg-message "-:previous" "prev def TEST_MACRO" { target *-*-* } 0 } ++*/ ++ ++#if DEBUG ++extern int puts (const char *); ++#else ++#define puts(X) ++#endif ++extern void abort (void); ++ ++#define err(str) do { puts(str); abort(); } while (0) ++ ++int main (int argc, char *argv[]) ++{ ++ int macroValue = TEST_MACRO; ++ if (macroValue != 20) ++ err("macroValue"); ++ return 0; ++} +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-2.c b/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-2.c +new file mode 100644 +index 000000000..99d92d1e4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/cpp/macro-use-cmdline-2.c +@@ -0,0 +1,34 @@ ++/* ++ { dg-options "-fmacro-use-commandline -DTEST_MACRO=1" } ++ { dg-do compile } ++ { dg-do run } ++*/ ++ ++#define TEST_MACRO 300 ++#define TEST_MACRO_1 400 ++/* ++ { dg-warning "-:redefined" "redef TEST_MACRO" { target *-*-* } 7 } ++ { dg-message "-:previous" "prev def TEST_MACRO" { target *-*-* } 0 } ++*/ ++ ++#if DEBUG ++extern int puts (const char *); ++#else ++#define puts(X) ++#endif ++ ++extern void abort (void); ++ ++#define err(str) do { puts(str); abort(); } while (0) ++ ++int main (int argc, char *argv[]) ++{ ++ int macroValue = TEST_MACRO; ++ if (macroValue != 1) ++ err("macroValue"); ++ ++ int macroValue1 = TEST_MACRO_1; ++ if (macroValue1 != 400) ++ err("macroValue1"); ++ return 0; ++} +\ No newline at end of file +diff --git a/libcpp/include/cpplib.h b/libcpp/include/cpplib.h +index 3eba6f74b..c6101ca01 100644 +--- 
a/libcpp/include/cpplib.h ++++ b/libcpp/include/cpplib.h +@@ -471,6 +471,9 @@ struct cpp_options + consumes the highest amount of memory. */ + unsigned char track_macro_expansion; + ++ /* Use the options on the command line first. */ ++ unsigned char macro_use_commandline; ++ + /* Nonzero means handle C++ alternate operator names. */ + unsigned char operator_names; + +diff --git a/libcpp/init.cc b/libcpp/init.cc +index f4ab83d21..47be60a36 100644 +--- a/libcpp/init.cc ++++ b/libcpp/init.cc +@@ -215,6 +215,7 @@ cpp_create_reader (enum c_lang lang, cpp_hash_table *table, + cpp_options::track_macro_expansion to learn about the other + values. */ + CPP_OPTION (pfile, track_macro_expansion) = 2; ++ CPP_OPTION (pfile, macro_use_commandline) = 0; + CPP_OPTION (pfile, warn_normalize) = normalized_C; + CPP_OPTION (pfile, warn_literal_suffix) = 1; + CPP_OPTION (pfile, canonical_system_headers) +diff --git a/libcpp/macro.cc b/libcpp/macro.cc +index 8ebf360c0..aa9e4ffa6 100644 +--- a/libcpp/macro.cc ++++ b/libcpp/macro.cc +@@ -3852,7 +3852,21 @@ _cpp_create_definition (cpp_reader *pfile, cpp_hashnode *node) + node->value.macro->line, 0, + "this is the location of the previous definition"); + } +- _cpp_free_definition (node); ++#define LOCATION_FROM_LINEMAP 0 ++#define MIN_LINE_OF_MACRO_BEEN_OVERRIDDEN 96 ++#define MAX_LINE_OF_MACRO_BEEN_OVERRIDDEN 128 ++ if (CPP_OPTION (pfile, macro_use_commandline) ++ && node->value.macro->line >= MIN_LINE_OF_MACRO_BEEN_OVERRIDDEN ++ && node->value.macro->line <= MAX_LINE_OF_MACRO_BEEN_OVERRIDDEN ++ && pfile->forced_token_location == LOCATION_FROM_LINEMAP) ++ { ++ cpp_pedwarning_with_line (pfile, CPP_W_NONE, ++ node->value.macro->line, 0, ++ "use the previous definition from the command line"); ++ return false; ++ } ++ else ++ _cpp_free_definition (node); + } + + /* Enter definition in hash table. */ +-- +2.33.0 + diff --git a/0289-tree-ssa-loop-crc.cc-TARGET_CRC32-may-be-not-defined.patch b/0289-tree-ssa-loop-crc.cc-TARGET_CRC32-may-be-not-defined.patch new file mode 100644 index 0000000000000000000000000000000000000000..05818083d7a37e65e0e4e43ca980d3f49391cc39 --- /dev/null +++ b/0289-tree-ssa-loop-crc.cc-TARGET_CRC32-may-be-not-defined.patch @@ -0,0 +1,35 @@ +From 63f99f46e851aecc070496a0e688a0d118c820a4 Mon Sep 17 00:00:00 2001 +From: YunQiang Su +Date: Mon, 2 Sep 2024 17:57:52 +0800 +Subject: [PATCH] tree-ssa-loop-crc.cc: TARGET_CRC32 may be not defined + +TARGET_CRC32 may not be defined on some architectures; RISC-V is one example. +--- + gcc/tree-ssa-loop-crc.cc | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/gcc/tree-ssa-loop-crc.cc b/gcc/tree-ssa-loop-crc.cc +index b9c2f71ca..7eee9446d 100644 +--- a/gcc/tree-ssa-loop-crc.cc ++++ b/gcc/tree-ssa-loop-crc.cc +@@ -1227,6 +1227,9 @@ convert_to_new_loop (class loop *loop) + static unsigned int + tree_ssa_loop_crc () + { ++#ifndef TARGET_CRC32 ++ return 0; ++#else + if (TARGET_CRC32 == false) + { + warning (OPT____,"The loop-crc optimization is not working." \ +@@ -1269,6 +1272,7 @@ tree_ssa_loop_crc () + } + } + return todo; ++#endif + } + + /* Loop crc.
*/ +-- +2.33.0 + diff --git a/0290-Add-ipa-prefetch-test-for-gcc-s-case.patch b/0290-Add-ipa-prefetch-test-for-gcc-s-case.patch new file mode 100644 index 0000000000000000000000000000000000000000..4545420167bc764595b22b12d7ce486786325429 --- /dev/null +++ b/0290-Add-ipa-prefetch-test-for-gcc-s-case.patch @@ -0,0 +1,209 @@ +From 0534ae05fc313c0d449b48ffe3e01642b644e6d2 Mon Sep 17 00:00:00 2001 +From: Diachkov Ilia +Date: Fri, 6 Sep 2024 10:40:50 +0800 +Subject: [PATCH 1/2] Add ipa-prefetch test for gcc's case + +--- + gcc/ipa-prefetch.cc | 4 +- + gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c | 167 ++++++++++++++++++++ + 2 files changed, 170 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c + +diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc +index b000d4d75..8e628390b 100644 +--- a/gcc/ipa-prefetch.cc ++++ b/gcc/ipa-prefetch.cc +@@ -1668,6 +1668,8 @@ static gimple * + insert_page_check (tree addr, tree_poly_offset_map &offset_map, + gimple_seq &stmts) + { ++ if (dump_file) ++ fprintf (dump_file, "Insert page check.\n"); + poly_offset_int offset = 0; + if (offset_map.count (addr)) + offset = offset_map[addr]; +@@ -1783,7 +1785,7 @@ static gimple * + insert_index_check (tree mem, gimple_seq &stmts) + { + if (dump_file) +- fprintf (dump_file, "Insert array index check\n"); ++ fprintf (dump_file, "Insert array index check.\n"); + tree atype = TREE_TYPE (TREE_OPERAND (mem, 0)); + tree ind = TREE_OPERAND (mem, 1); + if (decl_map->count (ind)) +diff --git a/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c b/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c +new file mode 100644 +index 000000000..f1001c350 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/ipa/ipa-prefetch-gcc.c +@@ -0,0 +1,167 @@ ++/* { dg-do link } */ ++/* { dg-options "-O3 -fipa-prefetch -flto -flto-partition=one -fdump-ipa-ipa_prefetch" } */ ++/* { dg-require-effective-target lto } */ ++ ++/* Based on open source GCC code. */ ++ ++#include <stdio.h> ++#include <stdlib.h> ++#include <stdbool.h> ++ ++#define SPARSESET_ELT_TYPE unsigned int ++#define ALLOCNO_NUM(A) ((A)->num) ++ ++typedef struct sparseset_def ++{ ++ SPARSESET_ELT_TYPE *dense; /* Dense array. */ ++ SPARSESET_ELT_TYPE *sparse; /* Sparse array. */ ++ SPARSESET_ELT_TYPE members; /* Number of elements. */ ++ SPARSESET_ELT_TYPE size; /* Maximum number of elements. */ ++ SPARSESET_ELT_TYPE iter; /* Iterator index. */ ++ unsigned char iter_inc; /* Iteration increment amount. */ ++ bool iterating; ++ SPARSESET_ELT_TYPE elms[2]; /* Combined dense and sparse arrays. */ ++} *sparseset; ++ ++struct ira_allocno ++{ ++ /* The allocno order number starting with 0. Each allocno has an ++ unique number and the number is never changed for the ++ allocno. */ ++ int num; ++ /* Regno for allocno or cap. */ ++ int regno; ++ /*...*/ ++}; ++ ++typedef struct ira_allocno_live_range *allocno_live_range_t; ++typedef struct ira_allocno *ira_allocno_t; ++ ++struct ira_allocno_live_range ++{ ++ /* Allocno whose live range is described by given structure. */ ++ ira_allocno_t allocno; ++ /* Program point range. */ ++ int start, finish; ++ /* Next structure describing program points where the allocno ++ lives. */ ++ allocno_live_range_t next; ++ /* Pointer to structures with the same start/finish.
*/ ++ allocno_live_range_t start_next, finish_next; ++}; ++ ++bool ++sparseset_bit_p (sparseset s, SPARSESET_ELT_TYPE e) ++{ ++ SPARSESET_ELT_TYPE idx; ++ ++ idx = s->sparse[e]; ++ ++ return idx < s->members && s->dense[idx] == e; ++} ++ ++bool new_pseudos_p; ++int ira_max_point, ira_allocnos_num; ++allocno_live_range_t *ira_finish_point_ranges; ++ ++static inline void ++sparseset_clear (sparseset s) ++{ ++ s->members = 0; ++ s->iterating = false; ++} ++ ++sparseset ++sparseset_alloc (SPARSESET_ELT_TYPE n_elms) ++{ ++ unsigned int n_bytes = sizeof (struct sparseset_def) ++ + ((n_elms - 1) * 2 * sizeof (SPARSESET_ELT_TYPE)); ++ ++ /* We use xcalloc rather than xmalloc to silence some valgrind uninitialized ++ read errors when accessing set->sparse[n] when "n" is not, and never has ++ been, in the set. These uninitialized reads are expected, by design and ++ harmless. If this turns into a performance problem due to some future ++ additional users of sparseset, we can revisit this decision. */ ++ sparseset set = (sparseset) calloc (1, n_bytes); ++ set->dense = &(set->elms[0]); ++ set->sparse = &(set->elms[n_elms]); ++ set->size = n_elms; ++ sparseset_clear (set); ++ return set; ++} ++ ++void ++sparseset_insert_bit (sparseset s, SPARSESET_ELT_TYPE e, SPARSESET_ELT_TYPE idx) ++{ ++ s->sparse[e] = idx; ++ s->dense[idx] = e; ++} ++ ++void ++sparseset_swap (sparseset s, SPARSESET_ELT_TYPE idx1, SPARSESET_ELT_TYPE idx2) ++{ ++ SPARSESET_ELT_TYPE tmp = s->dense[idx2]; ++ sparseset_insert_bit (s, s->dense[idx1], idx2); ++ sparseset_insert_bit (s, tmp, idx1); ++} ++ ++void __attribute__ ((noinline)) ++sparseset_clear_bit (sparseset s, SPARSESET_ELT_TYPE e) ++{ ++ if (sparseset_bit_p (s, e)) ++ { ++ SPARSESET_ELT_TYPE idx = s->sparse[e]; ++ SPARSESET_ELT_TYPE iter = s->iter; ++ SPARSESET_ELT_TYPE mem = s->members - 1; ++ ++ /* If we are iterating over this set and we want to delete a ++ member we've already visited, then we swap the element we ++ want to delete with the element at the current iteration ++ index so that it plays well together with the code below ++ that actually removes the element. */ ++ if (s->iterating && idx <= iter) ++ { ++ if (idx < iter) ++ { ++ sparseset_swap (s, idx, iter); ++ idx = iter; ++ } ++ s->iter_inc = 0; ++ } ++ ++ /* Replace the element we want to delete with the last element ++ in the dense array and then decrement s->members, effectively ++ removing the element we want to delete. 
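++ A small worked example: for the set {5, 7} we have dense = [5, 7], ++ sparse[5] = 0, sparse[7] = 1 and members == 2. Deleting 5 copies the last ++ dense element over it (dense = [7, 7], sparse[7] = 0) and drops members ++ to 1, so the stale trailing entry is simply never visited again.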
*/ ++ sparseset_insert_bit (s, s->dense[mem], idx); ++ s->members = mem; ++ } ++} ++ ++allocno_live_range_t r; ++sparseset allocnos_live; ++ ++void ++ira_flattening () ++{ ++ int i; ++ ++ if (new_pseudos_p) ++ { ++ allocnos_live = sparseset_alloc (ira_allocnos_num); ++ for (i = 0; i < ira_max_point; i++) ++ { ++ for (r = ira_finish_point_ranges[i]; r != NULL; r = r->finish_next) ++ sparseset_clear_bit (allocnos_live, ALLOCNO_NUM (r->allocno)); ++ } ++ } ++} ++ ++int main() ++{ ++ ira_flattening (); ++ return 0; ++} ++ ++/* { dg-final { scan-wpa-ipa-dump-times "Insert page check" 1 "ipa_prefetch"} } */ ++/* { dg-final { scan-wpa-ipa-dump-times "Insert 0 prefetch stmt:" 1 "ipa_prefetch"} } */ ++/* { dg-final { scan-wpa-ipa-dump-times "Split dom_bb after condition stmts:" 1 "ipa_prefetch"} } */ +-- +2.33.0 + diff --git a/0291-Fix-settings-for-wide-operations-tests.patch b/0291-Fix-settings-for-wide-operations-tests.patch new file mode 100644 index 0000000000000000000000000000000000000000..1e368b6d4a9ee7dc54af81a9af071f73f3c96ad5 --- /dev/null +++ b/0291-Fix-settings-for-wide-operations-tests.patch @@ -0,0 +1,73 @@ +From 411792b0bbb63715d8e90d46eb4f0d9c810ce8ba Mon Sep 17 00:00:00 2001 +From: Pronin Alexander 00812787 +Date: Tue, 3 Sep 2024 21:26:03 +0800 +Subject: [PATCH 2/2] Fix settings for wide operations tests + +Signed-off-by: lin-houzhong +--- + gcc/testsuite/gcc.dg/double_sized_mul-1.c | 8 +++++--- + gcc/testsuite/gcc.dg/double_sized_mul-2.c | 9 +++++---- + gcc/testsuite/gcc.dg/uaddsub.c | 6 ++++-- + 3 files changed, 14 insertions(+), 9 deletions(-) + +diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-1.c b/gcc/testsuite/gcc.dg/double_sized_mul-1.c +index d32a25223..b848e02de 100644 +--- a/gcc/testsuite/gcc.dg/double_sized_mul-1.c ++++ b/gcc/testsuite/gcc.dg/double_sized_mul-1.c +@@ -1,7 +1,8 @@ +-/* { dg-do compile } */ ++/* { dg-do compile { target aarch64*-*-* x86_64*-*-*} } */ + /* fif-conversion-gimple and fuaddsub-overflow-match-all are required for + proper overflow detection in some cases. */ +-/* { dg-options "-O2 -fif-conversion-gimple -march=armv8.2-a -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */ ++/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */ ++/* { dg-additional-options "-march=armv8.2-a" { target aarch64*-*-* } } */ + #include <stdint.h> + + typedef unsigned __int128 uint128_t; +@@ -138,4 +139,5 @@ uint128_t mul128_perm (uint64_t a, uint64_t b) + return res; + } + +-/* { dg-final { scan-tree-dump-times "double sized mul optimized: 1" 6 "widening_mul" } } */ ++/* { dg-final { scan-tree-dump-times "double sized mul optimized: 1" 6 "widening_mul" { target aarch64*-*-* } } } */ ++/* { dg-final { scan-tree-dump-times "double sized mul optimized: 1" 4 "widening_mul" { target x86_64*-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-2.c b/gcc/testsuite/gcc.dg/double_sized_mul-2.c +index ff35902b7..cf8f0aedd 100644 +--- a/gcc/testsuite/gcc.dg/double_sized_mul-2.c ++++ b/gcc/testsuite/gcc.dg/double_sized_mul-2.c +@@ -1,7 +1,8 @@ +-/* { dg-do compile } */ +-/* fif-conversion-gimple is required for proper overflow detection +- in some cases. */ ++/* { dg-do compile { target aarch64*-*-* x86_64*-*-*} } */ ++/* fif-conversion-gimple and fuaddsub-overflow-match-all are required for ++ proper overflow detection in some cases.
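++ As a sketch, the shape being detected is the schoolbook expansion of ++ uint128_t res = (uint128_t) a * b; ++ for 64-bit a and b, which widening_mul should contract into a single ++ double-size multiply (counted by the "double sized mul optimized" stat).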
*/ ++/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */ ++/* { dg-additional-options "-march=armv8.2-a" { target aarch64*-*-* } } */ + #include <stdint.h> + + typedef unsigned __int128 uint128_t; +diff --git a/gcc/testsuite/gcc.dg/uaddsub.c b/gcc/testsuite/gcc.dg/uaddsub.c +index 96c26d308..dcb587fc8 100644 +--- a/gcc/testsuite/gcc.dg/uaddsub.c ++++ b/gcc/testsuite/gcc.dg/uaddsub.c +@@ -1,5 +1,6 @@ +-/* { dg-do compile } */ ++/* { dg-do compile { target aarch64*-*-* x86_64-*-* } } */ + /* { dg-options "-O2 -fuaddsub-overflow-match-all -fdump-tree-optimized" } */ ++/* { dg-additional-options "-march=armv8.2-a" { target aarch64*-*-* } } */ + #include <stdint.h> + + typedef unsigned __int128 uint128_t; +@@ -140,4 +141,5 @@ uint256_t sub256 (uint128_t a, uint128_t b) + } + + /* { dg-final { scan-tree-dump-times "= .ADD_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */ +-/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" { target aarch64*-*-* } } } */ ++/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 4 "optimized" { target x86_64*-*-* } } } */ +-- +2.33.0 + diff --git a/0292-Fix-errors-in-ipa-prefetch-IAORPF-and-IAOSJ0.patch b/0292-Fix-errors-in-ipa-prefetch-IAORPF-and-IAOSJ0.patch new file mode 100644 index 0000000000000000000000000000000000000000..13341df6672c078b4bc6e3cfba77b18e2c763634 --- /dev/null +++ b/0292-Fix-errors-in-ipa-prefetch-IAORPF-and-IAOSJ0.patch @@ -0,0 +1,42 @@ +From 808294bf0f32aaff1cc7e56a756b246d328b3402 Mon Sep 17 00:00:00 2001 +From: Diachkov Ilia +Date: Fri, 6 Sep 2024 11:10:03 +0800 +Subject: [PATCH 2/3] Fix errors in ipa-prefetch (IAORPF and IAOSJ0) + +Signed-off-by: Diachkov Ilia +--- + gcc/ipa-prefetch.cc | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc +index b000d4d75..74af55af0 100644 +--- a/gcc/ipa-prefetch.cc ++++ b/gcc/ipa-prefetch.cc +@@ -1681,7 +1681,8 @@ insert_page_check (tree addr, tree_poly_offset_map &offset_map, + unsigned long long pmask = ~(param_ipa_prefetch_pagesize - 1); + tree pmask_cst = build_int_cst (utype, pmask); + tree off_tree = wide_int_to_tree (sizetype, offset); +- gcc_assert (TREE_CODE (addr_type) == POINTER_TYPE); ++ gcc_assert (TREE_CODE (addr_type) == POINTER_TYPE ++ || TREE_CODE (addr_type) == REFERENCE_TYPE); + tree addr_with_offset = gimple_build (&stmts, POINTER_PLUS_EXPR, + addr_type, addr, off_tree); + tree conv_addr = make_ssa_name (utype); +@@ -2082,11 +2083,11 @@ optimize_function (cgraph_node *n, function *fn) + for (unsigned int i = 0; i < vmrs.length (); i++) + find_nearest_common_post_dominator (vmrs[i], dom_bb); + +- if (!dom_bb) ++ if (!dom_bb || dom_bb->index == ENTRY_BLOCK || dom_bb->index == EXIT_BLOCK) + { + if (dump_file) +- fprintf (dump_file, "Post dominator bb for MRs is not found. " +- "Skip the case.\n"); ++ fprintf (dump_file, "Post dominator bb for MRs is not found or " ++ "it's an entry/exit block.
Skip the case.\n"); + return 0; + } + else if (dump_file) +-- +2.33.0 + diff --git a/0293-Fix-error-with-stmts-insertion-in-ipa-prefetch-for-I.patch b/0293-Fix-error-with-stmts-insertion-in-ipa-prefetch-for-I.patch new file mode 100644 index 0000000000000000000000000000000000000000..3c9ec2575784c8c42d58935fa61dd66807e1686f --- /dev/null +++ b/0293-Fix-error-with-stmts-insertion-in-ipa-prefetch-for-I.patch @@ -0,0 +1,51 @@ +From bfb77997f423ffe3bdcbd8bb8d7f739fe51ce4f5 Mon Sep 17 00:00:00 2001 +From: Diachkov Ilia +Date: Fri, 6 Sep 2024 11:36:11 +0800 +Subject: [PATCH 3/3] Fix error with stmts insertion in ipa-prefetch (for + IAO6R3) + +Signed-off-by: Diachkov Ilia +--- + gcc/ipa-prefetch.cc | 19 +++++++++++++++++-- + 1 file changed, 17 insertions(+), 2 deletions(-) + +diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc +index b000d4d75..6190c2ebb 100644 +--- a/gcc/ipa-prefetch.cc ++++ b/gcc/ipa-prefetch.cc +@@ -2096,7 +2096,7 @@ optimize_function (cgraph_node *n, function *fn) + fprintf (dump_file, "\n"); + } + +- /* Try to find comp_mr's stmt in the dominator bb. */ ++ /* Try to find comp_mr's stmt in the post dominator bb. */ + gimple *last_used = NULL; + for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si); + gsi_prev (&si)) +@@ -2168,7 +2168,22 @@ optimize_function (cgraph_node *n, function *fn) + vec<gimple *> pcalls = vNULL; + gimple *last_pref = NULL; + insert_prefetch_stmts (pcalls, stmts, last_pref, vmrs, processed_stmts); +- gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ ++ gimple *gstmt = gsi_stmt (gsi); ++ bool insert_after = last_used || gstmt == NULL || !is_ctrl_stmt (gstmt); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Insert prefetch sequence %s stmt:\n", ++ insert_after ? "after": "before"); ++ if (gstmt) ++ print_gimple_stmt (dump_file, gstmt, 0); ++ else ++ fprintf (dump_file, "(no stmts)\n"); ++ } ++ if (insert_after) ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ else ++ gsi_insert_seq_before (&gsi, stmts, GSI_NEW_STMT); + + correct_cfg (bbends, last_pref, dom_bb); + +-- +2.33.0 + diff --git a/0294-Fix-errors-in-ipa-prefetch-IAO50J-and-IAO5H7.patch b/0294-Fix-errors-in-ipa-prefetch-IAO50J-and-IAO5H7.patch new file mode 100644 index 0000000000000000000000000000000000000000..43a88b8a4f5c5dd482deb6f23f77e4d47885141d --- /dev/null +++ b/0294-Fix-errors-in-ipa-prefetch-IAO50J-and-IAO5H7.patch @@ -0,0 +1,80 @@ +From cd79fc29d2cdb73836f8699355113e94b833e0e0 Mon Sep 17 00:00:00 2001 +From: Diachkov Ilia +Date: Wed, 11 Sep 2024 17:18:58 +0800 +Subject: [PATCH 2/2] Fix errors in ipa-prefetch (IAO50J and IAO5H7) + +Signed-off-by: Diachkov Ilia +--- + gcc/ipa-prefetch.cc | 35 ++++++++++++++++++++++++++++++----- + 1 file changed, 30 insertions(+), 5 deletions(-) + +diff --git a/gcc/ipa-prefetch.cc b/gcc/ipa-prefetch.cc +index 5184687aa..685f9c267 100644 +--- a/gcc/ipa-prefetch.cc ++++ b/gcc/ipa-prefetch.cc +@@ -2099,6 +2099,18 @@ optimize_function (cgraph_node *n, function *fn) + fprintf (dump_file, "\n"); + } + ++ /* Check that all used MRs dominate the found post dominator bb. This case ++ may be supported later by copying MR evaluation to the bb. */ ++ for (unsigned int i = 0; i < used_mr_vec.length (); i++) ++ if (!dominated_by_p (CDI_DOMINATORS, dom_bb, ++ gimple_bb (used_mr_vec[i]->stmts[0]))) ++ { ++ if (dump_file) ++ fprintf (dump_file, "MR's (%d) bb does not dominate the found bb %d. " ++ "Skip the case.\n", used_mr_vec[i]->mr_id, dom_bb->index); ++ return 0; ++ } ++ + /* Try to find comp_mr's stmt in the post dominator bb.
*/ + gimple *last_used = NULL; + for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si); + gsi_prev (&si)) +@@ -2133,17 +2145,29 @@ optimize_function (cgraph_node *n, function *fn) + + /* Create new inc var. Insert new_var = old_var + step * factor. */ + decl_map = new tree_map; +- gcc_assert (comp_mr->stmts[0] && gimple_assign_single_p (comp_mr->stmts[0])); +- tree inc_var = gimple_assign_lhs (comp_mr->stmts[0]); ++ gimple *old_inc_stmt = comp_mr->stmts[0]; ++ gcc_assert (old_inc_stmt && gimple_assign_single_p (old_inc_stmt)); ++ tree inc_var = gimple_assign_lhs (old_inc_stmt); ++ if (dump_file) ++ { ++ fprintf (dump_file, "Old inc stmt: "); ++ print_gimple_stmt (dump_file, old_inc_stmt, 0); ++ } + /* If old_var definition dominates the current use, just use it, otherwise + evaluate it just before new inc var evaluation. */ + gimple_seq stmts = NULL; + stmt_set processed_stmts; +- if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (comp_mr->stmts[0]))) ++ tree local_inc_var = inc_var; ++ if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (old_inc_stmt))) + { + gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0, 0, + processed_stmts); +- inc_var = gimple_assign_lhs (tmp); ++ local_inc_var = gimple_assign_lhs (tmp); ++ if (dump_file) ++ { ++ fprintf (dump_file, "Localized old inc stmt: "); ++ print_gimple_stmt (dump_file, tmp, 0); ++ } + } + tree var_type = TREE_TYPE (inc_var); + enum tree_code inc_code; +@@ -2155,7 +2179,8 @@ optimize_function (cgraph_node *n, function *fn) + HOST_WIDE_INT dist_val = tree_to_shwi (step) + * param_ipa_prefetch_distance_factor; + tree dist = build_int_cst (TREE_TYPE (step), dist_val); +- tree new_inc_var = gimple_build (&stmts, inc_code, var_type, inc_var, dist); ++ tree new_inc_var = gimple_build (&stmts, inc_code, var_type, local_inc_var, ++ dist); + (*decl_map)[inc_var] = new_inc_var; + if (dump_file) + { +-- +2.33.0 + diff --git a/0295-Fix-error-with-grouped_load-merge-in-slp-transpose-v.patch b/0295-Fix-error-with-grouped_load-merge-in-slp-transpose-v.patch new file mode 100644 index 0000000000000000000000000000000000000000..8540cd4aca03f8077c98480d442cc194836ea137 --- /dev/null +++ b/0295-Fix-error-with-grouped_load-merge-in-slp-transpose-v.patch @@ -0,0 +1,30 @@ +From 7b4cce4896cefefedba9545a9633585e086b7621 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= +Date: Wed, 11 Sep 2024 18:26:22 +0800 +Subject: [PATCH 1/2] Fix error with grouped_load merge in + slp-transpose-vectorize (for IALR8B) + +--- + gcc/tree-vect-slp.cc | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc +index e3e246977..d4870de43 100644 +--- a/gcc/tree-vect-slp.cc ++++ b/gcc/tree-vect-slp.cc +@@ -3807,7 +3807,11 @@ vect_slp_grouped_load_find (bb_vec_info bb_vinfo, vec<bool> &visited, + these two grouped loads need to be merged.
*/ + tree opb = get_op_base_address (first_element); + unsigned int grp_size_b = DR_GROUP_SIZE (first_element); +- if (opa == opb && grp_size_a == grp_size_b) ++ /* Ensure that the elements merged into the load group meet the alignment condition (dr_misalignment). */ ++ HOST_WIDE_INT diff = 0; ++ diff = (TREE_INT_CST_LOW (DR_INIT (first_element->dr_aux.dr)) ++ - TREE_INT_CST_LOW (DR_INIT (merge_first_element->dr_aux.dr))); ++ if (opa == opb && grp_size_a == grp_size_b && diff >= 0) + { + res.safe_push (first_element); + visited[i] = true; +-- +2.33.0 + diff --git a/0296-Fix-error-in-slp-transpose-vectorize-for-IAQFM3.patch b/0296-Fix-error-in-slp-transpose-vectorize-for-IAQFM3.patch new file mode 100644 index 0000000000000000000000000000000000000000..34862f283b12674816bd1fd597c62a6101312055 --- /dev/null +++ b/0296-Fix-error-in-slp-transpose-vectorize-for-IAQFM3.patch @@ -0,0 +1,28 @@ +From b3a6a170bf1dc0e460e98a7fd02c92e6b036784a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= +Date: Fri, 13 Sep 2024 14:13:07 +0800 +Subject: [PATCH 2/2] Fix error in slp-transpose-vectorize (for IAQFM3) + +--- + gcc/tree-vect-slp.cc | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc +index d4870de43..d7e198dff 100644 +--- a/gcc/tree-vect-slp.cc ++++ b/gcc/tree-vect-slp.cc +@@ -3811,7 +3811,10 @@ vect_slp_grouped_load_find (bb_vec_info bb_vinfo, vec<bool> &visited, + HOST_WIDE_INT diff = 0; + diff = (TREE_INT_CST_LOW (DR_INIT (first_element->dr_aux.dr)) + - TREE_INT_CST_LOW (DR_INIT (merge_first_element->dr_aux.dr))); +- if (opa == opb && grp_size_a == grp_size_b && diff >= 0) ++ if (opa == opb ++ && grp_size_a == grp_size_b ++ && diff >= 0 ++ && check_same_bb (first_element, merge_first_element)) + { + res.safe_push (first_element); + visited[i] = true; +-- +2.33.0 + diff --git a/0297-Fix-grouped-load-merging-error-in-SLP-transpose-vectorization.patch b/0297-Fix-grouped-load-merging-error-in-SLP-transpose-vectorization.patch new file mode 100644 index 0000000000000000000000000000000000000000..21a24c0f4bcaa8f6dce6f75c5be868871a9c1ea0 --- /dev/null +++ b/0297-Fix-grouped-load-merging-error-in-SLP-transpose-vectorization.patch @@ -0,0 +1,26 @@ +From 8b30d71f881e15bfbc514f9b65fee178610e1536 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?=E9=83=91=E6=99=A8=E5=8D=89?= +Date: Wed, 18 Sep 2024 10:48:55 +0800 +Subject: [PATCH] Fix error in slp-transpose-vectorize (for IARHFM) + +--- + gcc/tree-vect-slp.cc | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc +index d7e198dff..fbd638333 100644 +--- a/gcc/tree-vect-slp.cc ++++ b/gcc/tree-vect-slp.cc +@@ -3814,7 +3814,8 @@ vect_slp_grouped_load_find (bb_vec_info bb_vinfo, vec<bool> &visited, + if (opa == opb + && grp_size_a == grp_size_b + && diff >= 0 +- && check_same_bb (first_element, merge_first_element)) ++ && check_same_bb (first_element, merge_first_element) ++ && DR_PTR_INFO (first_element->dr_aux.dr) != DR_PTR_INFO (merge_first_element->dr_aux.dr)) + { + res.safe_push (first_element); + visited[i] = true; +-- +2.33.0 + diff --git a/0298-Mark-prefetch-builtin-as-willreturn.patch b/0298-Mark-prefetch-builtin-as-willreturn.patch new file mode 100644 index 0000000000000000000000000000000000000000..7a489a5d9b0d7e1c7454a66a02ef59ac805532a6 --- /dev/null +++ b/0298-Mark-prefetch-builtin-as-willreturn.patch @@ -0,0 +1,99 @@ +From a252bbd11d22481a1e719ed36d800e2192abb369 Mon Sep 17 00:00:00 2001 +From: Pronin Alexander +Date: Thu, 31 Oct
2024 15:49:27 +0800 +Subject: [PATCH 1/6] Mark prefetch builtin as willreturn + +Signed-off-by: Pronin Alexander +--- + gcc/common.opt | 4 ++++ + gcc/gimple.cc | 30 ++++++++++++++++++++++++++++++ + gcc/gimple.h | 1 + + gcc/tree-ssa-pre.cc | 4 +--- + 4 files changed, 36 insertions(+), 3 deletions(-) + +diff --git a/gcc/common.opt b/gcc/common.opt +index 688d65e4d..be5fcc681 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1313,6 +1313,10 @@ fdelete-null-pointer-checks + Common Var(flag_delete_null_pointer_checks) Init(-1) Optimization + Delete useless null pointer checks. + ++fbuiltin-will-return ++Common Var(flag_builtin_will_return) Optimization ++Consider some of the builtins as definitely returning. ++ + fdevirtualize-at-ltrans + Common Var(flag_ltrans_devirtualize) + Stream extra data to support more aggressive devirtualization in LTO local transformation mode. +diff --git a/gcc/gimple.cc b/gcc/gimple.cc +index 9e62da426..04ca9f161 100644 +--- a/gcc/gimple.cc ++++ b/gcc/gimple.cc +@@ -2998,6 +2998,36 @@ nonbarrier_call_p (gimple *call) + return false; + } + ++static inline bool ++will_return_builtin_p (gimple *call) ++{ ++ if (!flag_builtin_will_return) ++ return false; ++ ++ if (!gimple_call_builtin_p (call, BUILT_IN_NORMAL)) ++ return false; ++ ++ switch (DECL_FUNCTION_CODE (gimple_call_fndecl (call))) ++ { ++ case BUILT_IN_PREFETCH: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool ++will_return_call_p (gimple *call, function *fun) ++{ ++ int flags = gimple_call_flags (call); ++ if (!(flags & (ECF_CONST|ECF_PURE)) ++ || (flags & ECF_LOOPING_CONST_OR_PURE) ++ || stmt_can_throw_external (fun, call)) ++ return will_return_builtin_p (call); ++ ++ return true; ++} ++ + /* Callback for walk_stmt_load_store_ops. + + Return TRUE if OP will dereference the tree stored in DATA, FALSE +diff --git a/gcc/gimple.h b/gcc/gimple.h +index 77a5a07e9..bb05a7664 100644 +--- a/gcc/gimple.h ++++ b/gcc/gimple.h +@@ -1628,6 +1628,7 @@ extern bool gimple_asm_clobbers_memory_p (const gasm *); + extern void dump_decl_set (FILE *, bitmap); + extern bool nonfreeing_call_p (gimple *); + extern bool nonbarrier_call_p (gimple *); ++extern bool will_return_call_p (gimple *, function *); + extern bool infer_nonnull_range (gimple *, tree); + extern bool infer_nonnull_range_by_dereference (gimple *, tree); + extern bool infer_nonnull_range_by_attribute (gimple *, tree); +diff --git a/gcc/tree-ssa-pre.cc b/gcc/tree-ssa-pre.cc +index 98134b5d3..b5264133a 100644 +--- a/gcc/tree-ssa-pre.cc ++++ b/gcc/tree-ssa-pre.cc +@@ -3988,9 +3988,7 @@ compute_avail (function *fun) + that forbids hoisting possibly trapping expressions + before it. */ + int flags = gimple_call_flags (stmt); +- if (!(flags & (ECF_CONST|ECF_PURE)) +- || (flags & ECF_LOOPING_CONST_OR_PURE) +- || stmt_can_throw_external (fun, stmt)) ++ if (!will_return_call_p (stmt, fun)) + /* Defer setting of BB_MAY_NOTRETURN to avoid it + influencing the processing of the call itself. 
*/ + set_bb_may_notreturn = true; +-- +2.33.0 + diff --git a/0299-Backport-Disallow-pointer-operands-for-and-partly-PR.patch b/0299-Backport-Disallow-pointer-operands-for-and-partly-PR.patch new file mode 100644 index 0000000000000000000000000000000000000000..c0a733c1ad49595a8e148a4f51f1df371a84eb46 --- /dev/null +++ b/0299-Backport-Disallow-pointer-operands-for-and-partly-PR.patch @@ -0,0 +1,156 @@ +From 3b109376d057342a31267ea4c9bd422d940874cb Mon Sep 17 00:00:00 2001 +From: Jakub Jelinek +Date: Thu, 31 Oct 2024 16:09:43 +0800 +Subject: [PATCH 2/6] [Backport]Disallow pointer operands for |,^ and partly + &[PR106878] + +Signed-off-by: Jakub Jelinek +--- + gcc/match.pd | 6 ++++- + .../gcc.c-torture/compile/pr106878.c | 15 +++++++++++++ + gcc/tree-cfg.cc | 22 ++++++++++++++++--- + gcc/tree-ssa-reassoc.cc | 16 +++++++++++++- + 4 files changed, 54 insertions(+), 5 deletions(-) + create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr106878.c + +diff --git a/gcc/match.pd b/gcc/match.pd +index 8f41c292f..822e065e8 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -1655,6 +1655,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + && (int_fits_type_p (@1, TREE_TYPE (@0)) + || tree_nop_conversion_p (TREE_TYPE (@0), type))) + || types_match (@0, @1)) ++ && !POINTER_TYPE_P (TREE_TYPE (@0)) ++ && TREE_CODE (TREE_TYPE (@0)) != OFFSET_TYPE + /* ??? This transform conflicts with fold-const.cc doing + Convert (T)(x & c) into (T)x & (T)c, if c is an integer + constants (if x has signed type, the sign bit cannot be set +@@ -1691,7 +1693,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + (if (GIMPLE + && TREE_CODE (@1) != INTEGER_CST + && tree_nop_conversion_p (type, TREE_TYPE (@2)) +- && types_match (type, @0)) ++ && types_match (type, @0) ++ && !POINTER_TYPE_P (TREE_TYPE (@0)) ++ && TREE_CODE (TREE_TYPE (@0)) != OFFSET_TYPE) + (bitop @0 (convert @1))))) + + (for bitop (bit_and bit_ior) +diff --git a/gcc/testsuite/gcc.c-torture/compile/pr106878.c b/gcc/testsuite/gcc.c-torture/compile/pr106878.c +new file mode 100644 +index 000000000..c84571894 +--- /dev/null ++++ b/gcc/testsuite/gcc.c-torture/compile/pr106878.c +@@ -0,0 +1,15 @@ ++/* PR tree-optimization/106878 */ ++ ++typedef __INTPTR_TYPE__ intptr_t; ++typedef __UINTPTR_TYPE__ uintptr_t; ++int a; ++ ++int ++foo (const int *c) ++{ ++ uintptr_t d = ((intptr_t) c | (intptr_t) &a) & 65535 << 16; ++ intptr_t e = (intptr_t) c; ++ if (d != (e & 65535 << 16)) ++ return 1; ++ return 0; ++} +diff --git a/gcc/tree-cfg.cc b/gcc/tree-cfg.cc +index 48b52f785..d33aaec8c 100644 +--- a/gcc/tree-cfg.cc ++++ b/gcc/tree-cfg.cc +@@ -4163,7 +4163,9 @@ verify_gimple_assign_binary (gassign *stmt) + case ROUND_MOD_EXPR: + case RDIV_EXPR: + case EXACT_DIV_EXPR: +- /* Disallow pointer and offset types for many of the binary gimple. */ ++ case BIT_IOR_EXPR: ++ case BIT_XOR_EXPR: ++ /* Disallow pointer and offset types for many of the binary gimple. */ + if (POINTER_TYPE_P (lhs_type) + || TREE_CODE (lhs_type) == OFFSET_TYPE) + { +@@ -4178,9 +4180,23 @@ verify_gimple_assign_binary (gassign *stmt) + + case MIN_EXPR: + case MAX_EXPR: +- case BIT_IOR_EXPR: +- case BIT_XOR_EXPR: ++ /* Continue with generic binary expression handling. */ ++ break; ++ + case BIT_AND_EXPR: ++ if (POINTER_TYPE_P (lhs_type) ++ && TREE_CODE (rhs2) == INTEGER_CST) ++ break; ++ /* Disallow pointer and offset types for many of the binary gimple. 
*/ ++ if (POINTER_TYPE_P (lhs_type) ++ || TREE_CODE (lhs_type) == OFFSET_TYPE) ++ { ++ error ("invalid types for %qs", code_name); ++ debug_generic_expr (lhs_type); ++ debug_generic_expr (rhs1_type); ++ debug_generic_expr (rhs2_type); ++ return true; ++ } + /* Continue with generic binary expression handling. */ + break; + +diff --git a/gcc/tree-ssa-reassoc.cc b/gcc/tree-ssa-reassoc.cc +index e3d521e32..6baef4764 100644 +--- a/gcc/tree-ssa-reassoc.cc ++++ b/gcc/tree-ssa-reassoc.cc +@@ -3617,10 +3617,14 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length, + tree type2 = NULL_TREE; + bool strict_overflow_p = false; + candidates.truncate (0); ++ if (POINTER_TYPE_P (type1)) ++ type1 = pointer_sized_int_node; + for (j = i; j; j = chains[j - 1]) + { + tree type = TREE_TYPE (ranges[j - 1].exp); + strict_overflow_p |= ranges[j - 1].strict_overflow_p; ++ if (POINTER_TYPE_P (type)) ++ type = pointer_sized_int_node; + if ((b % 4) == 3) + { + /* For the signed < 0 cases, the types should be +@@ -3651,6 +3655,8 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length, + tree type = TREE_TYPE (ranges[j - 1].exp); + if (j == k) + continue; ++ if (POINTER_TYPE_P (type)) ++ type = pointer_sized_int_node; + if ((b % 4) == 3) + { + if (!useless_type_conversion_p (type1, type)) +@@ -3680,7 +3686,7 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length, + op = r->exp; + continue; + } +- if (id == l) ++ if (id == l || POINTER_TYPE_P (TREE_TYPE (op))) + { + code = (b % 4) == 3 ? BIT_NOT_EXPR : NOP_EXPR; + g = gimple_build_assign (make_ssa_name (type1), code, op); +@@ -3704,6 +3710,14 @@ optimize_range_tests_cmp_bitwise (enum tree_code opcode, int first, int length, + gimple_seq_add_stmt_without_update (&seq, g); + op = gimple_assign_lhs (g); + } ++ type1 = TREE_TYPE (ranges[k - 1].exp); ++ if (POINTER_TYPE_P (type1)) ++ { ++ gimple *g ++ = gimple_build_assign (make_ssa_name (type1), NOP_EXPR, op); ++ gimple_seq_add_stmt_without_update (&seq, g); ++ op = gimple_assign_lhs (g); ++ } + candidates.pop (); + if (update_range_test (&ranges[k - 1], NULL, candidates.address (), + candidates.length (), opcode, ops, op, +-- +2.33.0 + diff --git a/0300-Remove-erroneous-pattern-from-gimple-ifcvt.patch b/0300-Remove-erroneous-pattern-from-gimple-ifcvt.patch new file mode 100644 index 0000000000000000000000000000000000000000..0eca175156e2190e6135a0e1fb80b979df4f8a7b --- /dev/null +++ b/0300-Remove-erroneous-pattern-from-gimple-ifcvt.patch @@ -0,0 +1,55 @@ +From 91ef8899a80e493042fd2687ad89064c9f90cf17 Mon Sep 17 00:00:00 2001 +From: Pronin Alexander +Date: Thu, 31 Oct 2024 16:14:34 +0800 +Subject: [PATCH 3/6] Remove erroneous pattern from gimple ifcvt + +Signed-off-by: Pronin Alexander +--- + gcc/match.pd | 2 +- + gcc/testsuite/gcc.dg/ifcvt-gimple-1.c | 21 +++++++++++++++++++++ + 2 files changed, 22 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/ifcvt-gimple-1.c + +diff --git a/gcc/match.pd b/gcc/match.pd +index 8f41c292f..2dd6581d1 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -4276,7 +4276,7 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + + (if (flag_if_conversion_gimple) +- (for simple_op (plus minus bit_and bit_ior bit_xor) ++ (for simple_op (plus minus bit_ior bit_xor) + (simplify + (cond @0 (simple_op @1 INTEGER_CST@2) @1) + (switch +diff --git a/gcc/testsuite/gcc.dg/ifcvt-gimple-1.c b/gcc/testsuite/gcc.dg/ifcvt-gimple-1.c +new file mode 100644 +index 000000000..381a4ad51 +--- /dev/null ++++ 
b/gcc/testsuite/gcc.dg/ifcvt-gimple-1.c +@@ -0,0 +1,21 @@ ++/* { dg-do run } */ ++/* { dg-options "-O2 -fno-inline -fif-conversion-gimple" } */ ++ ++#include <stdlib.h> ++ ++void foo(int a, int *p) { ++ *p = a; ++} ++ ++void verify (int a) { ++ if (a != 3) ++ abort (); ++} ++ ++int main() { ++ int a = 0; ++ foo (3, &a); ++ int tmp = (a > 7) ? a & 1 : a; ++ verify (tmp); ++ return 0; ++} +-- +2.33.0 + diff --git a/0301-Add-required-check-for-iteration-through-uses.patch b/0301-Add-required-check-for-iteration-through-uses.patch new file mode 100644 index 0000000000000000000000000000000000000000..105f4f75616777a5f8e3437645f41c75bc7b5d2b --- /dev/null +++ b/0301-Add-required-check-for-iteration-through-uses.patch @@ -0,0 +1,33 @@ +From ca24d352e98e357f4f7b8f0d262201765705a08a Mon Sep 17 00:00:00 2001 +From: Pronin Alexander +Date: Thu, 31 Oct 2024 16:31:33 +0800 +Subject: [PATCH 4/6] Add required check for iteration through uses + +Signed-off-by: Pronin Alexander +--- + gcc/tree-ssa-math-opts.cc | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/gcc/tree-ssa-math-opts.cc b/gcc/tree-ssa-math-opts.cc +index 2c06b8a60..80c06fa01 100644 +--- a/gcc/tree-ssa-math-opts.cc ++++ b/gcc/tree-ssa-math-opts.cc +@@ -4938,8 +4938,13 @@ convert_double_size_mul (gimple_stmt_iterator *gsi, gimple *stmt) + + /* Find the mult low part getter. */ + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, match[3]) +- if (gimple_assign_rhs_code (use_stmt) == REALPART_EXPR) +- break; ++ { ++ if (!is_gimple_assign (use_stmt)) ++ continue; ++ ++ if (gimple_assign_rhs_code (use_stmt) == REALPART_EXPR) ++ break; ++ } + + /* Create high and low (if needed) parts extractors. */ + /* Low part. */ +-- +2.33.0 + diff --git a/0302-Added-param-for-optimization-for-merging-bb-s-with-c.patch b/0302-Added-param-for-optimization-for-merging-bb-s-with-c.patch new file mode 100644 index 0000000000000000000000000000000000000000..da25a9e25c950f625e6e27963f2ab5c54f33d32f --- /dev/null +++ b/0302-Added-param-for-optimization-for-merging-bb-s-with-c.patch @@ -0,0 +1,158 @@ +From 210147e28d542a03588ba3c3fa473301a03bb687 Mon Sep 17 00:00:00 2001 +From: Gmyrikov Konstantin +Date: Thu, 31 Oct 2024 16:45:15 +0800 +Subject: [PATCH 6/6] Added param for optimization for merging bb's with cheap + insns. Zero means the optimization is turned off (default implementation), + one means it is turned on + +Signed-off-by: Gmyrikov Konstantin +--- + gcc/params.opt | 4 +++ + gcc/testsuite/gcc.dg/if_comb1.c | 13 +++++++++ + gcc/testsuite/gcc.dg/if_comb2.c | 13 +++++++++ + gcc/testsuite/gcc.dg/if_comb3.c | 12 +++++++++ + gcc/tree-ssa-ifcombine.cc | 47 ++++++++++++++++++++++++++++++--- + 5 files changed, 86 insertions(+), 3 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/if_comb1.c + create mode 100644 gcc/testsuite/gcc.dg/if_comb2.c + create mode 100644 gcc/testsuite/gcc.dg/if_comb3.c + +diff --git a/gcc/params.opt b/gcc/params.opt +index fc700ab79..3ddfaf5b2 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -789,6 +789,10 @@ Maximum number of VALUEs handled during a single find_base_term call. + Common Joined UInteger Var(param_max_vrp_switch_assertions) Init(10) Param Optimization + Maximum number of assertions to add along the default edge of a switch statement during VRP. + ++-param=merge-assign-stmts-ifcombine= ++Common Joined UInteger Var(param_merge_assign_stmts_ifcombine) Init(0) IntegerRange(0, 1) Param Optimization ++Whether bb's with cheap gimple_assign stmts should be merged in the ifcombine pass.
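++; Illustrative usage (assuming a C translation unit foo.c): ++; gcc -O2 --param=merge-assign-stmts-ifcombine=1 -fdump-tree-ifcombine foo.c ++; as exercised by the if_comb*.c tests added below.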
++ + -param=min-crossjump-insns= + Common Joined UInteger Var(param_min_crossjump_insns) Init(5) IntegerRange(1, 65536) Param Optimization + The minimum number of matching instructions to consider for crossjumping. +diff --git a/gcc/testsuite/gcc.dg/if_comb1.c b/gcc/testsuite/gcc.dg/if_comb1.c +new file mode 100644 +index 000000000..e00adc37d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/if_comb1.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Ofast -S --param=merge-assign-stmts-ifcombine=1 -fdump-tree-ifcombine" } */ ++ ++int foo (double a, double b, int c) ++{ ++ if (c < 10 || a - b > 1.0) ++ return 0; ++ else ++ return 1; ++} ++ ++/* { dg-final { scan-tree-dump "optimizing two comparisons" "ifcombine"} } */ ++/* { dg-final { scan-tree-dump "Merging blocks" "ifcombine"} } */ +diff --git a/gcc/testsuite/gcc.dg/if_comb2.c b/gcc/testsuite/gcc.dg/if_comb2.c +new file mode 100644 +index 000000000..176e7e726 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/if_comb2.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Ofast -S --param=merge-assign-stmts-ifcombine=1 -fdump-tree-ifcombine" } */ ++ ++int foo (int a, int b, int c) ++{ ++ if (a > 1 || b * c < 10) ++ return 0; ++ else ++ return 1; ++} ++ ++/* { dg-final { scan-tree-dump "optimizing two comparisons" "ifcombine"} } */ ++/* { dg-final { scan-tree-dump "Merging blocks" "ifcombine"} } */ +diff --git a/gcc/testsuite/gcc.dg/if_comb3.c b/gcc/testsuite/gcc.dg/if_comb3.c +new file mode 100644 +index 000000000..aa2e4510c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/if_comb3.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Ofast -S --param=merge-assign-stmts-ifcombine=1 -fdump-tree-ifcombine" } */ ++ ++int foo (int a, int b, int c) ++{ ++ if (a > 1 && b + c < 10) ++ a++; ++ return a; ++} ++ ++/* { dg-final { scan-tree-dump "optimizing two comparisons" "ifcombine"} } */ ++/* { dg-final { scan-tree-dump "Merging blocks" "ifcombine"} } */ +diff --git a/gcc/tree-ssa-ifcombine.cc b/gcc/tree-ssa-ifcombine.cc +index ce9bbebf9..264a8bcae 100644 +--- a/gcc/tree-ssa-ifcombine.cc ++++ b/gcc/tree-ssa-ifcombine.cc +@@ -110,6 +110,18 @@ recognize_if_then_else (basic_block cond_bb, + return true; + } + ++/* Verify whether a gimple insn is cheap for the ++ param=merge-assign-stmts-ifcombine optimization. */ ++ ++bool is_insn_cheap (enum tree_code t) ++{ ++ static enum tree_code cheap_insns[] = {MULT_EXPR, PLUS_EXPR, MINUS_EXPR}; ++ for (size_t i = 0; i < sizeof (cheap_insns) / sizeof (cheap_insns[0]); i++) ++ if (t == cheap_insns[i]) ++ return true; ++ return false; ++} ++ + /* Verify if the basic block BB does not have side-effects. Return + true in this case, else false. */ + +@@ -572,9 +584,38 @@ ifcombine_ifandif (basic_block inner_cond_bb, bool inner_inv, + = param_logical_op_non_short_circuit; + if (!logical_op_non_short_circuit || sanitize_coverage_p ()) + return false; +- /* Only do this optimization if the inner bb contains only the conditional. 
*/ +- if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb (inner_cond_bb))) +- return false; ++ if (param_merge_assign_stmts_ifcombine) ++ { ++ int number_cheap_insns = 0; ++ int number_conds = 0; ++ for (auto i = gsi_start_nondebug_after_labels_bb ++ (outer_cond_bb); !gsi_end_p (i); gsi_next_nondebug (&i)) ++ if (gimple_code (gsi_stmt (i)) == GIMPLE_ASSIGN ++ && is_insn_cheap (gimple_assign_rhs_code (gsi_stmt (i)))) ++ number_cheap_insns++; ++ else if (gimple_code (gsi_stmt (i)) == GIMPLE_COND) ++ number_conds++; ++ for (auto i = gsi_start_nondebug_after_labels_bb ++ (inner_cond_bb); !gsi_end_p (i); gsi_next_nondebug (&i)) ++ if (gimple_code (gsi_stmt (i)) == GIMPLE_ASSIGN ++ && is_insn_cheap (gimple_assign_rhs_code (gsi_stmt (i)))) ++ number_cheap_insns++; ++ else if (gimple_code (gsi_stmt (i)) == GIMPLE_COND) ++ number_conds++; ++ if (!(number_cheap_insns == 1 && number_conds == 2) ++ && !gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb ++ (inner_cond_bb))) ++ return false; ++ } ++ else ++ { ++ /* Only do this optimization if the inner bb contains ++ only the conditional. */ ++ if (!gsi_one_before_end_p (gsi_start_nondebug_after_labels_bb ++ (inner_cond_bb))) ++ return false; ++ } ++ + t1 = fold_build2_loc (gimple_location (inner_cond), + inner_cond_code, + boolean_type_node, +-- +2.33.0 + diff --git a/gcc.spec b/gcc.spec index a49e701513400ce5e6ff9a139ae57e5fe108f2ec..449ea3ba4e26e54261920249341f2bc80a5f2633 100644 --- a/gcc.spec +++ b/gcc.spec @@ -2,7 +2,7 @@ %global gcc_major 12 # Note, gcc_release must be integer, if you want to add suffixes to # %%{release}, append them after %%{gcc_release} on Release: line. -%global gcc_release 37 +%global gcc_release 39 %global _unpackaged_files_terminate_build 0 %global _performance_build 1 @@ -203,187 +203,212 @@ Patch93: 0093-fix-bugs-within-pointer-compression-and-DFE.patch Patch94: 0094-BUGFIX-AutoBOLT-function-miss-bind-type.patch Patch95: 0095-STABS-remove-gstabs-and-gxcoff-functionality.patch Patch96: 0096-Bugfix-Autofdo-use-PMU-sampling-set-num-eauals-den.patch -Patch97: 0097-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch -Patch98: 0098-Backport-SME-AArch64-Cleanup-option-processing-code.patch -Patch99: 0099-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch -Patch100: 0100-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch -Patch101: 0101-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch -Patch102: 0102-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch -Patch103: 0103-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch -Patch104: 0104-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch -Patch105: 0105-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch -Patch106: 0106-Backport-SME-aarch64-Small-config.gcc-cleanups.patch -Patch107: 0107-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch -Patch108: 0108-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch -Patch109: 0109-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch -Patch110: 0110-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch -Patch111: 0111-Backport-SME-aarch64-Simplify-feature-definitions.patch -Patch112: 0112-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch -Patch113: 0113-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch -Patch114: 0114-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch -Patch115: 0115-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch -Patch116: 0116-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch 
-Patch117: 0117-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch -Patch118: 0118-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch -Patch119: 0119-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch -Patch120: 0120-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch -Patch121: 0121-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch -Patch122: 0122-Backport-SME-aarch64-Commonise-some-folding-code.patch -Patch123: 0123-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch -Patch124: 0124-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch -Patch125: 0125-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch -Patch126: 0126-Backport-SME-mode-switching-Add-note-problem.patch -Patch127: 0127-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch -Patch128: 0128-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch -Patch129: 0129-Backport-SME-mode-switching-Simplify-recording-of-tr.patch -Patch130: 0130-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch -Patch131: 0131-Backport-SME-mode-switching-Allow-targets-to-set-the.patch -Patch132: 0132-Backport-SME-mode-switching-Pass-set-of-live-registe.patch -Patch133: 0133-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch -Patch134: 0134-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch -Patch135: 0135-Backport-SME-mode-switching-Add-a-target-configurabl.patch -Patch136: 0136-Backport-SME-mode-switching-Add-a-backprop-hook.patch -Patch137: 0137-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch -Patch138: 0138-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch -Patch139: 0139-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch -Patch140: 0140-Backport-SME-function-Change-return-type-of-predicat.patch -Patch141: 0141-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch -Patch142: 0142-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch -Patch143: 0143-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch -Patch144: 0144-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch -Patch145: 0145-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch -Patch146: 0146-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch -Patch147: 0147-Backport-SME-recog-Support-space-in-cons.patch -Patch148: 0148-Backport-SME-aarch64-Generalise-require_immediate_la.patch -Patch149: 0149-Backport-SME-aarch64-Add-backend-support-for-DFP.patch -Patch150: 0150-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch -Patch151: 0151-Backport-SME-aarch64-Simplify-output-template-emissi.patch -Patch152: 0152-Backport-SME-Improve-immediate-expansion-PR106583.patch -Patch153: 0153-Backport-SME-AArch64-Cleanup-move-immediate-code.patch -Patch154: 0154-Backport-SME-AArch64-convert-some-patterns-to-compac.patch -Patch155: 0155-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch -Patch156: 0156-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch -Patch157: 0157-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch -Patch158: 0158-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch -Patch159: 0159-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch -Patch160: 0160-Backport-SME-aarch64-Replace-vague-previous-argument.patch -Patch161: 0161-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch -Patch162: 0162-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch -Patch163: 0163-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch -Patch164: 0164-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch -Patch165: 
0165-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch -Patch166: 0166-Backport-SME-aarch64-Fix-plugin-header-install.patch -Patch167: 0167-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch -Patch168: 0168-Backport-SME-aarch64-Add-sme.patch -Patch169: 0169-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch -Patch170: 0170-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch -Patch171: 0171-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch -Patch172: 0172-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch -Patch173: 0173-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch -Patch174: 0174-Backport-SME-AArch64-Support-new-tbranch-optab.patch -Patch175: 0175-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch -Patch176: 0176-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch -Patch177: 0177-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch -Patch178: 0178-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch -Patch179: 0179-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch -Patch180: 0180-Backport-SME-aarch64-Robustify-stack-tie-handling.patch -Patch181: 0181-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch -Patch182: 0182-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch -Patch183: 0183-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch -Patch184: 0184-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch -Patch185: 0185-Backport-SME-aarch64-Tweak-frame_size-comment.patch -Patch186: 0186-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch -Patch187: 0187-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch -Patch188: 0188-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch -Patch189: 0189-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch -Patch190: 0190-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch -Patch191: 0191-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch -Patch192: 0192-Backport-SME-aarch64-Explicitly-record-probe-registe.patch -Patch193: 0193-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch -Patch194: 0194-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch -Patch195: 0195-Backport-SME-Handle-epilogues-that-contain-jumps.patch -Patch196: 0196-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch -Patch197: 0197-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch -Patch198: 0198-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch -Patch199: 0199-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch -Patch200: 0200-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch -Patch201: 0201-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch -Patch202: 0202-Backport-SME-aarch64-Generalise-unspec_based_functio.patch -Patch203: 0203-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch -Patch204: 0204-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch -Patch205: 0205-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch -Patch206: 0206-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch -Patch207: 0207-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch -Patch208: 0208-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch -Patch209: 0209-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch -Patch210: 0210-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch -Patch211: 0211-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch -Patch212: 0212-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch -Patch213: 
0213-Backport-SME-libgcc-Fix-config.in.patch -Patch214: 0214-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch -Patch215: 0215-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch -Patch216: 0216-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch -Patch217: 0217-Backport-SME-aarch64-Add-V1DI-mode.patch -Patch218: 0218-Backport-SME-Allow-md-iterators-to-include-other-ite.patch -Patch219: 0219-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch -Patch220: 0220-Backport-SME-attribs-Add-overloads-with-namespace-na.patch -Patch221: 0221-Backport-SME-vec-Add-array_slice-constructors-from-n.patch -Patch222: 0222-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch -Patch223: 0223-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch -Patch224: 0224-SME-Add-missing-header-file-in-aarch64.cc.patch -Patch225: 0225-Backport-SME-c-Add-support-for-__extension__.patch -Patch226: 0226-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch -Patch227: 0227-Backport-SME-c-Support-C2x-empty-initializer-braces.patch -Patch228: 0228-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch -Patch229: 0229-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch -Patch230: 0230-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch -Patch231: 0231-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch -Patch232: 0232-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch -Patch233: 0233-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch -Patch234: 0234-Backport-SME-aarch64-Remove-expected-error-for-compo.patch -Patch235: 0235-Backport-SME-aarch64-Remove-redundant-builtins-code.patch -Patch236: 0236-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch -Patch237: 0237-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch -Patch238: 0238-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch -Patch239: 0239-Backport-SME-explow-Allow-dynamic-allocations-after-.patch -Patch240: 0240-Backport-SME-PR105169-Fix-references-to-discarded-se.patch -Patch241: 0241-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch -Patch242: 0242-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch -Patch243: 0243-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch -Patch244: 0244-SME-Adapt-some-testsuites.patch -Patch245: 0245-SME-Fix-error-by-backported-patches-and-IPA-prefetch.patch -Patch246: 0246-aarch64-Fix-return-register-handling-in-untyped_call.patch -Patch247: 0247-aarch64-Fix-loose-ldpstp-check.patch -Patch248: 0248-x86-Add-a-new-option-mdaz-ftz-to-enable-FTZ-and-DAZ-.patch -Patch249: 0249-Explicitly-view_convert_expr-mask-to-signed-type-whe.patch -Patch250: 0250-Make-option-mvzeroupper-independent-of-optimization-.patch -Patch251: 0251-i386-Sync-tune_string-with-arch_string-for-target-at.patch -Patch252: 0252-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch -Patch253: 0253-Refine-maskstore-patterns-with-UNSPEC_MASKMOV.patch -Patch254: 0254-x86-Update-model-values-for-Alderlake-and-Rocketlake.patch -Patch255: 0255-Workaround-possible-CPUID-bug-in-Sandy-Bridge.patch -Patch256: 0256-Software-mitigation-Disable-gather-generation-in-vec.patch -Patch257: 0257-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch -Patch258: 0258-Remove-constraint-modifier-for-fcmaddcph-fmaddcph-fc.patch -Patch259: 0259-Disparage-slightly-for-the-alternative-which-move-DF.patch -Patch260: 0260-Fix-wrong-code-due-to-vec_merge-pcmp-to-blendvb-spli.patch -Patch261: 0261-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch -Patch262: 
0262-Disable-FMADD-in-chains-for-Zen4-and-generic.patch -Patch263: 0263-Initial-Raptorlake-Support.patch -Patch264: 0264-Initial-Meteorlake-Support.patch -Patch265: 0265-Support-Intel-AMX-FP16-ISA.patch -Patch266: 0266-Support-Intel-prefetchit0-t1.patch -Patch267: 0267-Initial-Granite-Rapids-Support.patch -Patch268: 0268-Support-Intel-AMX-COMPLEX.patch -Patch269: 0269-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch -Patch270: 0270-Initial-Granite-Rapids-D-Support.patch -Patch271: 0271-Correct-Granite-Rapids-D-documentation.patch -Patch272: 0272-i386-Remove-Meteorlake-s-family_model.patch -Patch273: 0273-x86-Update-model-values-for-Alderlake-Rocketlake-and.patch -Patch274: 0274-x86-Update-model-values-for-Raptorlake.patch -Patch275: 0275-Fix-target_clone-arch-graniterapids-d.patch -Patch276: 0276-i386-Change-prefetchi-output-template.patch -Patch277: 0277-i386-Add-non-optimize-prefetchi-intrins.patch +Patch97: 0097-Improve-non-loop-disambiguation.patch +Patch98: 0098-CHREC-multiplication-and-undefined-overflow.patch +Patch99: 0099-Enable-Transposed-SLP.patch +Patch100: 0100-Add-hip09-machine-discribtion.patch +Patch101: 0101-Add-hip11-CPU-pipeline-scheduling.patch +Patch102: 0102-Add-Crc32-Optimization-in-Gzip-For-crc32-algorithm-i.patch +Patch103: 0103-SME-Remove-hip09-and-hip11-in-aarch64-cores.def-to-b.patch +Patch104: 0104-Backport-SME-AArch64-Cleanup-CPU-option-processing-c.patch +Patch105: 0105-Backport-SME-AArch64-Cleanup-option-processing-code.patch +Patch106: 0106-Backport-SME-aarch64-Add-march-support-for-Armv9.1-A.patch +Patch107: 0107-Backport-SME-Revert-aarch64-Define-__ARM_FEATURE_RCP.patch +Patch108: 0108-Backport-SME-Revert-Ampere-1-and-Ampere-1A-core-defi.patch +Patch109: 0109-Backport-SME-aarch64-Rename-AARCH64_ISA-architecture.patch +Patch110: 0110-Backport-SME-aarch64-Rename-AARCH64_FL-architecture-.patch +Patch111: 0111-Backport-SME-aarch64-Rename-AARCH64_FL_FOR_ARCH-macr.patch +Patch112: 0112-Backport-SME-aarch64-Add-V-to-aarch64-arches.def-nam.patch +Patch113: 0113-Backport-SME-aarch64-Small-config.gcc-cleanups.patch +Patch114: 0114-Backport-SME-aarch64-Avoid-redundancy-in-aarch64-cor.patch +Patch115: 0115-Backport-SME-aarch64-Remove-AARCH64_FL_RCPC8_4-PR107.patch +Patch116: 0116-Backport-SME-aarch64-Fix-transitive-closure-of-featu.patch +Patch117: 0117-Backport-SME-aarch64-Reorder-an-entry-in-aarch64-opt.patch +Patch118: 0118-Backport-SME-aarch64-Simplify-feature-definitions.patch +Patch119: 0119-Backport-SME-aarch64-Simplify-generation-of-.arch-st.patch +Patch120: 0120-Backport-SME-aarch64-Avoid-std-string-in-static-data.patch +Patch121: 0121-Backport-SME-aarch64-Tweak-constness-of-option-relat.patch +Patch122: 0122-Backport-SME-aarch64-Make-more-use-of-aarch64_featur.patch +Patch123: 0123-Backport-SME-aarch64-Tweak-contents-of-flags_on-off-.patch +Patch124: 0124-Backport-SME-aarch64-Tweak-handling-of-mgeneral-regs.patch +Patch125: 0125-Backport-SME-aarch64-Remove-redundant-TARGET_-checks.patch +Patch126: 0126-Backport-SME-aarch64-Define-__ARM_FEATURE_RCPC.patch +Patch127: 0127-Backport-SME-Add-Ampere-1-and-Ampere-1A-core-definit.patch +Patch128: 0128-Backport-SME-aarch64-Fix-nosimd-handling-of-FPR-move.patch +Patch129: 0129-Backport-SME-aarch64-Commonise-some-folding-code.patch +Patch130: 0130-Backport-SME-aarch64-Add-a-Z-operand-modifier-for-SV.patch +Patch131: 0131-Backport-SME-mode-switching-Remove-unused-bbnum-fiel.patch +Patch132: 0132-Backport-SME-mode-switching-Tweak-the-macro-hook-doc.patch +Patch133: 0133-Backport-SME-mode-switching-Add-note-problem.patch 
+Patch134: 0134-Backport-SME-mode-switching-Avoid-quadractic-list-op.patch +Patch135: 0135-Backport-SME-mode-switching-Fix-the-mode-passed-to-t.patch +Patch136: 0136-Backport-SME-mode-switching-Simplify-recording-of-tr.patch +Patch137: 0137-Backport-SME-mode-switching-Tweak-entry-exit-handlin.patch +Patch138: 0138-Backport-SME-mode-switching-Allow-targets-to-set-the.patch +Patch139: 0139-Backport-SME-mode-switching-Pass-set-of-live-registe.patch +Patch140: 0140-Backport-SME-mode-switching-Pass-the-set-of-live-reg.patch +Patch141: 0141-Backport-SME-mode-switching-Use-1-based-edge-aux-fie.patch +Patch142: 0142-Backport-SME-mode-switching-Add-a-target-configurabl.patch +Patch143: 0143-Backport-SME-mode-switching-Add-a-backprop-hook.patch +Patch144: 0144-Backport-SME-aarch64-Add-a-result_mode-helper-functi.patch +Patch145: 0145-Backport-SME-rtl-Try-to-remove-EH-edges-after-pro-ep.patch +Patch146: 0146-Backport-SME-Fix-PR-middle-end-107705-ICE-after-recl.patch +Patch147: 0147-Backport-SME-function-Change-return-type-of-predicat.patch +Patch148: 0148-Backport-SME-Allow-prologues-and-epilogues-to-be-ins.patch +Patch149: 0149-Backport-SME-Add-a-target-hook-for-sibcall-epilogues.patch +Patch150: 0150-Backport-SME-Add-a-new-target-hook-TARGET_START_CALL.patch +Patch151: 0151-Backport-SME-Allow-targets-to-add-USEs-to-asms.patch +Patch152: 0152-Backport-SME-New-compact-syntax-for-insn-and-insn_sp.patch +Patch153: 0153-Backport-SME-recog-Improve-parser-for-pattern-new-co.patch +Patch154: 0154-Backport-SME-recog-Support-space-in-cons.patch +Patch155: 0155-Backport-SME-aarch64-Generalise-require_immediate_la.patch +Patch156: 0156-Backport-SME-aarch64-Add-backend-support-for-DFP.patch +Patch157: 0157-Backport-SME-aarch64-Vector-move-fixes-for-nosimd.patch +Patch158: 0158-Backport-SME-aarch64-Simplify-output-template-emissi.patch +Patch159: 0159-Backport-SME-Improve-immediate-expansion-PR106583.patch +Patch160: 0160-Backport-SME-AArch64-Cleanup-move-immediate-code.patch +Patch161: 0161-Backport-SME-AArch64-convert-some-patterns-to-compac.patch +Patch162: 0162-Backport-SME-aarch64-Use-SVE-s-RDVL-instruction.patch +Patch163: 0163-Backport-SME-aarch64-Make-AARCH64_FL_SVE-requirement.patch +Patch164: 0164-Backport-SME-aarch64-Add-group-suffixes-to-SVE-intri.patch +Patch165: 0165-Backport-SME-aarch64-Add-sve_type-to-SVE-builtins-co.patch +Patch166: 0166-Backport-SME-aarch64-Generalise-some-SVE-ACLE-error-.patch +Patch167: 0167-Backport-SME-aarch64-Replace-vague-previous-argument.patch +Patch168: 0168-Backport-SME-aarch64-Make-more-use-of-sve_type-in-AC.patch +Patch169: 0169-Backport-SME-aarch64-Tweak-error-message-for-tuple-v.patch +Patch170: 0170-Backport-SME-aarch64-Add-tuple-forms-of-svreinterpre.patch +Patch171: 0171-Backport-SME-attribs-Use-existing-traits-for-excl_ha.patch +Patch172: 0172-Backport-SME-Allow-target-attributes-in-non-gnu-name.patch +Patch173: 0173-Backport-SME-aarch64-Fix-plugin-header-install.patch +Patch174: 0174-Backport-SME-aarch64-Add-arm_streaming-_compatible-a.patch +Patch175: 0175-Backport-SME-aarch64-Add-sme.patch +Patch176: 0176-Backport-SME-aarch64-Add-r-m-and-m-r-alternatives-to.patch +Patch177: 0177-Backport-SME-AArch64-Rewrite-simd-move-immediate-pat.patch +Patch178: 0178-Backport-SME-AArch64-remove-test-comment-from-mov-mo.patch +Patch179: 0179-Backport-SME-aarch64-Distinguish-streaming-compatibl.patch +Patch180: 0180-Backport-SME-aarch64-Mark-relevant-SVE-instructions-.patch +Patch181: 0181-Backport-SME-AArch64-Support-new-tbranch-optab.patch +Patch182: 
0182-Backport-SME-aarch64-Use-local-frame-vars-in-shrink-.patch +Patch183: 0183-Backport-SME-aarch64-Avoid-a-use-of-callee_offset.patch +Patch184: 0184-Backport-SME-aarch64-Explicitly-handle-frames-with-n.patch +Patch185: 0185-Backport-SME-aarch64-Add-bytes_below_saved_regs-to-f.patch +Patch186: 0186-Backport-SME-aarch64-Add-bytes_below_hard_fp-to-fram.patch +Patch187: 0187-Backport-SME-aarch64-Robustify-stack-tie-handling.patch +Patch188: 0188-Backport-SME-aarch64-Tweak-aarch64_save-restore_call.patch +Patch189: 0189-Backport-SME-aarch64-Only-calculate-chain_offset-if-.patch +Patch190: 0190-Backport-SME-aarch64-Rename-locals_offset-to-bytes_a.patch +Patch191: 0191-Backport-SME-aarch64-Rename-hard_fp_offset-to-bytes_.patch +Patch192: 0192-Backport-SME-aarch64-Tweak-frame_size-comment.patch +Patch193: 0193-Backport-SME-aarch64-Measure-reg_offset-from-the-bot.patch +Patch194: 0194-Backport-SME-aarch64-Simplify-top-of-frame-allocatio.patch +Patch195: 0195-Backport-SME-aarch64-Minor-initial-adjustment-tweak.patch +Patch196: 0196-Backport-SME-aarch64-Tweak-stack-clash-boundary-cond.patch +Patch197: 0197-Backport-SME-aarch64-Put-LR-save-probe-in-first-16-b.patch +Patch198: 0198-Backport-SME-aarch64-Simplify-probe-of-final-frame-a.patch +Patch199: 0199-Backport-SME-aarch64-Explicitly-record-probe-registe.patch +Patch200: 0200-Backport-SME-aarch64-Remove-below_hard_fp_saved_regs.patch +Patch201: 0201-Backport-SME-aarch64-Make-stack-smash-canary-protect.patch +Patch202: 0202-Backport-SME-Handle-epilogues-that-contain-jumps.patch +Patch203: 0203-Backport-SME-aarch64-Use-vecs-to-store-register-save.patch +Patch204: 0204-Backport-SME-aarch64-Put-LR-save-slot-first-in-more-.patch +Patch205: 0205-Backport-SME-aarch64-Switch-PSTATE.SM-around-calls.patch +Patch206: 0206-Backport-SME-aarch64-Add-support-for-SME-ZA-attribut.patch +Patch207: 0207-Backport-SME-aarch64-Add-a-register-class-for-w12-w1.patch +Patch208: 0208-Backport-SME-aarch64-Add-a-VNx1TI-mode.patch +Patch209: 0209-Backport-SME-aarch64-Generalise-unspec_based_functio.patch +Patch210: 0210-Backport-SME-aarch64-Generalise-_m-rules-for-SVE-int.patch +Patch211: 0211-Backport-SME-aarch64-Add-support-for-arm_sme.h.patch +Patch212: 0212-Backport-SME-aarch64-Add-support-for-__arm_locally_s.patch +Patch213: 0213-Backport-SME-aarch64-Handle-PSTATE.SM-across-abnorma.patch +Patch214: 0214-Backport-SME-aarch64-Enforce-inlining-restrictions-f.patch +Patch215: 0215-Backport-SME-aarch64-Update-sibcall-handling-for-SME.patch +Patch216: 0216-Backport-SME-libgcc-aarch64-Configure-check-for-.var.patch +Patch217: 0217-Backport-SME-libgcc-aarch64-Configure-check-for-__ge.patch +Patch218: 0218-Backport-SME-libgcc-aarch64-Add-SME-runtime-support.patch +Patch219: 0219-Backport-SME-libgcc-aarch64-Add-SME-unwinder-support.patch +Patch220: 0220-Backport-SME-libgcc-Fix-config.in.patch +Patch221: 0221-Backport-SME-aarch64-Add-funwind-tables-to-some-test.patch +Patch222: 0222-Backport-SME-aarch64-Skip-some-SME-register-save-tes.patch +Patch223: 0223-Backport-SME-Add-OPTIONS_H_EXTRA-to-GTFILES.patch +Patch224: 0224-Backport-SME-aarch64-Add-V1DI-mode.patch +Patch225: 0225-Backport-SME-Allow-md-iterators-to-include-other-ite.patch +Patch226: 0226-Backport-SME-riscv-Add-support-for-strlen-inline-exp.patch +Patch227: 0227-Backport-SME-attribs-Add-overloads-with-namespace-na.patch +Patch228: 0228-Backport-SME-vec-Add-array_slice-constructors-from-n.patch +Patch229: 0229-Backport-SME-A-couple-of-va_gc_atomic-tweaks.patch +Patch230: 
0230-Backport-SME-middle-end-Fix-issue-of-poly_uint16-1-1.patch +Patch231: 0231-SME-Add-missing-header-file-in-aarch64.cc.patch +Patch232: 0232-Backport-SME-c-Add-support-for-__extension__.patch +Patch233: 0233-Backport-SME-lra-Updates-of-biggest-mode-for-hard-re.patch +Patch234: 0234-Backport-SME-c-Support-C2x-empty-initializer-braces.patch +Patch235: 0235-Backport-SME-aarch64-Update-sizeless-tests-for-recen.patch +Patch236: 0236-Backport-SME-attribs-Namespace-aware-lookup_attribut.patch +Patch237: 0237-Backport-SME-c-family-ICE-with-gnu-nocf_check-PR1069.patch +Patch238: 0238-Backport-SME-AArch64-Fix-assert-in-aarch64_move_imm-.patch +Patch239: 0239-Backport-SME-testsuite-Only-run-fcf-protection-test-.patch +Patch240: 0240-Backport-SME-Fix-PRs-106764-106765-and-107307-all-IC.patch +Patch241: 0241-Backport-SME-aarch64-Remove-expected-error-for-compo.patch +Patch242: 0242-Backport-SME-aarch64-Remove-redundant-builtins-code.patch +Patch243: 0243-Backport-SME-AArch64-Fix-Armv9-a-warnings-that-get-e.patch +Patch244: 0244-Backport-SME-Canonicalize-X-Y-as-X-Y-in-match.pd-whe.patch +Patch245: 0245-Backport-SME-middle-end-Add-new-tbranch-optab-to-add.patch +Patch246: 0246-Backport-SME-explow-Allow-dynamic-allocations-after-.patch +Patch247: 0247-Backport-SME-PR105169-Fix-references-to-discarded-se.patch +Patch248: 0248-Backport-SME-RISC-V-autovec-Verify-that-GET_MODE_NUN.patch +Patch249: 0249-Backport-SME-Add-operator-to-gimple_stmt_iterator-an.patch +Patch250: 0250-Backport-SME-tree-optimization-110221-SLP-and-loop-m.patch +Patch251: 0251-SME-Adapt-some-testsuites.patch +Patch252: 0252-SME-Fix-error-by-backported-patches-and-IPA-prefetch.patch +Patch253: 0253-aarch64-Fix-return-register-handling-in-untyped_call.patch +Patch254: 0254-aarch64-Fix-loose-ldpstp-check.patch +Patch255: 0255-x86-Add-a-new-option-mdaz-ftz-to-enable-FTZ-and-DAZ-.patch +Patch256: 0256-Explicitly-view_convert_expr-mask-to-signed-type-whe.patch +Patch257: 0257-Make-option-mvzeroupper-independent-of-optimization-.patch +Patch258: 0258-i386-Sync-tune_string-with-arch_string-for-target-at.patch +Patch259: 0259-Refine-maskloadmn-pattern-with-UNSPEC_MASKLOAD.patch +Patch260: 0260-Refine-maskstore-patterns-with-UNSPEC_MASKMOV.patch +Patch261: 0261-x86-Update-model-values-for-Alderlake-and-Rocketlake.patch +Patch262: 0262-Workaround-possible-CPUID-bug-in-Sandy-Bridge.patch +Patch263: 0263-Software-mitigation-Disable-gather-generation-in-vec.patch +Patch264: 0264-Support-m-no-gather-m-no-scatter-to-enable-disable-v.patch +Patch265: 0265-Remove-constraint-modifier-for-fcmaddcph-fmaddcph-fc.patch +Patch266: 0266-Disparage-slightly-for-the-alternative-which-move-DF.patch +Patch267: 0267-Fix-wrong-code-due-to-vec_merge-pcmp-to-blendvb-spli.patch +Patch268: 0268-Don-t-assume-it-s-AVX_U128_CLEAN-after-call_insn-who.patch +Patch269: 0269-Disable-FMADD-in-chains-for-Zen4-and-generic.patch +Patch270: 0270-Initial-Raptorlake-Support.patch +Patch271: 0271-Initial-Meteorlake-Support.patch +Patch272: 0272-Support-Intel-AMX-FP16-ISA.patch +Patch273: 0273-Support-Intel-prefetchit0-t1.patch +Patch274: 0274-Initial-Granite-Rapids-Support.patch +Patch275: 0275-Support-Intel-AMX-COMPLEX.patch +Patch276: 0276-i386-Add-AMX-COMPLEX-to-Granite-Rapids.patch +Patch277: 0277-Initial-Granite-Rapids-D-Support.patch +Patch278: 0278-Correct-Granite-Rapids-D-documentation.patch +Patch279: 0279-i386-Remove-Meteorlake-s-family_model.patch +Patch280: 0280-x86-Update-model-values-for-Alderlake-Rocketlake-and.patch +Patch281: 
0281-x86-Update-model-values-for-Raptorlake.patch +Patch282: 0282-Fix-target_clone-arch-graniterapids-d.patch +Patch283: 0283-i386-Change-prefetchi-output-template.patch +Patch284: 0284-i386-Add-non-optimize-prefetchi-intrins.patch +Patch285: 0285-SME-Recover-hip09-and-hip11-in-aarch64-cores.def.patch +Patch286: 0286-Try-to-use-AI-model-to-guide-optimization.patch +Patch287: 0287-Add-dynamic-memory-access-checks.patch +Patch288: 0288-Enable-macro-use-commandline.patch +Patch289: 0289-tree-ssa-loop-crc.cc-TARGET_CRC32-may-be-not-defined.patch +Patch290: 0290-Add-ipa-prefetch-test-for-gcc-s-case.patch +Patch291: 0291-Fix-settings-for-wide-operations-tests.patch +Patch292: 0292-Fix-errors-in-ipa-prefetch-IAORPF-and-IAOSJ0.patch +Patch293: 0293-Fix-error-with-stmts-insertion-in-ipa-prefetch-for-I.patch +Patch294: 0294-Fix-errors-in-ipa-prefetch-IAO50J-and-IAO5H7.patch +Patch295: 0295-Fix-error-with-grouped_load-merge-in-slp-transpose-v.patch +Patch296: 0296-Fix-error-in-slp-transpose-vectorize-for-IAQFM3.patch +Patch297: 0297-Fix-grouped-load-merging-error-in-SLP-transpose-vectorization.patch +Patch298: 0298-Mark-prefetch-builtin-as-willreturn.patch +Patch299: 0299-Backport-Disallow-pointer-operands-for-and-partly-PR.patch +Patch300: 0300-Remove-erroneous-pattern-from-gimple-ifcvt.patch +Patch301: 0301-Add-required-check-for-iteration-through-uses.patch +Patch302: 0302-Added-param-for-optimization-for-merging-bb-s-with-c.patch # Part 3000 ~ 4999 @@ -1425,6 +1450,31 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch275 -p1 %patch276 -p1 %patch277 -p1 +%patch278 -p1 +%patch279 -p1 +%patch280 -p1 +%patch281 -p1 +%patch282 -p1 +%patch283 -p1 +%patch284 -p1 +%patch285 -p1 +%patch286 -p1 +%patch287 -p1 +%patch288 -p1 +%patch289 -p1 +%patch290 -p1 +%patch291 -p1 +%patch292 -p1 +%patch293 -p1 +%patch294 -p1 +%patch295 -p1 +%patch296 -p1 +%patch297 -p1 +%patch298 -p1 +%patch299 -p1 +%patch300 -p1 +%patch301 -p1 +%patch302 -p1 %ifarch loongarch64 @@ -4014,6 +4064,18 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Thu Nov 21 2024 huangzifeng - 12.3.1-39 +- Type:Sync +- ID:NA +- SUG:NA +- DESC:Sync patches from openeuler/gcc + +* Thu Nov 21 2024 huangzifeng - 12.3.1-38 +- Type:Sync +- ID:NA +- SUG:NA +- DESC:Sync patches from branch openEuler-24.09 + * Wed Nov 20 2024 Hu,Lin1 - 12.3.1-37 - Type:Sync - ID:NA