diff --git a/0035-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch b/0035-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch
new file mode 100644
index 0000000000000000000000000000000000000000..fe0b533c5a1f7ac0e89473948706ea2e13b3ab2e
--- /dev/null
+++ b/0035-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch
@@ -0,0 +1,342 @@
+From cf0f086ec274d794a2a180047123920bf8a5224b Mon Sep 17 00:00:00 2001
+From: dingguangya
+Date: Mon, 17 Jan 2022 21:03:47 +0800
+Subject: [PATCH 01/12] [ccmp] Add another optimization opportunity for ccmp
+ instruction
+
+Add the flag -fccmp2.
+It enables use of the ccmp instruction by creating a new conflict
+relationship for cases where temporary-expression replacement cannot
+be created effectively.
+---
+ gcc/ccmp.c                                |  33 ++++
+ gcc/ccmp.h                                |   1 +
+ gcc/common.opt                            |   4 +
+ gcc/testsuite/gcc.target/aarch64/ccmp_3.c |  15 ++
+ gcc/tree-ssa-coalesce.c                   | 197 ++++++++++++++++++++++
+ 5 files changed, 250 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/ccmp_3.c
+
+diff --git a/gcc/ccmp.c b/gcc/ccmp.c
+index ca77375a9..8d2d73e52 100644
+--- a/gcc/ccmp.c
++++ b/gcc/ccmp.c
+@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3.  If not see
+ #include "cfgexpand.h"
+ #include "ccmp.h"
+ #include "predict.h"
++#include "gimple-iterator.h"
+ 
+ /* Check whether T is a simple boolean variable or a SSA name
+    set by a comparison operator in the same basic block.  */
+@@ -129,6 +130,38 @@ ccmp_candidate_p (gimple *g)
+   return false;
+ }
+ 
++/* Check whether BB is a potential conditional compare candidate.  */
++bool
++check_ccmp_candidate (basic_block bb)
++{
++  gimple_stmt_iterator gsi;
++  gimple *bb_last_stmt, *stmt;
++  tree op0, op1;
++
++  gsi = gsi_last_bb (bb);
++  bb_last_stmt = gsi_stmt (gsi);
++
++  if (bb_last_stmt && gimple_code (bb_last_stmt) == GIMPLE_COND)
++    {
++      op0 = gimple_cond_lhs (bb_last_stmt);
++      op1 = gimple_cond_rhs (bb_last_stmt);
++
++      if (TREE_CODE (op0) == SSA_NAME
++	  && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
++	  && TREE_CODE (op1) == INTEGER_CST
++	  && ((gimple_cond_code (bb_last_stmt) == NE_EXPR)
++	      || (gimple_cond_code (bb_last_stmt) == EQ_EXPR)))
++	{
++	  stmt = SSA_NAME_DEF_STMT (op0);
++	  if (stmt && gimple_code (stmt) == GIMPLE_ASSIGN)
++	    {
++	      return ccmp_candidate_p (stmt);
++	    }
++	}
++    }
++  return false;
++}
++
+ /* Extract the comparison we want to do from the tree.  */
+ void
+ get_compare_parts (tree t, int *up, rtx_code *rcode,
+diff --git a/gcc/ccmp.h b/gcc/ccmp.h
+index 199dd581d..ac862f0f6 100644
+--- a/gcc/ccmp.h
++++ b/gcc/ccmp.h
+@@ -21,5 +21,6 @@ along with GCC; see the file COPYING3.  If not see
+ #define GCC_CCMP_H
+ 
+ extern rtx expand_ccmp_expr (gimple *, machine_mode);
++extern bool check_ccmp_candidate (basic_block bb);
+ 
+ #endif /* GCC_CCMP_H */
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 24834cf60..4dd566def 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1942,6 +1942,10 @@ fira-verbose=
+ Common RejectNegative Joined UInteger Var(flag_ira_verbose) Init(5)
+ -fira-verbose= Control IRA's level of diagnostic messages.
+ 
++fccmp2
++Common Report Var(flag_ccmp2) Init(0) Optimization
++Optimize potential ccmp instructions in complex scenarios.
++
+ fivopts
+ Common Report Var(flag_ivopts) Init(1) Optimization
+ Optimize induction variables on trees.
+diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
+new file mode 100644
+index 000000000..b509ba810
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
+@@ -0,0 +1,15 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-options "-O -fdump-rtl-expand-details -fccmp2" } */
++
++int func (int a, int b, int c)
++{
++  while(1)
++    {
++      if(a-- == 0 || b >= c)
++	{
++	  return 1;
++	}
++    }
++}
++
++/* { dg-final { scan-assembler-times "\tccmp\t" 1} } */
+diff --git a/gcc/tree-ssa-coalesce.c b/gcc/tree-ssa-coalesce.c
+index 0b0b1b18d..e0120a4a4 100644
+--- a/gcc/tree-ssa-coalesce.c
++++ b/gcc/tree-ssa-coalesce.c
+@@ -38,6 +38,9 @@ along with GCC; see the file COPYING3.  If not see
+ #include "explow.h"
+ #include "tree-dfa.h"
+ #include "stor-layout.h"
++#include "ccmp.h"
++#include "target.h"
++#include "tree-outof-ssa.h"
+ 
+ /* This set of routines implements a coalesce_list.  This is an object which
+    is used to track pairs of ssa_names which are desirable to coalesce
+@@ -854,6 +857,198 @@ live_track_clear_base_vars (live_track *ptr)
+   bitmap_clear (&ptr->live_base_var);
+ }
+ 
++/* Return true if GS is a copy assignment between SSA names.  */
++
++static inline bool
++gimple_is_assign_copy_p (gimple *gs)
++{
++  return (is_gimple_assign (gs) && gimple_assign_copy_p (gs)
++	  && TREE_CODE (gimple_assign_lhs (gs)) == SSA_NAME
++	  && TREE_CODE (gimple_assign_rhs1 (gs)) == SSA_NAME);
++}
++
++/* Threshold on the number of added conflicts beyond which the ccmp
++   conflict graph is considered too costly and is dropped.  */
++#define MAX_CCMP_CONFLICT_NUM 5
++
++/* Clear high-cost conflict graphs.  */
++
++static void
++remove_high_cost_graph_for_ccmp (ssa_conflicts *conflict_graph)
++{
++  unsigned x = 0;
++  int add_conflict_num = 0;
++  bitmap b;
++  FOR_EACH_VEC_ELT (conflict_graph->conflicts, x, b)
++    {
++      if (b)
++	{
++	  add_conflict_num++;
++	}
++    }
++  if (add_conflict_num >= MAX_CCMP_CONFLICT_NUM)
++    {
++      conflict_graph->conflicts.release ();
++    }
++}
++
++/* Add a new conflict graph to the original graph.  */
++
++static void
++process_add_graph (live_track *live, basic_block bb,
++		   ssa_conflicts *conflict_graph)
++{
++  tree use, def;
++  ssa_op_iter iter;
++  gimple *first_visit_stmt = NULL;
++  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
++       gsi_next (&gsi))
++    {
++      if (gimple_visited_p (gsi_stmt (gsi)))
++	{
++	  first_visit_stmt = gsi_stmt (gsi);
++	  break;
++	}
++    }
++  if (!first_visit_stmt)
++    return;
++
++  for (gimple_stmt_iterator gsi = gsi_last_bb (bb);
++       gsi_stmt (gsi) != first_visit_stmt; gsi_prev (&gsi))
++    {
++      gimple *stmt = gsi_stmt (gsi);
++      if (gimple_visited_p (gsi_stmt (gsi)) && is_gimple_debug (stmt))
++	{
++	  continue;
++	}
++      if (gimple_is_assign_copy_p (stmt))
++	{
++	  live_track_clear_var (live, gimple_assign_rhs1 (stmt));
++	}
++      FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
++	{
++	  live_track_process_def (live, def, conflict_graph);
++	}
++      FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
++	{
++	  live_track_process_use (live, use);
++	}
++    }
++}
++
++/* Build a conflict graph based on ccmp candidate.  */
++
++static void
++add_ccmp_conflict_graph (ssa_conflicts *conflict_graph,
++			 tree_live_info_p liveinfo, var_map map, basic_block bb)
++{
++  live_track *live;
++  tree use, def;
++  ssa_op_iter iter;
++  live = new_live_track (map);
++  live_track_init (live, live_on_exit (liveinfo, bb));
++
++  gimple *last_stmt = gsi_stmt (gsi_last_bb (bb));
++  gcc_assert (gimple_cond_lhs (last_stmt));
++
++  auto_vec<tree> stack;
++  stack.safe_push (gimple_cond_lhs (last_stmt));
++  while (!stack.is_empty ())
++    {
++      tree op = stack.pop ();
++      gimple *op_stmt = SSA_NAME_DEF_STMT (op);
++      if (!op_stmt || gimple_bb (op_stmt) != bb
++	  || !is_gimple_assign (op_stmt)
++	  || !ssa_is_replaceable_p (op_stmt))
++	{
++	  continue;
++	}
++      if (gimple_is_assign_copy_p (op_stmt))
++	{
++	  live_track_clear_var (live, gimple_assign_rhs1 (op_stmt));
++	}
++      gimple_set_visited (op_stmt, true);
++      FOR_EACH_SSA_TREE_OPERAND (def, op_stmt, iter, SSA_OP_DEF)
++	{
++	  live_track_process_def (live, def, conflict_graph);
++	}
++      FOR_EACH_SSA_TREE_OPERAND (use, op_stmt, iter, SSA_OP_USE)
++	{
++	  stack.safe_push (use);
++	  live_track_process_use (live, use);
++	}
++    }
++
++  process_add_graph (live, bb, conflict_graph);
++  delete_live_track (live);
++  remove_high_cost_graph_for_ccmp (conflict_graph);
++}
++
++/* Determine whether the ccmp conflict graph can be added, e.g.:
++
++   ;; basic block 3, loop depth 1
++   ;;    pred:       2
++   ;;                3
++   # ivtmp.5_10 = PHI <...>
++   _7 = b_4 (D) >= c_5 (D);
++   _8 = ivtmp.5_10 == 0;
++   _9 = _7 | _8;
++   ivtmp.5_11 = ivtmp.5_10 - 1;
++   if (_9 != 0)
++     goto <bb 4>; [10.70%]
++   else
++     goto <bb 3>; [89.30%]
++
++   In the above loop, the expressions will be replaced:
++
++   _7 replaced by b_4 (D) >= c_5 (D)
++   _8 replaced by ivtmp.5_10 == 0
++
++   If the current case wants to use the ccmp instruction, then
++
++   _9 can be replaced by _7 | _8
++
++   This requires that ivtmp.5_11 and ivtmp.5_10 be placed in different
++   partitions, which is what this function achieves.  */
++
++static void
++determine_add_ccmp_conflict_graph (basic_block bb, tree_live_info_p liveinfo,
++				   var_map map, ssa_conflicts *graph)
++{
++  if (!flag_ccmp2 || !targetm.gen_ccmp_first || !check_ccmp_candidate (bb))
++    return;
++  for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
++       gsi_next (&bsi))
++    {
++      gimple_set_visited (gsi_stmt (bsi), false);
++    }
++  ssa_conflicts *ccmp_conflict_graph;
++  ccmp_conflict_graph = ssa_conflicts_new (num_var_partitions (map));
++  add_ccmp_conflict_graph (ccmp_conflict_graph, liveinfo, map, bb);
++  unsigned x;
++  bitmap b;
++  if (ccmp_conflict_graph)
++    {
++      FOR_EACH_VEC_ELT (ccmp_conflict_graph->conflicts, x, b)
++	{
++	  if (!b)
++	    continue;
++	  unsigned y = bitmap_first_set_bit (b);
++	  if (!graph->conflicts[x] || !bitmap_bit_p (graph->conflicts[x], y))
++	    {
++	      ssa_conflicts_add (graph, x, y);
++	      if (dump_file && (dump_flags & TDF_DETAILS))
++		{
++		  fprintf (dump_file, "potential ccmp: add additional "
++			   "conflict-ssa : bb[%d] %d:%d\n",
++			   bb->index, x, y);
++		}
++	    }
++	}
++    }
++  ssa_conflicts_delete (ccmp_conflict_graph);
++}
+ 
+ /* Build a conflict graph based on LIVEINFO.  Any partitions which are in the
+    partition view of the var_map liveinfo is based on get entries in the
+@@ -938,6 +1133,8 @@ build_ssa_conflict_graph (tree_live_info_p liveinfo)
+ 	      live_track_process_use (live, var);
+ 	    }
+ 
++	  determine_add_ccmp_conflict_graph (bb, liveinfo, map, graph);
++
+ 	  /* If result of a PHI is unused, looping over the statements will not
+ 	     record any conflicts since the def was never live.  Since the PHI node
+ 	     is going to be translated out of SSA form, it will insert a copy.
+-- 
+2.27.0.windows.1
+
diff --git a/0036-StructReorg-Refactoring-reorder-fields-to-struct-lay.patch b/0036-StructReorg-Refactoring-reorder-fields-to-struct-lay.patch
new file mode 100644
index 0000000000000000000000000000000000000000..41d01f763824d13eaff6260543a19fc59b7fe6af
--- /dev/null
+++ b/0036-StructReorg-Refactoring-reorder-fields-to-struct-lay.patch
@@ -0,0 +1,1115 @@
+From 3c06a2cda7220a48866ae2dbe3f365e300cbaeca Mon Sep 17 00:00:00 2001
+From: liyancheng <412998149@qq.com>
+Date: Wed, 1 Jun 2022 17:22:12 +0800
+Subject: [PATCH 02/12] [StructReorg] Refactoring reorder fields to struct
+ layout optimization
+
+Refactor the reorder-fields optimization into the struct layout
+optimization.  Add flag -fipa-struct-reorg=[0,1,2] to enable none,
+struct-reorg, and reorder-fields optimizations, respectively.
+---
+ gcc/common.opt                                |   6 +-
+ gcc/ipa-struct-reorg/ipa-struct-reorg.c       | 167 +++++++++---------
+ gcc/ipa-struct-reorg/ipa-struct-reorg.h       |   2 +-
+ gcc/opts.c                                    |  12 ++
+ gcc/passes.def                                |   2 +-
+ gcc/symbol-summary.h                          |   4 +-
+ .../struct/rf_DTE_struct_instance_field.c     |   2 +-
+ gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c   |   2 +-
+ .../gcc.dg/struct/rf_check_ptr_layers_bug.c   |   2 +-
+ .../gcc.dg/struct/rf_create_fields_bug.c      |   2 +-
+ .../gcc.dg/struct/rf_create_new_func_bug.c    |   2 +-
+ .../gcc.dg/struct/rf_ele_minus_verify.c       |   2 +-
+ .../gcc.dg/struct/rf_escape_by_base.c         |   2 +-
+ gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c |   2 +-
+ .../gcc.dg/struct/rf_mem_ref_offset.c         |   2 +-
+ .../struct/rf_mul_layer_ptr_record_bug.c      |   2 +-
+ .../gcc.dg/struct/rf_pass_conflict.c          |   2 +-
+ gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c |   2 +-
+ gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c     |   2 +-
+ .../gcc.dg/struct/rf_ptr_negate_expr.c        |   2 +-
+ gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c   |   2 +-
+ gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c      |   2 +-
+ gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c  |   2 +-
+ .../gcc.dg/struct/rf_rescusive_type.c         |   2 +-
+ .../struct/rf_rewrite_assign_more_cmp.c       |   2 +-
+ .../gcc.dg/struct/rf_rewrite_cond_bug.c       |   2 +-
+ .../gcc.dg/struct/rf_rewrite_cond_more_cmp.c  |   2 +-
+ .../gcc.dg/struct/rf_rewrite_phi_bug.c        |   2 +-
+ gcc/testsuite/gcc.dg/struct/rf_visible_func.c |   2 +-
+ .../gcc.dg/struct/rf_void_ptr_param_func.c    |   2 +-
+ .../gcc.dg/struct/sr_pointer_minus.c          |   2 +-
+ gcc/testsuite/gcc.dg/struct/struct-reorg.exp  |  19 ++
+ gcc/timevar.def                               |   2 +-
+ gcc/tree-pass.h                               |   2 +-
+ gcc/tree.c                                    |   4 +-
+ 35 files changed, 153 insertions(+), 117 deletions(-)
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 4dd566def..7fc075d35 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1876,13 +1876,17 @@ Common Ignore
+ Does nothing. Preserved for backward compatibility.
+ 
+ fipa-reorder-fields
+-Common Report Var(flag_ipa_reorder_fields) Init(0) Optimization
++Common Report Var(flag_ipa_struct_layout) Init(0) Optimization
+ Perform structure fields reorder optimizations.
+ 
+ fipa-struct-reorg
+ Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization
+ Perform structure layout optimizations.
+ 
++fipa-struct-reorg=
++Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 2)
++-fipa-struct-reorg=[0,1,2] adding none, struct-reorg, reorder-fields optimizations.
++
+ fipa-extend-auto-profile
+ Common Report Var(flag_ipa_extend_auto_profile)
+ Use sample profile information for source code.
+diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
+index 2bf41e0d8..9214ee74a 100644
+--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
+@@ -235,7 +235,7 @@ enum srmode
+ {
+   NORMAL = 0,
+   COMPLETE_STRUCT_RELAYOUT,
+-  STRUCT_REORDER_FIELDS
++  STRUCT_LAYOUT_OPTIMIZE
+ };
+ 
+ static bool is_result_of_mult (tree arg, tree *num, tree struct_size);
+@@ -552,7 +552,7 @@ void
+ srtype::simple_dump (FILE *f)
+ {
+   print_generic_expr (f, type);
+-  if (current_mode == STRUCT_REORDER_FIELDS)
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+     {
+       fprintf (f, "(%d)", TYPE_UID (type));
+     }
+@@ -593,9 +593,9 @@ srfield::create_new_fields (tree newtype[max_split],
+ 			    tree newfields[max_split],
+ 			    tree newlast[max_split])
+ {
+-  if (current_mode == STRUCT_REORDER_FIELDS)
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+     {
+-      create_new_reorder_fields (newtype, newfields, newlast);
++      create_new_optimized_fields (newtype, newfields, newlast);
+       return;
+     }
+ 
+@@ -689,15 +689,15 @@ srfield::reorder_fields (tree newfields[max_split], tree newlast[max_split],
+     }
+ }
+ 
+-/* Create the new reorder fields for this field.
++/* Create the new optimized fields for this field.
+    newtype[max_split]: srtype's member variable,
+    newfields[max_split]: created by create_new_type func,
+    newlast[max_split]: created by create_new_type func.  */
+ 
+ void
+-srfield::create_new_reorder_fields (tree newtype[max_split],
+-				    tree newfields[max_split],
+-				    tree newlast[max_split])
++srfield::create_new_optimized_fields (tree newtype[max_split],
++				      tree newfields[max_split],
++				      tree newlast[max_split])
+ {
+   /* newtype, corresponding to newtype[max_split] in srtype.  */
+   tree nt = NULL_TREE;
+@@ -794,7 +794,7 @@ srtype::create_new_type (void)
+      we are not splitting the struct into two clusters,
+      then just return false and don't change the type.  */
+   if (!createnewtype && maxclusters == 0
+-      && current_mode != STRUCT_REORDER_FIELDS)
++      && current_mode != STRUCT_LAYOUT_OPTIMIZE)
+     {
+       newtype[0] = type;
+       return false;
+@@ -822,8 +822,8 @@ srtype::create_new_type (void)
+       sprintf(id, "%d", i);
+       if (tname)
+ 	{
+-	  name = concat (tname, current_mode == STRUCT_REORDER_FIELDS
+-			 ? ".reorder." : ".reorg.", id, NULL);
++	  name = concat (tname, current_mode == STRUCT_LAYOUT_OPTIMIZE
++			 ? ".slo." : ".reorg.", id, NULL);
+ 	  TYPE_NAME (newtype[i]) = build_decl (UNKNOWN_LOCATION, TYPE_DECL,
+ 					       get_identifier (name), newtype[i]);
+ 	  free (name);
+@@ -969,8 +969,8 @@ srfunction::create_new_decls (void)
+ 	  sprintf(id, "%d", j);
+ 	  if (tname)
+ 	    {
+-	      name = concat (tname, current_mode == STRUCT_REORDER_FIELDS
+-			     ? ".reorder." : ".reorg.", id, NULL);
++	      name = concat (tname, current_mode == STRUCT_LAYOUT_OPTIMIZE
++			     ? ".slo." : ".reorg.", id, NULL);
+ 	      new_name = get_identifier (name);
+ 	      free (name);
+ 	    }
+@@ -2718,7 +2718,7 @@ escape_type escape_type_volatile_array_or_ptrptr (tree type)
+     return escape_volatile;
+   if (isarraytype (type))
+     return escape_array;
+-  if (isptrptr (type) && (current_mode != STRUCT_REORDER_FIELDS))
++  if (isptrptr (type) && (current_mode != STRUCT_LAYOUT_OPTIMIZE))
+     return escape_ptr_ptr;
+   return does_not_escape;
+ }
+@@ -2740,13 +2740,13 @@ ipa_struct_reorg::record_field_type (tree field, srtype *base_srtype)
+       field_srtype->add_field_site (field_srfield);
+     }
+   if (field_srtype == base_srtype && current_mode != COMPLETE_STRUCT_RELAYOUT
+-      && current_mode != STRUCT_REORDER_FIELDS)
++      && current_mode != STRUCT_LAYOUT_OPTIMIZE)
+     {
+       base_srtype->mark_escape (escape_rescusive_type, NULL);
+     }
+   /* Types of non-pointer field are difficult to track the correctness
+      of the rewrite when it used by the escaped type.  */
+-  if (current_mode == STRUCT_REORDER_FIELDS
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE
+       && TREE_CODE (field_type) == RECORD_TYPE)
+     {
+       field_srtype->mark_escape (escape_instance_field, NULL);
+@@ -2781,7 +2781,7 @@ ipa_struct_reorg::record_struct_field_types (tree base_type,
+     }
+   /* Types of non-pointer field are difficult to track the correctness
+      of the rewrite when it used by the escaped type.  */
+-  if (current_mode == STRUCT_REORDER_FIELDS
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE
+       && TREE_CODE (field_type) == RECORD_TYPE)
+     {
+       base_srtype->mark_escape (escape_instance_field, NULL);
+@@ -2966,7 +2966,7 @@ ipa_struct_reorg::record_var (tree decl, escape_type escapes, int arg)
+       /* Separate instance is hard to trace in complete struct
+ 	 relayout optimization.  */
+       if ((current_mode == COMPLETE_STRUCT_RELAYOUT
+-	   || current_mode == STRUCT_REORDER_FIELDS)
++	   || current_mode == STRUCT_LAYOUT_OPTIMIZE)
+ 	  && TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE)
+ 	{
+ 	  e = escape_separate_instance;
+@@ -3071,7 +3071,7 @@ ipa_struct_reorg::find_vars (gimple *stmt)
+ 	/* Add a safe func mechanism.  */
+ 	bool l_find = true;
+ 	bool r_find = true;
+-	if (current_mode == STRUCT_REORDER_FIELDS)
++	if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+ 	  {
+ 	    l_find = !(current_function->is_safe_func
+ 		       && TREE_CODE (lhs) == SSA_NAME
+@@ -3117,7 +3117,7 @@ ipa_struct_reorg::find_vars (gimple *stmt)
+ 	      }
+ 	  }
+       }
+-      else if ((current_mode == STRUCT_REORDER_FIELDS)
++      else if ((current_mode == STRUCT_LAYOUT_OPTIMIZE)
+ 	       && (gimple_assign_rhs_code (stmt) == LE_EXPR
+ 		   || gimple_assign_rhs_code (stmt) == LT_EXPR
+ 		   || gimple_assign_rhs_code (stmt) == GE_EXPR
+@@ -3128,7 +3128,7 @@
+ 	  find_var (gimple_assign_rhs2 (stmt), stmt);
+ 	}
+       /* find void ssa_name from stmt such as: _2 = _1 - old_arcs_1.  */
+-      else if ((current_mode == STRUCT_REORDER_FIELDS)
++      else if ((current_mode == STRUCT_LAYOUT_OPTIMIZE)
+ 	       && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR
+ 	       && types_compatible_p (
+ 		  TYPE_MAIN_VARIANT (TREE_TYPE (gimple_assign_rhs1 (stmt))),
+@@ -3391,11 +3391,12 @@ is_result_of_mult (tree arg, tree *num, tree struct_size)
+ 	  arg = gimple_assign_rhs1 (size_def_stmt);
+ 	  size_def_stmt = SSA_NAME_DEF_STMT (arg);
+ 	}
+-      else if (rhs_code == NEGATE_EXPR && current_mode == STRUCT_REORDER_FIELDS)
++      else if (rhs_code == NEGATE_EXPR
++	       && current_mode == STRUCT_LAYOUT_OPTIMIZE)
+ 	{
+ 	  return trace_calculate_negate (size_def_stmt, num, struct_size);
+ 	}
+-      else if (rhs_code == NOP_EXPR && current_mode == STRUCT_REORDER_FIELDS)
++      else if (rhs_code == NOP_EXPR && current_mode == STRUCT_LAYOUT_OPTIMIZE)
+ 	{
+ 	  return trace_calculate_diff (size_def_stmt, num);
+ 	}
+@@ -3415,7 +3416,7 @@
+ bool
+ ipa_struct_reorg::handled_allocation_stmt (gimple *stmt)
+ {
+-  if ((current_mode == STRUCT_REORDER_FIELDS)
++  if ((current_mode == STRUCT_LAYOUT_OPTIMIZE)
+       && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)
+ 	  || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC)
+ 	  || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)))
+@@ -3548,7 +3549,7 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, gimple
+   /* x_1 = y.x_nodes; void *x;
+      Directly mark the structure pointer type assigned
+      to the void* variable as escape.  */
+-  else if (current_mode == STRUCT_REORDER_FIELDS
++  else if (current_mode == STRUCT_LAYOUT_OPTIMIZE
+ 	   && TREE_CODE (side) == SSA_NAME
+ 	   && VOID_POINTER_P (TREE_TYPE (side))
+ 	   && SSA_NAME_VAR (side)
+@@ -3815,7 +3816,7 @@ ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect,
+      and doesn't mark escape follow.).  */
+   /* _1 = MEM[(struct arc_t * *)a_1].
+      then base a_1: ssa_name - pointer_type - integer_type.  */
+-  if (current_mode == STRUCT_REORDER_FIELDS)
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+     {
+       bool is_int_ptr = POINTER_TYPE_P (TREE_TYPE (base))
+ 			&& (TREE_CODE (inner_type (TREE_TYPE (base)))
+@@ -4031,7 +4032,7 @@ ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt)
+       /* callee_func (_1, _2);
+ 	 Check the callee func, instead of current func.  */
+       if (!(free_or_realloc
+-	    || (current_mode == STRUCT_REORDER_FIELDS
++	    || (current_mode == STRUCT_LAYOUT_OPTIMIZE
+ 		&& safe_functions.contains (
+ 		     node->get_edge (stmt)->callee)))
+ 	  && VOID_POINTER_P (argtypet))
+@@ -4063,9 +4064,9 @@ ipa_struct_reorg::record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt)
+ 			realpart, imagpart, address, escape_from_base))
+     return;
+ 
+-  if (current_mode == STRUCT_REORDER_FIELDS)
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+     {
+-      if (!opt_for_fn (current_function_decl, flag_ipa_reorder_fields))
++      if (!opt_for_fn (current_function_decl, flag_ipa_struct_layout))
+ 	{
+ 	  type->mark_escape (escape_non_optimize, stmt);
+ 	}
+@@ -4287,7 +4288,7 @@ ipa_struct_reorg::check_definition_call (srdecl *decl, vec<srdecl*> &worklist)
+       check_type_and_push (gimple_call_arg (stmt, 0), decl, worklist, stmt);
+     }
+ 
+-  if (current_mode == STRUCT_REORDER_FIELDS)
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+     {
+       if (!handled_allocation_stmt (stmt))
+ 	{
+@@ -4341,7 +4342,7 @@ ipa_struct_reorg::check_definition (srdecl *decl, vec<srdecl*> &worklist)
+ 	}
+       return;
+     }
+-  if (current_mode == STRUCT_REORDER_FIELDS && SSA_NAME_VAR (ssa_name)
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE && SSA_NAME_VAR (ssa_name)
+       && VOID_POINTER_P (TREE_TYPE (SSA_NAME_VAR (ssa_name))))
+     {
+       type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name));
+@@ -4442,7 +4443,7 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, vec<
+   if (!get_type_field (other, base, indirect, type1, field,
+ 		       realpart, imagpart, address, escape_from_base))
+     {
+-      if (current_mode == STRUCT_REORDER_FIELDS)
++      if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+ 	{
+ 	  /* release INTEGER_TYPE cast to struct pointer.  */
+ 	  bool cast_from_int_ptr = current_function->is_safe_func && base
+@@ -4498,7 +4499,7 @@ get_base (tree &base, tree expr)
+ void
+ ipa_struct_reorg::check_ptr_layers (tree a_expr, tree b_expr, gimple* stmt)
+ {
+-  if (current_mode != STRUCT_REORDER_FIELDS || current_function->is_safe_func
++  if (current_mode != STRUCT_LAYOUT_OPTIMIZE || current_function->is_safe_func
+       || !(POINTER_TYPE_P (TREE_TYPE (a_expr)))
+       || !(POINTER_TYPE_P (TREE_TYPE (b_expr)))
+       || !handled_type (TREE_TYPE (a_expr))
+@@ -4579,7 +4580,7 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, vec<srdecl*> &worklist)
+ 	   && (code != EQ_EXPR && code != NE_EXPR
+ 	       && code != LT_EXPR && code != LE_EXPR
+ 	       && code != GT_EXPR && code != GE_EXPR))
+-	  || (current_mode == STRUCT_REORDER_FIELDS
++	  || (current_mode == STRUCT_LAYOUT_OPTIMIZE
+ 	      && (code != EQ_EXPR && code != NE_EXPR
+ 		  && code != LT_EXPR && code != LE_EXPR
+ 		  && code != GT_EXPR && code != GE_EXPR)))
+@@ -4618,7 +4619,7 @@ ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, vec<srdecl*> &worklist)
+ 	   && (code != EQ_EXPR && code != NE_EXPR
+ 	       && code != LT_EXPR && code != LE_EXPR
+ 	       && code != GT_EXPR && code != GE_EXPR))
+-	  || (current_mode == STRUCT_REORDER_FIELDS
++	  || (current_mode == STRUCT_LAYOUT_OPTIMIZE
+ 	      && (code != EQ_EXPR && code != NE_EXPR
+ 		  && code != LT_EXPR && code != LE_EXPR
+ 		  && code != GT_EXPR && code != GE_EXPR)))
+@@ -4740,11 +4741,11 @@ ipa_struct_reorg::record_function (cgraph_node *node)
+     escapes = escape_marked_as_used;
+   else if (!node->local)
+     {
+-      if (current_mode != STRUCT_REORDER_FIELDS)
++      if (current_mode != STRUCT_LAYOUT_OPTIMIZE)
+ 	{
+ 	  escapes = escape_visible_function;
+ 	}
+-      if (current_mode == STRUCT_REORDER_FIELDS && node->externally_visible)
++      if (current_mode == STRUCT_LAYOUT_OPTIMIZE && node->externally_visible)
+ 	{
+ 	  escapes = escape_visible_function;
+ 	}
+@@ -4754,9 +4755,9 @@ ipa_struct_reorg::record_function (cgraph_node *node)
+   else if (!tree_versionable_function_p (node->decl))
+     escapes = escape_noclonable_function;
+ 
+-  if (current_mode == STRUCT_REORDER_FIELDS)
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+     {
+-      if (!opt_for_fn (node->decl, flag_ipa_reorder_fields))
++      if (!opt_for_fn (node->decl, flag_ipa_struct_layout))
+ 	{
+ 	  escapes = escape_non_optimize;
+ 	}
+@@ -4773,7 +4774,7 @@ ipa_struct_reorg::record_function (cgraph_node *node)
+   gimple_stmt_iterator si;
+ 
+   /* Add a safe func mechanism.  */
+-  if (current_mode == STRUCT_REORDER_FIELDS)
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+     {
+       current_function->is_safe_func = safe_functions.contains (node);
+       if (dump_file)
+@@ -4989,7 +4990,7 @@ ipa_struct_reorg::record_accesses (void)
+     }
+ 
+   /* Add a safe func mechanism.  */
+-  if (current_mode == STRUCT_REORDER_FIELDS)
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+     {
+       record_safe_func_with_void_ptr_parm ();
+     }
+@@ -5188,7 +5189,7 @@ void
+ ipa_struct_reorg::prune_escaped_types (void)
+ {
+   if (current_mode != COMPLETE_STRUCT_RELAYOUT
+-      && current_mode != STRUCT_REORDER_FIELDS)
++      && current_mode != STRUCT_LAYOUT_OPTIMIZE)
+     {
+       /* Detect recusive types and mark them as escaping.  */
+       detect_cycles ();
+@@ -5196,7 +5197,7 @@ ipa_struct_reorg::prune_escaped_types (void)
+ 	 mark them as escaping.  */
+       propagate_escape ();
+     }
+-  if (current_mode == STRUCT_REORDER_FIELDS)
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+     {
+       propagate_escape_via_original ();
+       propagate_escape_via_empty_with_no_original ();
+@@ -5256,7 +5257,7 @@ ipa_struct_reorg::prune_escaped_types (void)
+ 	  if (function->args.is_empty ()
+ 	      && function->decls.is_empty ()
+ 	      && function->globals.is_empty ()
+-	      && current_mode != STRUCT_REORDER_FIELDS)
++	      && current_mode != STRUCT_LAYOUT_OPTIMIZE)
+ 	    {
+ 	      delete function;
+ 	      functions.ordered_remove (i);
+@@ -5281,10 +5282,10 @@ ipa_struct_reorg::prune_escaped_types (void)
+ 
+   /* Prune types that escape, all references to those types
+      will have been removed in the above loops.  */
+-  /* The escape type is not deleted in STRUCT_REORDER_FIELDS,
++  /* The escape type is not deleted in STRUCT_LAYOUT_OPTIMIZE,
+      Then the type that contains the escaped type fields
+      can find complete information.  */
+-  if (current_mode != STRUCT_REORDER_FIELDS)
++  if (current_mode != STRUCT_LAYOUT_OPTIMIZE)
+     {
+       for (unsigned i = 0; i < types.length ();)
+ 	{
+@@ -5334,7 +5335,7 @@ ipa_struct_reorg::create_new_types (void)
+   for (unsigned i = 0; i < types.length (); i++)
+     newtypes += types[i]->create_new_type ();
+ 
+-  if (current_mode == STRUCT_REORDER_FIELDS)
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+     {
+       for (unsigned i = 0; i < types.length (); i++)
+ 	{
+@@ -5458,8 +5459,8 @@ ipa_struct_reorg::create_new_args (cgraph_node *new_node)
+       char *name = NULL;
+       if (tname)
+ 	{
+-	  name = concat (tname, current_mode == STRUCT_REORDER_FIELDS
+-			 ? ".reorder.0" : ".reorg.0", NULL);
++	  name = concat (tname, current_mode == STRUCT_LAYOUT_OPTIMIZE
++			 ? ".slo.0" : ".reorg.0", NULL);
+ 	  new_name = get_identifier (name);
+ 	  free (name);
+ 	}
+@@ -5547,8 +5548,8 @@ ipa_struct_reorg::create_new_functions (void)
+       statistics_counter_event (NULL, "Create new function", 1);
+       new_node = node->create_version_clone_with_body (
+ 		   vNULL, NULL, NULL, NULL, NULL,
+-		   current_mode == STRUCT_REORDER_FIELDS
+-		   ? "struct_reorder" : "struct_reorg");
++		   current_mode == STRUCT_LAYOUT_OPTIMIZE
++		   ? "slo" : "struct_reorg");
+       new_node->can_change_signature = node->can_change_signature;
+       new_node->make_local ();
+       f->newnode = new_node;
+@@ -5666,13 +5667,13 @@ ipa_struct_reorg::rewrite_expr (tree expr, tree newexpr[max_split], bool ignore_
+     newbase1 = build_fold_addr_expr (newbase1);
+   if (indirect)
+     {
+-      if (current_mode == STRUCT_REORDER_FIELDS)
++      if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+ 	{
+ 	  /* Supports the MEM_REF offset.
+ 	     _1 = MEM[(struct arc *)ap_1 + 72B].flow;
+-	     Old rewrite: _1 = ap.reorder.0_8->flow;
++	     Old rewrite: _1 = ap.slo.0_8->flow;
+ 	     New rewrite: _1
+-	       = MEM[(struct arc.reorder.0 *)ap.reorder.0_8 + 64B].flow;
++	       = MEM[(struct arc.slo.0 *)ap.slo.0_8 + 64B].flow;
+ 	  */
+ 	  HOST_WIDE_INT offset_tmp = 0;
+ 	  HOST_WIDE_INT mem_offset = 0;
+@@ -5738,10 +5739,10 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
+       return remove;
+     }
+ 
+-  if ((current_mode != STRUCT_REORDER_FIELDS
++  if ((current_mode != STRUCT_LAYOUT_OPTIMIZE
+        && (gimple_assign_rhs_code (stmt) == EQ_EXPR
+ 	   || gimple_assign_rhs_code (stmt) == NE_EXPR))
+-      || (current_mode == STRUCT_REORDER_FIELDS
++      || (current_mode == STRUCT_LAYOUT_OPTIMIZE
+ 	  && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
+ 	      == tcc_comparison)))
+     {
+@@ -5751,7 +5752,7 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
+       tree newrhs2[max_split];
+       tree_code rhs_code = gimple_assign_rhs_code (stmt);
+       tree_code code = rhs_code == EQ_EXPR ? BIT_AND_EXPR : BIT_IOR_EXPR;
+-      if (current_mode == STRUCT_REORDER_FIELDS
++      if (current_mode == STRUCT_LAYOUT_OPTIMIZE
+ 	  && rhs_code != EQ_EXPR && rhs_code != NE_EXPR)
+ 	{
+ 	  code = rhs_code;
+@@ -5798,8 +5799,8 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
+ 	 _6 = _4 + _5;
+ 	 _5 = (long unsigned int) _3;
+ 	 _3 = _1 - old_2.  */
+-      if (current_mode != STRUCT_REORDER_FIELDS
+-	  || (current_mode == STRUCT_REORDER_FIELDS && (num != NULL)))
++      if (current_mode != STRUCT_LAYOUT_OPTIMIZE
++	  || (current_mode == STRUCT_LAYOUT_OPTIMIZE && (num != NULL)))
+ 	{
+ 	  num = gimplify_build1 (gsi, NOP_EXPR, sizetype, num);
+ 	}
+@@ -5827,7 +5828,7 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi)
+     }
+ 
+   /* Support POINTER_DIFF_EXPR rewriting.  */
+-  if (current_mode == STRUCT_REORDER_FIELDS
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE
+       && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR)
+     {
+       tree rhs1 = gimple_assign_rhs1 (stmt);
+@@ -6014,7 +6015,7 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi)
+   srfunction *f = find_function (node);
+ 
+   /* Add a safe func mechanism.  */
+-  if (current_mode == STRUCT_REORDER_FIELDS && f && f->is_safe_func)
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE && f && f->is_safe_func)
+     {
+       tree expr = gimple_call_arg (stmt, 0);
+       tree newexpr[max_split];
+@@ -6141,9 +6142,9 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi)
+   tree_code rhs_code = gimple_cond_code (stmt);
+ 
+   /* Handle only equals or not equals conditionals.  */
+-  if ((current_mode != STRUCT_REORDER_FIELDS
++  if ((current_mode != STRUCT_LAYOUT_OPTIMIZE
+        && (rhs_code != EQ_EXPR && rhs_code != NE_EXPR))
+-      || (current_mode == STRUCT_REORDER_FIELDS
++      || (current_mode == STRUCT_LAYOUT_OPTIMIZE
+ 	  && TREE_CODE_CLASS (rhs_code) != tcc_comparison))
+     return false;
+   tree lhs = gimple_cond_lhs (stmt);
+@@ -6171,10 +6172,10 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi)
+     }
+ 
+   /* Old rewrite: if (x_1 != 0B)
+-		    -> _1 = x.reorder.0_1 != 0B; if (_1 != 1)
++		    -> _1 = x.slo.0_1 != 0B; if (_1 != 1)
+      The logic is incorrect.
+      New rewrite: if (x_1 != 0B)
+-		    -> if (x.reorder.0_1 != 0B);*/
++		    -> if (x.slo.0_1 != 0B);*/
+   for (unsigned i = 0; i < max_split && (newlhs[i] || newrhs[i]); i++)
+     {
+       if (newlhs[i])
+@@ -6203,7 +6204,7 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi)
+ bool
+ ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *)
+ {
+-  if (current_mode == STRUCT_REORDER_FIELDS)
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+     {
+       /* Delete debug gimple now.  */
+       return true;
+@@ -6367,7 +6368,7 @@ ipa_struct_reorg::rewrite_functions (void)
+      then don't rewrite any accesses.  */
+   if (!create_new_types ())
+     {
+-      if (current_mode == STRUCT_REORDER_FIELDS)
++      if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+ 	{
+ 	  for (unsigned i = 0; i < functions.length (); i++)
+ 	    {
+@@ -6386,7 +6387,7 @@ ipa_struct_reorg::rewrite_functions (void)
+       return 0;
+     }
+ 
+-  if (current_mode == STRUCT_REORDER_FIELDS && dump_file)
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE && dump_file)
+     {
+       fprintf (dump_file, "=========== all created newtypes: ===========\n\n");
+       dump_newtypes (dump_file);
+@@ -6396,13 +6397,13 @@ ipa_struct_reorg::rewrite_functions (void)
+     {
+       retval = TODO_remove_functions;
+       create_new_functions ();
+-      if (current_mode == STRUCT_REORDER_FIELDS)
++      if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+ 	{
+ 	  prune_escaped_types ();
+ 	}
+     }
+ 
+-  if (current_mode == STRUCT_REORDER_FIELDS)
++  if (current_mode == STRUCT_LAYOUT_OPTIMIZE)
+     {
+       for (unsigned i = 0; i < functions.length (); i++)
+ 	{
+@@ -6572,7 +6573,7 @@ ipa_struct_reorg::execute (enum srmode mode)
+ {
+   unsigned int ret = 0;
+ 
+-  if (mode == NORMAL || mode == STRUCT_REORDER_FIELDS)
++  if (mode == NORMAL || mode == STRUCT_LAYOUT_OPTIMIZE)
+     {
+       current_mode = mode;
+       /* If there is a top-level inline-asm,
+@@ -6660,12 +6661,12 @@ pass_ipa_struct_reorg::gate (function *)
+ 	  && (in_lto_p || flag_whole_program));
+ }
+ 
+-const pass_data pass_data_ipa_reorder_fields =
++const pass_data pass_data_ipa_struct_layout =
+ {
+   SIMPLE_IPA_PASS, // type
+-  "reorder_fields", // name
++  "struct_layout", // name
+   OPTGROUP_NONE, // optinfo_flags
+-  TV_IPA_REORDER_FIELDS, // tv_id
++  TV_IPA_STRUCT_LAYOUT, // tv_id
+   0, // properties_required
+   0, // properties_provided
+   0, // properties_destroyed
+@@ -6673,11 +6674,11 @@ const pass_data pass_data_ipa_reorder_fields =
+   0, // todo_flags_finish
+ };
+ 
+-class pass_ipa_reorder_fields : public simple_ipa_opt_pass
++class pass_ipa_struct_layout : public simple_ipa_opt_pass
+ {
+ public:
+-  pass_ipa_reorder_fields (gcc::context *ctxt)
+-    : simple_ipa_opt_pass (pass_data_ipa_reorder_fields, ctxt)
++  pass_ipa_struct_layout (gcc::context *ctxt)
++    : simple_ipa_opt_pass (pass_data_ipa_struct_layout, ctxt)
+   {}
+ 
+   /* opt_pass methods: */
+@@ -6685,17 +6686,17 @@ public:
+   virtual unsigned int execute (function *)
+   {
+     unsigned int ret = 0;
+-    ret = ipa_struct_reorg ().execute (STRUCT_REORDER_FIELDS);
++    ret = ipa_struct_reorg ().execute (STRUCT_LAYOUT_OPTIMIZE);
+     return ret;
+   }
+ 
+-}; // class pass_ipa_reorder_fields
++}; // class pass_ipa_struct_layout
+ 
+ bool
+-pass_ipa_reorder_fields::gate (function *)
++pass_ipa_struct_layout::gate (function *)
+ {
+   return (optimize >= 3
+-	  && flag_ipa_reorder_fields
++	  && flag_ipa_struct_layout
+ 	  /* Don't bother doing anything if the program has errors.  */
+ 	  && !seen_error ()
+ 	  && flag_lto_partition == LTO_PARTITION_ONE
+@@ -6715,7 +6716,7 @@ make_pass_ipa_struct_reorg (gcc::context *ctxt)
+ }
+ 
+ simple_ipa_opt_pass *
+-make_pass_ipa_reorder_fields (gcc::context *ctxt)
++make_pass_ipa_struct_layout (gcc::context *ctxt)
+ {
+-  return new pass_ipa_reorder_fields (ctxt);
++  return new pass_ipa_struct_layout (ctxt);
+ }
+diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h
+index 8fb6ce9c4..54b0dc655 100644
+--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h
++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h
+@@ -187,7 +187,7 @@ struct srfield
+ 			  tree newlast[max_split]);
+   void reorder_fields (tree newfields[max_split], tree newlast[max_split],
+ 		       tree &field);
+-  void create_new_reorder_fields (tree newtype[max_split],
++  void create_new_optimized_fields (tree newtype[max_split],
+ 				  tree newfields[max_split],
+ 				  tree newlast[max_split]);
+ };
+diff --git a/gcc/opts.c b/gcc/opts.c
+index 479d726df..c3877c24e 100644
+--- a/gcc/opts.c
++++ b/gcc/opts.c
+@@ -2695,6 +2695,18 @@ common_handle_option (struct gcc_options *opts,
+       }
+       break;
+ 
++    case OPT_fipa_struct_reorg_:
++      opts->x_struct_layout_optimize_level = value;
++      if (value > 1)
++	{
++	  SET_OPTION_IF_UNSET (opts, opts_set, flag_ipa_struct_layout, value);
++	}
++      /* No break here - do -fipa-struct-reorg processing.  */
++      /* FALLTHRU.  */
++    case OPT_fipa_struct_reorg:
++      opts->x_flag_ipa_struct_reorg = value;
++      break;
++
+     case OPT_fprofile_generate_:
+       opts->x_profile_data_prefix = xstrdup (arg);
+       value = true;
+diff --git a/gcc/passes.def b/gcc/passes.def
+index e9c91d26e..eea4d7808 100644
+--- a/gcc/passes.def
++++ b/gcc/passes.def
+@@ -174,7 +174,7 @@ along with GCC; see the file COPYING3.  If not see
+   INSERT_PASSES_AFTER (all_late_ipa_passes)
+   NEXT_PASS (pass_materialize_all_clones);
+   NEXT_PASS (pass_ipa_pta);
+-  NEXT_PASS (pass_ipa_reorder_fields);
++  NEXT_PASS (pass_ipa_struct_layout);
+   /* FIXME: this should a normal IP pass */
+   NEXT_PASS (pass_ipa_struct_reorg);
+   NEXT_PASS (pass_omp_simd_clone);
+diff --git a/gcc/symbol-summary.h b/gcc/symbol-summary.h
+index ddf5e3577..f62222a96 100644
+--- a/gcc/symbol-summary.h
++++ b/gcc/symbol-summary.h
+@@ -61,7 +61,7 @@ protected:
+   {
+     /* In structure optimizatons, we call new to ensure that
+        the allocated memory is initialized to 0.  */
+-    if (flag_ipa_reorder_fields || flag_ipa_struct_reorg)
++    if (flag_ipa_struct_layout || flag_ipa_struct_reorg)
+       return is_ggc () ? new (ggc_internal_alloc (sizeof (T))) T ()
+ 			: new T ();
+     /* Call gcc_internal_because we do not want to call finalizer for
+@@ -77,7 +77,7 @@ protected:
+       ggc_delete (item);
+     else
+       {
+-	if (flag_ipa_reorder_fields || flag_ipa_struct_reorg)
++	if (flag_ipa_struct_layout || flag_ipa_struct_reorg)
+ 	  delete item;
+ 	else
+ 	  m_allocator.remove (item);
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c b/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c
+index b95be2dab..882a695b0 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c
+@@ -72,4 +72,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c
+index 3d243313b..20ecee545 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c
+@@ -91,4 +91,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c b/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c
+index a5477dcc9..ad879fc11 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_check_ptr_layers_bug.c
+@@ -21,4 +21,4 @@ main()
+ {
+   g();
+ }
+-/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c b/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c
+index 886706ae9..f0c9d8f39 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c
+@@ -79,4 +79,4 @@ main()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c b/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c
+index f3785f392..fa5e6c2d0 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c
+@@ -53,4 +53,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c
+index 1415d759a..2966869e7 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c
+@@ -57,4 +57,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c b/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c
+index 003da0b57..b74b9e5e9 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c
+@@ -80,4 +80,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c
+index 10dcf098c..cf85c6109 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c
+@@ -69,4 +69,4 @@ main()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c
+index 8d1a9a114..61fd9f755 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c
+@@ -55,4 +55,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c
+index 23765fc56..2c115da02 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_mul_layer_ptr_record_bug.c
+@@ -27,4 +27,4 @@ main() {
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c
+index 54e737ee8..c7646d8b7 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c
+@@ -106,4 +106,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c
+index 2ae46fb31..01c000375 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c
+@@ -84,4 +84,4 @@ main ()
+   return cnt;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c
+index 3a3c10b70..f962163fe 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c
+@@ -68,4 +68,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c
+index 7b7d110df..6558b1797 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c
+@@ -52,4 +52,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c
+index 317aafa5f..6d528ed5b 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_offset.c
+@@ -31,4 +31,4 @@ main ()
+   printf (" Tree.\n");
+ }
+ 
+-/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c
+index 01a33f669..e95cf2e5d 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c
+@@ -52,4 +52,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c
+index a38556533..cb4054522 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c
+@@ -55,4 +55,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c b/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c
+index 5c17ee528..38bddbae5 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c
+@@ -54,4 +54,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c
+index 710517ee9..86034f042 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c
+@@ -62,4 +62,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c
+index 6ed0a5d2d..aae7c4bc9 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c
+@@ -69,4 +69,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c
+index 5a2dd964f..8672e7552 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c
+@@ -55,4 +55,4 @@ main()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c
+index faa90b42d..2d67434a0 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c
+@@ -78,4 +78,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_visible_func.c b/gcc/testsuite/gcc.dg/struct/rf_visible_func.c
+index 8f2da99cc..a8cf2b63c 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_visible_func.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_visible_func.c
+@@ -89,4 +89,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c b/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c
+index 723142c59..b6cba3c34 100644
+--- a/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c
++++ b/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c
+@@ -51,4 +51,4 @@ main()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */
+\ No newline at end of file
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_layout" } } */
+\ No newline at end of file
+diff --git a/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c b/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c
+index 9a82da0d6..a0614a1ba 100644
+--- a/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c
++++ b/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c
+@@ -30,4 +30,4 @@ main ()
+   return 0;
+ }
+ 
+-/* { dg-final { scan-ipa-dump "struct node has escaped: \"Type escapes via a unhandled rewrite stmt\"" "struct_reorg" } } */
++/* { dg-final { scan-ipa-dump "has escaped: \"Type escapes via a unhandled rewrite stmt\"" "struct_reorg" } } */
+diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
+index c8db4675f..67b3ac2d5 100644
+--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
++++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
+@@ -47,6 +47,25 @@ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/complete_struct_relayout
+ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf*.c]] \
+ 	"" "-fipa-reorder-fields -fdump-ipa-all -flto-partition=one -fwhole-program"
+ 
++# -fipa-struct-reorg=1
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/wo_prof_*.c]] \
++	"" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_ratio_*.c]] \
++	"" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_prof_*.c]] \
++	"" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/struct_reorg*.c]] \
++	"" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/sr_*.c]] \
++	"" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/csr_*.c]] \
++	"" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/complete_struct_relayout.c]] \
++	"" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program"
++
++# -fipa-struct-reorg=2
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
++	"" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program"
+ # All done.
+ torture-finish
+ dg-finish
+diff --git a/gcc/timevar.def b/gcc/timevar.def
+index e873747a8..b179f62bb 100644
+--- a/gcc/timevar.def
++++ b/gcc/timevar.def
+@@ -80,7 +80,7 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP     , "ipa cp")
+ DEFTIMEVAR (TV_IPA_INLINING          , "ipa inlining heuristics")
+ DEFTIMEVAR (TV_IPA_FNSPLIT           , "ipa function splitting")
+ DEFTIMEVAR (TV_IPA_COMDATS           , "ipa comdats")
+-DEFTIMEVAR (TV_IPA_REORDER_FIELDS    , "ipa struct reorder fields optimization")
++DEFTIMEVAR (TV_IPA_STRUCT_LAYOUT     , "ipa struct layout optimization")
+ DEFTIMEVAR (TV_IPA_STRUCT_REORG      , "ipa struct reorg optimization")
+ DEFTIMEVAR (TV_IPA_EXTEND_AUTO_PROFILE, "ipa extend auto profile")
+ DEFTIMEVAR (TV_IPA_OPT               , "ipa various optimizations")
+diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
+index be6387768..187f1a85c 100644
+--- a/gcc/tree-pass.h
++++ b/gcc/tree-pass.h
+@@ -509,7 +509,7 @@ extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt);
+ extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt);
+ extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt);
+ extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt);
+-extern simple_ipa_opt_pass *make_pass_ipa_reorder_fields (gcc::context *ctxt);
++extern simple_ipa_opt_pass *make_pass_ipa_struct_layout (gcc::context *ctxt);
+ extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt);
+ extern simple_ipa_opt_pass *make_pass_ipa_extend_auto_profile (gcc::context
+ 							       *ctxt);
+diff --git a/gcc/tree.c b/gcc/tree.c
+index 89fa469c3..c2075d735 100644
+--- a/gcc/tree.c
++++ b/gcc/tree.c
+@@ -5219,7 +5219,7 @@ fld_simplified_type_name (tree type)
+   /* Simplify type will cause that struct A and struct A within
+      struct B are different type pointers, so skip it in structure
+      optimizations.  */
+-  if (flag_ipa_reorder_fields || flag_ipa_struct_reorg)
++  if (flag_ipa_struct_layout || flag_ipa_struct_reorg)
+     return TYPE_NAME (type);
+ 
+   if (!TYPE_NAME (type) || TREE_CODE (TYPE_NAME (type)) != TYPE_DECL)
+@@ -5463,7 +5463,7 @@ fld_simplified_type (tree t, class free_lang_data_d *fld)
+   /* Simplify type will cause that struct A and struct A within
+      struct B are different type pointers, so skip it in structure
+      optimizations.  */
+-  if (flag_ipa_reorder_fields || flag_ipa_struct_reorg)
++  if (flag_ipa_struct_layout || flag_ipa_struct_reorg)
+     return t;
+   if (POINTER_TYPE_P (t))
+     return fld_incomplete_type_of (t, fld);
+-- 
+2.27.0.windows.1
+
diff --git a/0037-Backport-loop-invariant-Don-t-move-cold-bb-instructi.patch b/0037-Backport-loop-invariant-Don-t-move-cold-bb-instructi.patch
new file mode 100644
index 0000000000000000000000000000000000000000..b30030e29b9cd3e9f2b7eeada2b55c01204eb832
--- /dev/null
+++ b/0037-Backport-loop-invariant-Don-t-move-cold-bb-instructi.patch
@@ -0,0 +1,83 @@
+From 897d637aec3b077eb9ef95b2f4a5f7656e36ebd6 Mon Sep 17 00:00:00 2001
+From: benniaobufeijiushiji
+Date: Wed, 15 Jun 2022 11:33:03 +0800
+Subject: [PATCH 03/12] [Backport] loop-invariant: Don't move cold bb
+ instructions to preheader in RTL
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=dc1969dab392661cdac1170bbb8c9f83f388580d
+
+When an inner loop is unlikely to execute, loop-invariant motion would
+move cold instructions to a hotter loop.  This patch adds profile-count
+checking to fix the problem.
+---
+ gcc/loop-invariant.c                    | 17 ++++++++++++++---
+ gcc/testsuite/gcc.dg/loop-invariant-2.c | 20 ++++++++++++++++++++
+ 2 files changed, 34 insertions(+), 3 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/loop-invariant-2.c
+
+diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c
+index 37ae6549e..24b9bcb11 100644
+--- a/gcc/loop-invariant.c
++++ b/gcc/loop-invariant.c
+@@ -1184,9 +1184,21 @@ find_invariants_insn (rtx_insn *insn, bool always_reached, bool always_executed)
+    call.  */
+ 
+ static void
+-find_invariants_bb (basic_block bb, bool always_reached, bool always_executed)
++find_invariants_bb (class loop *loop, basic_block bb, bool always_reached,
++		    bool always_executed)
+ {
+   rtx_insn *insn;
++  basic_block preheader = loop_preheader_edge (loop)->src;
++
++  /* Don't move insn of cold BB out of loop to preheader to reduce calculations
++     and register live range in hot loop with cold BB.  */
++  if (!always_executed && preheader->count > bb->count)
++    {
++      if (dump_file)
++	fprintf (dump_file, "Don't move invariant from bb: %d out of loop %d\n",
++		 bb->index, loop->num);
++      return;
++    }
+ 
+   FOR_BB_INSNS (bb, insn)
+     {
+@@ -1215,8 +1227,7 @@ find_invariants_body (class loop *loop, basic_block *body,
+   unsigned i;
+ 
+   for (i = 0; i < loop->num_nodes; i++)
+-    find_invariants_bb (body[i],
+-			bitmap_bit_p (always_reached, i),
++    find_invariants_bb (loop, body[i], bitmap_bit_p (always_reached, i),
+ 			bitmap_bit_p (always_executed, i));
+ }
+ 
+diff --git a/gcc/testsuite/gcc.dg/loop-invariant-2.c b/gcc/testsuite/gcc.dg/loop-invariant-2.c
+new file mode 100644
+index 000000000..df3d84585
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/loop-invariant-2.c
+@@ -0,0 +1,20 @@
++/* { dg-do compile } */
++/* { dg-options "-O2 -fdump-rtl-loop2_invariant" } */
++
++volatile int x;
++void
++bar (int, char *, char *);
++void
++foo (int *a, int n, int k)
++{
++  int i;
++
++  for (i = 0; i < n; i++)
++    {
++      if (__builtin_expect (x, 0))
++	bar (k / 5, "one", "two");
++      a[i] = k;
++    }
++}
++
++/* { dg-final { scan-rtl-dump "Don't move invariant from bb: .*out of loop" "loop2_invariant" } } */
+-- 
+2.27.0.windows.1
+
diff --git a/0038-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch b/0038-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch
new file mode 100644
index 0000000000000000000000000000000000000000..6b913ae5b294b8fd3631526567728a7dea70b153
--- /dev/null
+++ b/0038-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch
@@ -0,0 +1,902 @@
+From edd4200e2b3e94d5c124900657b91c22dfe9c557 Mon Sep 17 00:00:00 2001
+From: Mingchuan Wu
+Date: Wed, 15 Jun 2022 16:00:25 +0800
+Subject: [PATCH 04/12] [DFE] Add Dead Field Elimination in Struct-Reorg.
+
+We can transform gimple to eliminate fields that are never read
+and remove their redundant stmts.
+We also adapted the partial escape_cast_another_ptr handling for
+struct relayout.
+Add flag -fipa-struct-reorg=3 to enable dead field elimination.
+---
+ gcc/common.opt                                |   4 +-
+ gcc/ipa-struct-reorg/ipa-struct-reorg.c       | 209 ++++++++++++++++--
+ gcc/ipa-struct-reorg/ipa-struct-reorg.h       |   9 +-
+ gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c  |  86 +++++++
+ .../gcc.dg/struct/dfe_ele_minus_verify.c      |  60 +++++
+ .../gcc.dg/struct/dfe_mem_ref_offset.c        |  58 +++++
+ .../struct/dfe_mul_layer_ptr_record_bug.c     |  30 +++
+ gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c    |  71 ++++++
+ .../gcc.dg/struct/dfe_ptr_negate_expr.c       |  55 +++++
+ gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c     |  55 +++++
+ gcc/testsuite/gcc.dg/struct/struct-reorg.exp  |  21 +-
+ 11 files changed, 639 insertions(+), 19 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c
+
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 7fc075d35..b5ea3c7a1 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1884,8 +1884,8 @@ Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization
+ Perform structure layout optimizations.
+ + fipa-struct-reorg= +-Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 2) +--fipa-struct-reorg=[0,1,2] adding none, struct-reorg, reorder-fields optimizations. ++Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 3) ++-fipa-struct-reorg=[0,1,2,3] adding none, struct-reorg, reorder-fields, dfe optimizations. + + fipa-extend-auto-profile + Common Report Var(flag_ipa_extend_auto_profile) +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +index 9214ee74a..2fa560239 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +@@ -81,6 +81,7 @@ along with GCC; see the file COPYING3. If not see + #include "tree-pretty-print.h" + #include "gimple-pretty-print.h" + #include "gimple-iterator.h" ++#include "gimple-walk.h" + #include "cfg.h" + #include "ssa.h" + #include "tree-dfa.h" +@@ -238,11 +239,44 @@ enum srmode + STRUCT_LAYOUT_OPTIMIZE + }; + ++/* Enum the struct layout optimize level, ++ which should be the same as the option -fstruct-reorg=. */ ++ ++enum struct_layout_opt_level ++{ ++ NONE = 0, ++ STRUCT_REORG, ++ STRUCT_REORDER_FIELDS, ++ DEAD_FIELD_ELIMINATION ++}; ++ + static bool is_result_of_mult (tree arg, tree *num, tree struct_size); + bool isptrptr (tree type); + + srmode current_mode; + ++hash_map replace_type_map; ++ ++/* Return true if one of these types is created by struct-reorg. */ ++ ++static bool ++is_replace_type (tree type1, tree type2) ++{ ++ if (replace_type_map.is_empty ()) ++ return false; ++ if (type1 == NULL_TREE || type2 == NULL_TREE) ++ return false; ++ tree *type_value = replace_type_map.get (type1); ++ if (type_value) ++ if (types_compatible_p (*type_value, type2)) ++ return true; ++ type_value = replace_type_map.get (type2); ++ if (type_value) ++ if (types_compatible_p (*type_value, type1)) ++ return true; ++ return false; ++} ++ + } // anon namespace + + namespace struct_reorg { +@@ -318,12 +352,13 @@ srfunction::simple_dump (FILE *file) + /* Constructor of FIELD. */ + + srfield::srfield (tree field, srtype *base) +- : offset(int_byte_position (field)), ++ : offset (int_byte_position (field)), + fieldtype (TREE_TYPE (field)), + fielddecl (field), +- base(base), +- type(NULL), +- clusternum(0) ++ base (base), ++ type (NULL), ++ clusternum (0), ++ field_access (EMPTY_FIELD) + { + for(int i = 0;i < max_split; i++) + newfield[i] = NULL_TREE; +@@ -362,6 +397,25 @@ srtype::srtype (tree type) + } + } + ++/* Check it if all fields in the RECORD_TYPE are referenced. */ ++ ++bool ++srtype::has_dead_field (void) ++{ ++ bool may_dfe = false; ++ srfield *this_field; ++ unsigned i; ++ FOR_EACH_VEC_ELT (fields, i, this_field) ++ { ++ if (!(this_field->field_access & READ_FIELD)) ++ { ++ may_dfe = true; ++ break; ++ } ++ } ++ return may_dfe; ++} ++ + /* Mark the type as escaping type E at statement STMT. 
*/ + + void +@@ -833,6 +887,10 @@ srtype::create_new_type (void) + for (unsigned i = 0; i < fields.length (); i++) + { + srfield *f = fields[i]; ++ if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ && !(f->field_access & READ_FIELD)) ++ continue; + f->create_new_fields (newtype, newfields, newlast); + } + +@@ -854,6 +912,16 @@ srtype::create_new_type (void) + + warn_padded = save_warn_padded; + ++ if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ && replace_type_map.get (this->newtype[0]) == NULL) ++ replace_type_map.put (this->newtype[0], this->type); ++ if (dump_file) ++ { ++ if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ && has_dead_field ()) ++ fprintf (dump_file, "Dead field elimination.\n"); ++ } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Created %d types:\n", maxclusters); +@@ -1128,12 +1196,12 @@ csrtype::init_type_info (void) + + /* Close enough to pad to improve performance. + 33~63 should pad to 64 but 33~48 (first half) are too far away, and +- 65~127 should pad to 128 but 65~96 (first half) are too far away. */ ++ 65~127 should pad to 128 but 65~80 (first half) are too far away. */ + if (old_size > 48 && old_size < 64) + { + new_size = 64; + } +- if (old_size > 96 && old_size < 128) ++ if (old_size > 80 && old_size < 128) + { + new_size = 128; + } +@@ -1272,6 +1340,7 @@ public: + bool has_rewritten_type (srfunction*); + void maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt); + unsigned execute_struct_relayout (void); ++ bool remove_dead_field_stmt (tree lhs); + }; + + struct ipa_struct_relayout +@@ -3206,6 +3275,90 @@ ipa_struct_reorg::find_vars (gimple *stmt) + } + } + ++/* Update field_access in srfield. */ ++ ++static void ++update_field_access (tree record, tree field, unsigned access, void *data) ++{ ++ srtype *this_srtype = ((ipa_struct_reorg *)data)->find_type (record); ++ if (this_srtype == NULL) ++ return; ++ srfield *this_srfield = this_srtype->find_field (int_byte_position (field)); ++ if (this_srfield == NULL) ++ return; ++ ++ this_srfield->field_access |= access; ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "record field access %d:", access); ++ print_generic_expr (dump_file, record); ++ fprintf (dump_file, " field:"); ++ print_generic_expr (dump_file, field); ++ fprintf (dump_file, "\n"); ++ } ++ return; ++} ++ ++/* A callback for walk_stmt_load_store_ops to visit store. */ ++ ++static bool ++find_field_p_store (gimple *, tree node, tree op, void *data) ++{ ++ if (TREE_CODE (op) != COMPONENT_REF) ++ return false; ++ tree node_type = TREE_TYPE (node); ++ if (!handled_type (node_type)) ++ return false; ++ ++ update_field_access (node_type, TREE_OPERAND (op, 1), WRITE_FIELD, data); ++ ++ return false; ++} ++ ++/* A callback for walk_stmt_load_store_ops to visit load. */ ++ ++static bool ++find_field_p_load (gimple *, tree node, tree op, void *data) ++{ ++ if (TREE_CODE (op) != COMPONENT_REF) ++ return false; ++ tree node_type = TREE_TYPE (node); ++ if (!handled_type (node_type)) ++ return false; ++ ++ update_field_access (node_type, TREE_OPERAND (op, 1), READ_FIELD, data); ++ ++ return false; ++} ++ ++/* Determine whether the stmt should be deleted. 
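++
++ A hedged illustration (field names invented): if access tracking saw
++ only writes to a field, e.g.
++
++ struct arc { long cost; long org_cost; };
++ ...
++ ap_1->org_cost = _2;
++
++ then org_cost gets no slot in the reordered type (newfield[0] stays
++ NULL with only WRITE_FIELD recorded) and the store is flagged here
++ for removal.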
*/ ++ ++bool ++ipa_struct_reorg::remove_dead_field_stmt (tree lhs) ++{ ++ tree base = NULL_TREE; ++ bool indirect = false; ++ srtype *t = NULL; ++ srfield *f = NULL; ++ bool realpart = false; ++ bool imagpart = false; ++ bool address = false; ++ bool escape_from_base = false; ++ if (!get_type_field (lhs, base, indirect, t, f, realpart, imagpart, ++ address, escape_from_base)) ++ return false; ++ if (t ==NULL) ++ return false; ++ if (t->newtype[0] == t->type) ++ return false; ++ if (f == NULL) ++ return false; ++ if (f->newfield[0] == NULL ++ && (f->field_access & WRITE_FIELD)) ++ return true; ++ return false; ++} ++ + /* Maybe record access of statement for further analaysis. */ + + void +@@ -3227,6 +3380,13 @@ ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt) + default: + break; + } ++ if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION) ++ { ++ /* Look for loads and stores. */ ++ walk_stmt_load_store_ops (stmt, this, find_field_p_load, ++ find_field_p_store); ++ } + } + + /* Calculate the multiplier. */ +@@ -3543,8 +3703,11 @@ ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, gimple + } + else if (type != d->type) + { +- type->mark_escape (escape_cast_another_ptr, stmt); +- d->type->mark_escape (escape_cast_another_ptr, stmt); ++ if (!is_replace_type (d->type->type, type->type)) ++ { ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ d->type->mark_escape (escape_cast_another_ptr, stmt); ++ } + } + /* x_1 = y.x_nodes; void *x; + Directly mark the structure pointer type assigned +@@ -4131,8 +4294,9 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl, + } + /* If we have a non void* or a decl (which is hard to track), + then mark the type as escaping. */ +- if (!VOID_POINTER_P (TREE_TYPE (newdecl)) +- || DECL_P (newdecl)) ++ if (replace_type_map.get (type->type) == NULL ++ && (!VOID_POINTER_P (TREE_TYPE (newdecl)) ++ || DECL_P (newdecl))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { +@@ -4142,7 +4306,7 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl, + print_generic_expr (dump_file, TREE_TYPE (newdecl)); + fprintf (dump_file, "\n"); + } +- type->mark_escape (escape_cast_another_ptr, stmt); ++ type->mark_escape (escape_cast_another_ptr, stmt); + return; + } + /* At this point there should only be unkown void* ssa names. 
*/ +@@ -4465,11 +4629,13 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, vec< + + return; + } ++ if (!is_replace_type (t1->type, type->type)) ++ { ++ if (t1) ++ t1->mark_escape (escape_cast_another_ptr, stmt); + +- if (t1) +- t1->mark_escape (escape_cast_another_ptr, stmt); +- +- type->mark_escape (escape_cast_another_ptr, stmt); ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ } + } + + +@@ -5722,6 +5888,19 @@ bool + ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + { + bool remove = false; ++ ++ if (current_mode == STRUCT_LAYOUT_OPTIMIZE ++ && struct_layout_optimize_level >= DEAD_FIELD_ELIMINATION ++ && remove_dead_field_stmt (gimple_assign_lhs (stmt))) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\n rewriting statement (remove): \n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ } ++ return true; ++ } ++ + if (gimple_clobber_p (stmt)) + { + tree lhs = gimple_assign_lhs (stmt); +diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +index 54b0dc655..936c0fa6f 100644 +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +@@ -142,6 +142,7 @@ public: + + bool create_new_type (void); + void analyze (void); ++ bool has_dead_field (void); + void mark_escape (escape_type, gimple *stmt); + bool has_escaped (void) + { +@@ -163,6 +164,12 @@ public: + } + }; + ++/* Bitflags used for determining if a field ++ is never accessed, read or written. */ ++const unsigned EMPTY_FIELD = 0x0u; ++const unsigned READ_FIELD = 0x01u; ++const unsigned WRITE_FIELD = 0x02u; ++ + struct srfield + { + unsigned HOST_WIDE_INT offset; +@@ -174,7 +181,7 @@ struct srfield + unsigned clusternum; + + tree newfield[max_split]; +- ++ unsigned field_access; /* FIELD_DECL -> bitflag (use for dfe). */ + // Constructors + srfield (tree field, srtype *base); + +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c +new file mode 100644 +index 000000000..4261d2352 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_DTE_verify.c +@@ -0,0 +1,86 @@ ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++ network_t* net_add; ++}; ++ ++ ++const int MAX = 100; ++ ++/* let it escape_array, "Type is used in an array [not handled yet]". 
*/ ++network_t* net[2]; ++arc_p stop_arcs = NULL; ++ ++int ++main () ++{ ++ net[0] = (network_t*) calloc (1, sizeof(network_t)); ++ net[0]->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ ++ net[0]->arcs->id = 100; ++ ++ for (unsigned i = 0; i < 3; i++) ++ { ++ net[0]->arcs->id = net[0]->arcs->id + 2; ++ stop_arcs->cost = net[0]->arcs->id / 2; ++ stop_arcs->net_add = net[0]; ++ printf("stop_arcs->cost = %ld\n", stop_arcs->cost); ++ net[0]->arcs++; ++ stop_arcs++; ++ } ++ ++ if( net[1] != 0 && stop_arcs != 0) ++ { ++ return -1; ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c +new file mode 100644 +index 000000000..42d38c63a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ele_minus_verify.c +@@ -0,0 +1,60 @@ ++// verify newarc[cmp-1].flow ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++arc_p ap = NULL; ++ ++int ++main () ++{ ++ ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ printf("%d\n", ap[0].id); ++ for (int i = 1; i < MAX; i++) ++ { ++ ap[i-1].id = 500; ++ } ++ printf("%d\n", ap[0].id); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c +new file mode 100644 +index 000000000..53583fe82 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_mem_ref_offset.c +@@ -0,0 +1,58 @@ ++/* Supports the MEM_REF offset. ++ _1 = MEM[(struct arc *)ap_4 + 72B].flow; ++ Old rewrite:_1 = ap.reorder.0_8->flow; ++ New rewrite:_1 = MEM[(struct arc.reorder.0 *)ap.reorder.0_8 + 64B].flow. */ ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main () ++{ ++ const int MAX = 100; ++ /* A similar scenario can be reproduced only by using local variables. 
*/ ++ arc_p ap = NULL; ++ ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ printf("%d\n", ap[1].flow); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c +new file mode 100644 +index 000000000..fd675ec2e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_mul_layer_ptr_record_bug.c +@@ -0,0 +1,30 @@ ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct T_HASH_ENTRY ++{ ++ unsigned int hash; ++ unsigned int klen; ++ char *key; ++} iHashEntry; ++ ++typedef struct T_HASH ++{ ++ unsigned int size; ++ unsigned int fill; ++ unsigned int keys; ++ ++ iHashEntry **array; ++} uHash; ++ ++uHash *retval; ++ ++int ++main() { ++ retval->array = (iHashEntry **)calloc(sizeof(iHashEntry *), retval->size); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c +new file mode 100644 +index 000000000..600e7908b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_diff.c +@@ -0,0 +1,71 @@ ++// support POINTER_DIFF_EXPR & NOP_EXPR to avoid ++// escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt" ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main () ++{ ++ arc_t *old_arcs; ++ node_t *node; ++ node_t *stop; ++ size_t off; ++ network_t* net; ++ ++ for( ; node->number < stop->number; node++ ) ++ { ++ off = node->basic_arc - old_arcs; ++ node->basic_arc = (arc_t *)(net->arcs + off); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 3 "struct_layout" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c +new file mode 100644 +index 000000000..f411364a7 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_negate_expr.c +@@ -0,0 +1,55 @@ ++// support NEGATE_EXPR rewriting ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main () ++{ ++ int64_t susp = 0; ++ const int MAX = 100; ++ arc_p ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ ap -= 
susp; ++ printf("%d\n", ap[1].flow); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c +new file mode 100644 +index 000000000..a4e723763 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_ptr_ptr.c +@@ -0,0 +1,55 @@ ++// release escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]"; ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++arc_t **ap = NULL; ++ ++int ++main () ++{ ++ ap = (arc_t**) malloc(MAX * sizeof(arc_t*)); ++ (*ap)[0].id = 300; ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 2 "struct_layout" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +index 67b3ac2d5..ac5585813 100644 +--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp ++++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +@@ -64,8 +64,27 @@ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/complete_struct_relayout + "" "-fipa-struct-reorg=1 -fdump-ipa-all -flto-partition=one -fwhole-program" + + # -fipa-struct-reorg=2 +-gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \ ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf*.c]] \ ++ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/wo_prof_*.c]] \ + "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_ratio_*.c]] \ ++ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_prof_*.c]] \ ++ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/struct_reorg*.c]] \ ++ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/sr_*.c]] \ ++ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/csr_*.c]] \ ++ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/complete_struct_relayout.c]] \ ++ "" "-fipa-struct-reorg=2 -fdump-ipa-all -flto-partition=one -fwhole-program" ++ ++# -fipa-struct-reorg=3 ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/dfe*.c]] \ ++ "" "-fipa-struct-reorg=3 -fdump-ipa-all -flto-partition=one -fwhole-program" ++ + # All done. 
torture-finish
+ dg-finish
+--
+2.27.0.windows.1
+
diff --git a/0039-Backport-ipa-sra-Fix-thinko-when-overriding-safe_to_.patch b/0039-Backport-ipa-sra-Fix-thinko-when-overriding-safe_to_.patch
new file mode 100644
index 0000000000000000000000000000000000000000..d63a8efa43583eb3bee074f21b0bc485e13374ff
--- /dev/null
+++ b/0039-Backport-ipa-sra-Fix-thinko-when-overriding-safe_to_.patch
@@ -0,0 +1,143 @@
+From d8753de2129d230afc9a887d5804747c69824a68 Mon Sep 17 00:00:00 2001
+From: zhaowenyu <804544223@qq.com>
+Date: Mon, 20 Jun 2022 11:24:45 +0800
+Subject: [PATCH 05/12] [Backport] ipa-sra: Fix thinko when overriding
+ safe_to_import_accesses (PR 101066)
+
+Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=5aa28c8cf15cd254cc5a3a12278133b93b8b017f
+
+ipa-sra: Fix thinko when overriding safe_to_import_accesses (PR 101066)
+
+The "new" IPA-SRA has a more difficult job than the previous
+not-truly-IPA version when identifying situations in which a parameter
+passed by reference can be passed into a third function and only there
+converted to one passed by value (and possibly "split" at the same
+time).
+
+In order to allow this, two conditions must be fulfilled. First the
+call to the third function must happen before any modifications of
+memory, because it could change the value passed by reference.
+Second, in order to make sure we do not introduce new (invalid)
+dereferences, the call must postdominate the entry BB.
+
+The second condition is actually not necessary if the caller function
+is also certain to dereference the pointer but the first one must
+still hold. Unfortunately, the code making this overriding decision
+also happens to trigger when the first condition is not fulfilled.
+This is fixed in the following patch.
+
+gcc/ChangeLog:
+
+2021-06-16 Martin Jambor
+
+(cherry picked from commit 763121ccd908f52bc666f277ea2cf42110b3aad9)
+---
+ gcc/ipa-sra.c | 15 +++++++++++++--
+ gcc/testsuite/gcc.dg/ipa/pr101066.c | 20 ++++++++++++++++++++
+ 2 files changed, 33 insertions(+), 2 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/ipa/pr101066.c
+
+diff --git a/gcc/ipa-sra.c b/gcc/ipa-sra.c
+index b706fceff..1cb30afc3 100644
+--- a/gcc/ipa-sra.c
++++ b/gcc/ipa-sra.c
+@@ -340,7 +340,7 @@ class isra_call_summary
+ public:
+ isra_call_summary ()
+ : m_arg_flow (), m_return_ignored (false), m_return_returned (false),
+- m_bit_aligned_arg (false)
++ m_bit_aligned_arg (false), m_before_any_store (false)
+ {}
+
+ void init_inputs (unsigned arg_count);
+@@ -359,6 +359,10 @@ public:
+
+ /* Set when any of the call arguments are not byte-aligned. */
+ unsigned m_bit_aligned_arg : 1;
++
++ /* Set to true if the call happened before any (other) store to memory in the
++ caller. */
++ unsigned m_before_any_store : 1;
+ };
+
+ /* Class to manage function summaries.
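+
+ A hedged sketch of the hazard m_before_any_store guards against
+ (names invented):
+
+ static int third (int *p) { return *p; }
+ static int second (int *p)
+ {
+ *p = 5;
+ return third (p);
+ }
+
+ The store precedes the call, so converting the argument of third to
+ pass-by-value would read *p too early; the flag may only be set when
+ the call happens before any other store in the caller.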
*/ +@@ -472,6 +476,8 @@ isra_call_summary::dump (FILE *f) + fprintf (f, " return value ignored\n"); + if (m_return_returned) + fprintf (f, " return value used only to compute caller return value\n"); ++ if (m_before_any_store) ++ fprintf (f, " happens before any store to memory\n"); + for (unsigned i = 0; i < m_arg_flow.length (); i++) + { + fprintf (f, " Parameter %u:\n", i); +@@ -516,6 +522,7 @@ ipa_sra_call_summaries::duplicate (cgraph_edge *, cgraph_edge *, + new_sum->m_return_ignored = old_sum->m_return_ignored; + new_sum->m_return_returned = old_sum->m_return_returned; + new_sum->m_bit_aligned_arg = old_sum->m_bit_aligned_arg; ++ new_sum->m_before_any_store = old_sum->m_before_any_store; + } + + +@@ -2355,6 +2362,7 @@ process_scan_results (cgraph_node *node, struct function *fun, + unsigned count = gimple_call_num_args (call_stmt); + isra_call_summary *csum = call_sums->get_create (cs); + csum->init_inputs (count); ++ csum->m_before_any_store = uses_memory_as_obtained; + for (unsigned argidx = 0; argidx < count; argidx++) + { + if (!csum->m_arg_flow[argidx].pointer_pass_through) +@@ -2601,6 +2609,7 @@ isra_write_edge_summary (output_block *ob, cgraph_edge *e) + bp_pack_value (&bp, csum->m_return_ignored, 1); + bp_pack_value (&bp, csum->m_return_returned, 1); + bp_pack_value (&bp, csum->m_bit_aligned_arg, 1); ++ bp_pack_value (&bp, csum->m_before_any_store, 1); + streamer_write_bitpack (&bp); + } + +@@ -2719,6 +2728,7 @@ isra_read_edge_summary (struct lto_input_block *ib, cgraph_edge *cs) + csum->m_return_ignored = bp_unpack_value (&bp, 1); + csum->m_return_returned = bp_unpack_value (&bp, 1); + csum->m_bit_aligned_arg = bp_unpack_value (&bp, 1); ++ csum->m_before_any_store = bp_unpack_value (&bp, 1); + } + + /* Read intraprocedural analysis information about NODE and all of its outgoing +@@ -3475,7 +3485,8 @@ param_splitting_across_edge (cgraph_edge *cs) + } + else if (!ipf->safe_to_import_accesses) + { +- if (!all_callee_accesses_present_p (param_desc, arg_desc)) ++ if (!csum->m_before_any_store ++ || !all_callee_accesses_present_p (param_desc, arg_desc)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " %u->%u: cannot import accesses.\n", +diff --git a/gcc/testsuite/gcc.dg/ipa/pr101066.c b/gcc/testsuite/gcc.dg/ipa/pr101066.c +new file mode 100644 +index 000000000..1ceb6e431 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/ipa/pr101066.c +@@ -0,0 +1,20 @@ ++/* { dg-do run } */ ++/* { dg-options "-Os -fno-ipa-cp -fno-inline" } */ ++ ++int a = 1, c, d, e; ++int *b = &a; ++static int g(int *h) { ++ c = *h; ++ return d; ++} ++static void f(int *h) { ++ e = *h; ++ *b = 0; ++ g(h); ++} ++int main() { ++ f(b); ++ if (c) ++ __builtin_abort(); ++ return 0; ++} +-- +2.27.0.windows.1 + diff --git a/0040-Backport-ifcvt-Allow-constants-for-noce_convert_mult.patch b/0040-Backport-ifcvt-Allow-constants-for-noce_convert_mult.patch new file mode 100644 index 0000000000000000000000000000000000000000..23e3b89074b04ca1895a4468e0c699cd7c340bd7 --- /dev/null +++ b/0040-Backport-ifcvt-Allow-constants-for-noce_convert_mult.patch @@ -0,0 +1,59 @@ +From ea059ab02ac79eba1c05d6e05cbb2590c47d7c1f Mon Sep 17 00:00:00 2001 +From: zhaowenyu <804544223@qq.com> +Date: Thu, 23 Jun 2022 10:16:08 +0800 +Subject: [PATCH 06/12] [Backport] ifcvt: Allow constants for + noce_convert_multiple. + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=9b8eaa282250ad260e01d164093b597579db00d9 + +This lifts the restriction of not allowing constants for noce_convert_multiple. 
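+
+A hedged example of the shape this now allows (invented, not from the
+patch):
+
+ int f (int a, int b, int c)
+ {
+ if (a > 0)
+ {
+ b = 3;
+ c = 4;
+ }
+ return b + c;
+ }
+
+The two sets from the constants 3 and 4 previously failed the REG_P
+test in bb_ok_for_noce_convert_multiple_sets, blocking conversion of
+the block into conditional moves.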
+The code later checks if a valid sequence is produced anyway. +gcc/ChangeLog: + + * ifcvt.cc (noce_convert_multiple_sets): Allow constants. + (bb_ok_for_noce_convert_multiple_sets): Likewise. +--- + gcc/ifcvt.c | 13 ++++++++----- + 1 file changed, 8 insertions(+), 5 deletions(-) + +diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c +index 977dd1bd4..2452f231c 100644 +--- a/gcc/ifcvt.c ++++ b/gcc/ifcvt.c +@@ -3252,7 +3252,9 @@ noce_convert_multiple_sets (struct noce_if_info *if_info) + we'll end up trying to emit r4:HI = cond ? (r1:SI) : (r3:HI). + Wrap the two cmove operands into subregs if appropriate to prevent + that. */ +- if (GET_MODE (new_val) != GET_MODE (temp)) ++ ++ if (!CONSTANT_P (new_val) ++ && GET_MODE (new_val) != GET_MODE (temp)) + { + machine_mode src_mode = GET_MODE (new_val); + machine_mode dst_mode = GET_MODE (temp); +@@ -3263,7 +3265,8 @@ noce_convert_multiple_sets (struct noce_if_info *if_info) + } + new_val = lowpart_subreg (dst_mode, new_val, src_mode); + } +- if (GET_MODE (old_val) != GET_MODE (temp)) ++ if (!CONSTANT_P (old_val) ++ && GET_MODE (old_val) != GET_MODE (temp)) + { + machine_mode src_mode = GET_MODE (old_val); + machine_mode dst_mode = GET_MODE (temp); +@@ -3392,9 +3395,9 @@ bb_ok_for_noce_convert_multiple_sets (basic_block test_bb) + if (!REG_P (dest)) + return false; + +- if (!(REG_P (src) +- || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src)) +- && subreg_lowpart_p (src)))) ++ if (!((REG_P (src) || CONSTANT_P (src)) ++ || (GET_CODE (src) == SUBREG && REG_P (SUBREG_REG (src)) ++ && subreg_lowpart_p (src)))) + return false; + + /* Destination must be appropriate for a conditional write. */ +-- +2.27.0.windows.1 + diff --git a/0041-Backport-Register-sysroot-in-the-driver-switches-tab.patch b/0041-Backport-Register-sysroot-in-the-driver-switches-tab.patch new file mode 100644 index 0000000000000000000000000000000000000000..441b5c5afd5789f77ad62563686585c584286c5c --- /dev/null +++ b/0041-Backport-Register-sysroot-in-the-driver-switches-tab.patch @@ -0,0 +1,40 @@ +From beeb0fb50c7e40ee3d79044abc6408f760d6584a Mon Sep 17 00:00:00 2001 +From: zhaowenyu <804544223@qq.com> +Date: Thu, 23 Jun 2022 10:40:46 +0800 +Subject: [PATCH 07/12] [Backport] Register --sysroot in the driver switches + table + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=48e2d9b7b88dabed336cd098cd212d0e53c5125f + +This change adjusts the processing of --sysroot to save the option in the internal "switches" +array, which lets self-specs test for it and provide a default value possibly dependent on +environment variables, as in + + --with-specs=%{!-sysroot*:--sysroot=%:getenv("WIND_BASE" /target)} + +2021-12-20 Olivier Hainque + + gcc/ + * gcc.c (driver_handle_option): do_save --sysroot. +--- + gcc/gcc.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/gcc/gcc.c b/gcc/gcc.c +index b55075b14..655beffcc 100644 +--- a/gcc/gcc.c ++++ b/gcc/gcc.c +@@ -4190,7 +4190,9 @@ driver_handle_option (struct gcc_options *opts, + case OPT__sysroot_: + target_system_root = arg; + target_system_root_changed = 1; +- do_save = false; ++ /* Saving this option is useful to let self-specs decide to ++ provide a default one. 
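++
++ For example, a configure-time self-spec like the one quoted in the
++ commit message,
++
++ --with-specs=%{!-sysroot*:--sysroot=%:getenv("WIND_BASE" /target)}
++
++ can now observe whether --sysroot was given and substitute an
++ environment-derived default when it was not.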
*/
++ do_save = true;
+ break;
+
+ case OPT_time_:
+--
+2.27.0.windows.1
+
diff --git a/0042-DFE-Fix-bugs.patch b/0042-DFE-Fix-bugs.patch
new file mode 100644
index 0000000000000000000000000000000000000000..58015585cbf5c6fa3929316690a92adfff0757c3
--- /dev/null
+++ b/0042-DFE-Fix-bugs.patch
@@ -0,0 +1,665 @@
+From f8308a2b440efe124cd6ff59924f135e85e53888 Mon Sep 17 00:00:00 2001
+From: Mingchuan Wu
+Date: Sat, 18 Jun 2022 17:51:04 +0800
+Subject: [PATCH 08/12] [DFE] Fix bugs
+
+Fix the following bugs:
+1. Fixed a bug in the replace-type check (is_replace_type).
+2. Update field accesses through the base declaration, so MEM_REF
+accesses are recorded as well as COMPONENT_REF ones.
+3. We now replace the dead fields in a stmt by creating a new SSA name.
+4. The replaced type is no longer optimized in NORMAL mode.
+
+We also added 5 DejaGnu test cases.
+---
+ gcc/ipa-struct-reorg/ipa-struct-reorg.c | 77 ++++++---
+ gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c | 56 ++++++
+ gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c | 162 ++++++++++++++++++
+ gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c | 126 ++++++++++++++
+ .../gcc.dg/struct/dfe_extr_tcp_usrreq.c | 58 +++++++
+ .../gcc.dg/struct/dfe_extr_ui_main.c | 61 +++++++
+ 6 files changed, 516 insertions(+), 24 deletions(-)
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c
+ create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c
+
+diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
+index 2fa560239..00dc4bf1d 100644
+--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
+@@ -252,6 +252,7 @@ enum struct_layout_opt_level
+
+ static bool is_result_of_mult (tree arg, tree *num, tree struct_size);
+ bool isptrptr (tree type);
++void get_base (tree &base, tree expr);
+
+ srmode current_mode;
+
+@@ -631,7 +632,15 @@ srtype::analyze (void)
+ into 2 different structures. In future we intend to add profile
+ info and/or static heuristics to differentiate splitting process. */
+ if (fields.length () == 2)
+- fields[1]->clusternum = 1;
++ {
++ for (hash_map<tree, tree>::iterator it = replace_type_map.begin ();
++ it != replace_type_map.end (); ++it)
++ {
++ if (types_compatible_p ((*it).second, this->type))
++ return;
++ }
++ fields[1]->clusternum = 1;
++ }
+
+ /* Otherwise we do nothing. */
+ if (fields.length () >= 3)
+@@ -3278,12 +3287,33 @@ ipa_struct_reorg::find_vars (gimple *stmt)
+ /* Update field_access in srfield.
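+
+ Roughly, the two access shapes recognized below (SSA names
+ invented): a COMPONENT_REF such as _1 = ap_2->id, whose offset
+ comes from int_byte_position, and a MEM_REF such as
+ _3 = MEM[(struct arc *)ap_2 + 8B], whose offset is its constant
+ operand 1. The byte offset then selects the srfield whose
+ field_access bits are updated through the base declaration's srtype.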
*/ + + static void +-update_field_access (tree record, tree field, unsigned access, void *data) ++update_field_access (tree node, tree op, unsigned access, void *data) + { +- srtype *this_srtype = ((ipa_struct_reorg *)data)->find_type (record); ++ HOST_WIDE_INT offset = 0; ++ switch (TREE_CODE (op)) ++ { ++ case COMPONENT_REF: ++ { ++ offset = int_byte_position (TREE_OPERAND (op, 1)); ++ break; ++ } ++ case MEM_REF: ++ { ++ offset = tree_to_uhwi (TREE_OPERAND (op, 1)); ++ break; ++ } ++ default: ++ return; ++ } ++ tree base = node; ++ get_base (base, node); ++ srdecl *this_srdecl = ((ipa_struct_reorg *)data)->find_decl (base); ++ if (this_srdecl == NULL) ++ return; ++ srtype *this_srtype = this_srdecl->type; + if (this_srtype == NULL) + return; +- srfield *this_srfield = this_srtype->find_field (int_byte_position (field)); ++ srfield *this_srfield = this_srtype->find_field (offset); + if (this_srfield == NULL) + return; + +@@ -3291,9 +3321,9 @@ update_field_access (tree record, tree field, unsigned access, void *data) + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "record field access %d:", access); +- print_generic_expr (dump_file, record); ++ print_generic_expr (dump_file, this_srtype->type); + fprintf (dump_file, " field:"); +- print_generic_expr (dump_file, field); ++ print_generic_expr (dump_file, this_srfield->fielddecl); + fprintf (dump_file, "\n"); + } + return; +@@ -3302,15 +3332,10 @@ update_field_access (tree record, tree field, unsigned access, void *data) + /* A callback for walk_stmt_load_store_ops to visit store. */ + + static bool +-find_field_p_store (gimple *, tree node, tree op, void *data) ++find_field_p_store (gimple *stmt ATTRIBUTE_UNUSED, ++ tree node, tree op, void *data) + { +- if (TREE_CODE (op) != COMPONENT_REF) +- return false; +- tree node_type = TREE_TYPE (node); +- if (!handled_type (node_type)) +- return false; +- +- update_field_access (node_type, TREE_OPERAND (op, 1), WRITE_FIELD, data); ++ update_field_access (node, op, WRITE_FIELD, data); + + return false; + } +@@ -3318,15 +3343,10 @@ find_field_p_store (gimple *, tree node, tree op, void *data) + /* A callback for walk_stmt_load_store_ops to visit load. */ + + static bool +-find_field_p_load (gimple *, tree node, tree op, void *data) ++find_field_p_load (gimple *stmt ATTRIBUTE_UNUSED, ++ tree node, tree op, void *data) + { +- if (TREE_CODE (op) != COMPONENT_REF) +- return false; +- tree node_type = TREE_TYPE (node); +- if (!handled_type (node_type)) +- return false; +- +- update_field_access (node_type, TREE_OPERAND (op, 1), READ_FIELD, data); ++ update_field_access (node, op, READ_FIELD, data); + + return false; + } +@@ -4629,7 +4649,7 @@ ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, vec< + + return; + } +- if (!is_replace_type (t1->type, type->type)) ++ if (!is_replace_type (inner_type (t), type->type)) + { + if (t1) + t1->mark_escape (escape_cast_another_ptr, stmt); +@@ -5898,7 +5918,16 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) + fprintf (dump_file, "\n rewriting statement (remove): \n"); + print_gimple_stmt (dump_file, stmt, 0); + } +- return true; ++ /* Replace the dead field in stmt by creating a dummy ssa. 
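++
++ Roughly (SSA names invented): a store to a never-read field,
++
++ ap_1->org_cost = _2;
++
++ becomes
++
++ dummy_3 = _2;
++
++ keeping the RHS in place while the eliminated field, which has no
++ slot in the new layout, is no longer referenced; the dead copy is
++ left for later DCE to clean up.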
*/ ++ tree dummy_ssa = make_ssa_name (TREE_TYPE (gimple_assign_lhs (stmt))); ++ gimple_assign_set_lhs (stmt, dummy_ssa); ++ update_stmt (stmt); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "To: \n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ } ++ return false; + } + + if (gimple_clobber_p (stmt)) +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c +new file mode 100644 +index 000000000..13a226ee8 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_dtrace.c +@@ -0,0 +1,56 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++typedef struct TYPE_4__ TYPE_2__; ++typedef struct TYPE_3__ TYPE_1__; ++ ++typedef int uint8_t; ++typedef int uint16_t; ++ ++struct TYPE_4__ ++{ ++ size_t cpu_id; ++}; ++ ++struct TYPE_3__ ++{ ++ int cpuc_dtrace_flags; ++}; ++ ++TYPE_2__ *CPU; ++volatile int CPU_DTRACE_FAULT; ++TYPE_1__ *cpu_core; ++scalar_t__ dtrace_load8 (uintptr_t); ++ ++__attribute__((used)) static int ++dtrace_bcmp (const void *s1, const void *s2, size_t len) ++{ ++ volatile uint16_t *flags; ++ flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; ++ if (s1 == s2) ++ return (0); ++ if (s1 == NULL || s2 == NULL) ++ return (1); ++ if (s1 != s2 && len != 0) ++ { ++ const uint8_t *ps1 = s1; ++ const uint8_t *ps2 = s2; ++ do ++ { ++ if (dtrace_load8 ((uintptr_t)ps1++) != *ps2++) ++ return (1); ++ } ++ while (--len != 0 && !(*flags & CPU_DTRACE_FAULT)); ++ } ++ return (0); ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c +new file mode 100644 +index 000000000..1fff2cb9d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_gc.c +@@ -0,0 +1,162 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++struct mrb_context ++{ ++ size_t stack; ++ size_t stbase; ++ size_t stend; ++ size_t eidx; ++ int *ci; ++ int *cibase; ++ int status; ++}; ++ ++struct RObject ++{ ++ int dummy; ++}; ++ ++struct RHash ++{ ++ int dummy; ++}; ++ ++struct RFiber ++{ ++ struct mrb_context *cxt; ++}; ++ ++struct RClass ++{ ++ int dummy; ++}; ++ ++struct RBasic ++{ ++ int tt; ++}; ++ ++struct RArray ++{ ++ int dummy; ++}; ++ ++typedef int mrb_state; ++typedef int mrb_gc; ++typedef int mrb_callinfo; ++size_t ARY_LEN (struct RArray *); ++size_t MRB_ENV_STACK_LEN (struct RBasic *); ++int MRB_FIBER_TERMINATED; ++ ++#define MRB_TT_ARRAY 140 ++#define MRB_TT_CLASS 139 ++#define MRB_TT_DATA 138 ++#define MRB_TT_ENV 137 ++#define MRB_TT_EXCEPTION 136 ++#define MRB_TT_FIBER 135 ++#define MRB_TT_HASH 134 ++#define MRB_TT_ICLASS 133 ++#define MRB_TT_MODULE 132 ++#define MRB_TT_OBJECT 131 ++#define MRB_TT_PROC 130 ++#define MRB_TT_RANGE 129 ++#define MRB_TT_SCLASS 128 ++ ++size_t ci_nregs (int *); ++int gc_mark_children (int *, int *, struct RBasic *); ++size_t mrb_gc_mark_hash_size (int *, struct RHash *); ++size_t mrb_gc_mark_iv_size (int *, struct RObject *); ++size_t mrb_gc_mark_mt_size (int *, struct RClass *); ++ ++__attribute__((used)) static size_t ++gc_gray_mark (mrb_state *mrb, mrb_gc *gc, struct RBasic *obj) ++{ ++ 
size_t children = 0; ++ gc_mark_children (mrb, gc, obj); ++ switch (obj->tt) ++ { ++ case MRB_TT_ICLASS: ++ children++; ++ break; ++ ++ case MRB_TT_CLASS: ++ case MRB_TT_SCLASS: ++ case MRB_TT_MODULE: ++ { ++ struct RClass *c = (struct RClass *)obj; ++ children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj); ++ children += mrb_gc_mark_mt_size (mrb, c); ++ children ++; ++ } ++ break; ++ ++ case MRB_TT_OBJECT: ++ case MRB_TT_DATA: ++ case MRB_TT_EXCEPTION: ++ children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj); ++ break; ++ ++ case MRB_TT_ENV: ++ children += MRB_ENV_STACK_LEN (obj); ++ break; ++ ++ case MRB_TT_FIBER: ++ { ++ struct mrb_context *c = ((struct RFiber *)obj)->cxt; ++ size_t i; ++ mrb_callinfo *ci; ++ if (!c || c->status == MRB_FIBER_TERMINATED) ++ break; ++ ++ i = c->stack - c->stbase; ++ if (c->ci) ++ { ++ i += ci_nregs (c->ci); ++ } ++ if (c->stbase + i > c->stend) ++ i = c->stend - c->stbase; ++ ++ children += i; ++ children += c->eidx; ++ if (c->cibase) ++ { ++ for (i = 0, ci = c->cibase; ci <= c->ci; i++, ci++) ++ ; ++ } ++ children += i; ++ } ++ break; ++ ++ case MRB_TT_ARRAY: ++ { ++ struct RArray *a = (struct RArray *)obj; ++ children += ARY_LEN (a); ++ } ++ break; ++ ++ case MRB_TT_HASH: ++ children += mrb_gc_mark_iv_size (mrb, (struct RObject *)obj); ++ children += mrb_gc_mark_hash_size (mrb, (struct RHash *)obj); ++ break; ++ ++ case MRB_TT_PROC: ++ case MRB_TT_RANGE: ++ children += 2; ++ break; ++ default: ++ break; ++ } ++ ++ return children; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c +new file mode 100644 +index 000000000..0f577667c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_hpsa.c +@@ -0,0 +1,126 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++typedef struct TYPE_6__ TYPE_3__; ++typedef struct TYPE_5__ TYPE_2__; ++typedef struct TYPE_4__ TYPE_1__; ++ ++struct io_accel2_cmd ++{ ++ int dummy; ++}; ++ ++struct hpsa_tmf_struct ++{ ++ int it_nexus; ++}; ++ ++struct hpsa_scsi_dev_t ++{ ++ int nphysical_disks; ++ int ioaccel_handle; ++ struct hpsa_scsi_dev_t **phys_disk; ++}; ++ ++struct ctlr_info ++{ ++ TYPE_3__ *pdev; ++ struct io_accel2_cmd *ioaccel2_cmd_pool; ++}; ++struct TYPE_4__ ++{ ++ int LunAddrBytes; ++}; ++ ++struct TYPE_5__ ++{ ++ TYPE_1__ LUN; ++}; ++ ++struct CommandList ++{ ++ size_t cmdindex; ++ int cmd_type; ++ struct hpsa_scsi_dev_t *phys_disk; ++ TYPE_2__ Header; ++}; ++ ++struct TYPE_6__ ++{ ++ int dev; ++}; ++ ++int BUG (); ++#define CMD_IOACCEL1 132 ++#define CMD_IOACCEL2 131 ++#define CMD_IOCTL_PEND 130 ++#define CMD_SCSI 129 ++#define IOACCEL2_TMF 128 ++int dev_err (int *, char *, int); ++scalar_t__ hpsa_is_cmd_idle (struct CommandList *); ++int le32_to_cpu (int); ++int test_memcmp (unsigned char *, int *, int); ++ ++__attribute__((used)) static bool ++hpsa_cmd_dev_match (struct ctlr_info *h, struct CommandList *c, ++ struct hpsa_scsi_dev_t *dev, unsigned char *scsi3addr) ++{ ++ int i; ++ bool match = false; ++ struct io_accel2_cmd * c2 = &h->ioaccel2_cmd_pool[c->cmdindex]; ++ struct hpsa_tmf_struct *ac = (struct hpsa_tmf_struct *)c2; ++ ++ if (hpsa_is_cmd_idle (c)) ++ return false; ++ ++ switch (c->cmd_type) ++ { ++ case CMD_SCSI: ++ case CMD_IOCTL_PEND: ++ match = !test_memcmp 
(scsi3addr, &c->Header.LUN.LunAddrBytes, ++ sizeof (c->Header.LUN.LunAddrBytes)); ++ break; ++ ++ case CMD_IOACCEL1: ++ case CMD_IOACCEL2: ++ if (c->phys_disk == dev) ++ { ++ match = true; ++ } ++ else ++ { ++ for (i = 0; i < dev->nphysical_disks && !match; i++) ++ { ++ match = dev->phys_disk[i] == c->phys_disk; ++ } ++ } ++ break; ++ ++ case IOACCEL2_TMF: ++ for (i = 0; i < dev->nphysical_disks && !match; i++) ++ { ++ match = dev->phys_disk[i]->ioaccel_handle == ++ le32_to_cpu (ac->it_nexus); ++ } ++ break; ++ ++ case 0: ++ match = false; ++ break; ++ default: ++ dev_err (&h->pdev->dev, "unexpected cmd_type: %d\n", c->cmd_type); ++ BUG (); ++ } ++ ++ return match; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_layout" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c +new file mode 100644 +index 000000000..5570c762e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_tcp_usrreq.c +@@ -0,0 +1,58 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++struct tcpcb ++{ ++ int t_state; ++}; ++ ++struct socket ++{ ++ int dummy; ++}; ++ ++struct proc ++{ ++ int dummy; ++}; ++ ++struct inpcb ++{ ++ scalar_t__ inp_lport; ++}; ++ ++int COMMON_END (int); ++int COMMON_START (); ++int PRU_LISTEN; ++int TCPS_LISTEN; ++int in_pcbbind (struct inpcb *, int *, struct proc *); ++struct inpcb* sotoinpcb (struct socket *); ++ ++__attribute__((used)) static void ++tcp_usr_listen (struct socket *so, struct proc *p) ++{ ++ int error = 0; ++ struct inpcb *inp = sotoinpcb (so); ++ struct tcpcb *tp; ++ ++ COMMON_START (); ++ if (inp->inp_lport == 0) ++ { ++ error = in_pcbbind (inp, NULL, p); ++ } ++ if (error == 0) ++ { ++ tp->t_state = TCPS_LISTEN; ++ } ++ COMMON_END (PRU_LISTEN); ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_layout" } } */ +diff --git a/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c +new file mode 100644 +index 000000000..50ab9cc24 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/struct/dfe_extr_ui_main.c +@@ -0,0 +1,61 @@ ++/* { dg-do compile} */ ++ ++#define NULL ((void*)0) ++typedef unsigned long size_t; ++typedef long intptr_t; ++typedef unsigned long uintptr_t; ++typedef long scalar_t__; ++typedef int bool; ++#define false 0 ++#define true 1 ++ ++typedef struct TYPE_4__ TYPE_2__; ++typedef struct TYPE_3__ TYPE_1__; ++ ++struct TYPE_4__ ++{ ++ size_t modCount; ++ TYPE_1__ *modList; ++}; ++ ++struct TYPE_3__ ++{ ++ void *modDescr; ++ void *modName; ++}; ++ ++size_t MAX_MODS; ++void *String_Alloc (char *); ++int test_strlen (char *); ++int trap_FD_GetFileList (char *, char *, char *, int); ++TYPE_2__ uiInfo; ++ ++__attribute__((used)) static void ++UI_LoadMods () ++{ ++ int numdirs; ++ char dirlist[2048]; ++ char *dirptr; ++ char *descptr; ++ int i; ++ int dirlen; ++ ++ uiInfo.modCount = 0; ++ numdirs = trap_FD_GetFileList ("$modelist", "", dirlist, sizeof (dirlist)); ++ dirptr = dirlist; ++ for (i = 0; i < numdirs; i++) ++ { ++ dirlen = test_strlen (dirptr) + 1; ++ descptr = dirptr + dirlen; ++ uiInfo.modList[uiInfo.modCount].modName = String_Alloc (dirptr); ++ uiInfo.modList[uiInfo.modCount].modDescr = String_Alloc (descptr); ++ dirptr += dirlen + test_strlen (descptr) + 1; ++ uiInfo.modCount++; ++ if (uiInfo.modCount >= 
MAX_MODS) ++ { ++ break; ++ } ++ } ++} ++ ++/* { dg-final { scan-ipa-dump-times "Dead field elimination" 1 "struct_layout" } } */ +-- +2.27.0.windows.1 + diff --git a/0043-Backport-Extend-special_memory_constraint.patch b/0043-Backport-Extend-special_memory_constraint.patch new file mode 100644 index 0000000000000000000000000000000000000000..3ca2d4356030779eeea926c1c626929a67253b3f --- /dev/null +++ b/0043-Backport-Extend-special_memory_constraint.patch @@ -0,0 +1,165 @@ +From b4770dd95fa342671d53c9de2077d77ee07b68dd Mon Sep 17 00:00:00 2001 +From: zhaowenyu <804544223@qq.com> +Date: Sat, 25 Jun 2022 00:41:50 +0800 +Subject: [PATCH 09/12] [Backport] Extend special_memory_constraint. + +Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=4de7b010038933dd6ca96bf186ca49f243d0def6 + +For operand with special_memory_constraint, there could be a wrapper for memory_operand. +Extract mem for operand for conditional judgement like MEM_P, also for record_address_regs. +--- + gcc/ira-costs.c | 12 +++++++----- + gcc/ira.c | 2 +- + gcc/lra-constraints.c | 28 +++++++++++++++++++++++----- + gcc/recog.c | 7 +++++-- + gcc/rtl.h | 1 + + 5 files changed, 37 insertions(+), 13 deletions(-) + +diff --git a/gcc/ira-costs.c b/gcc/ira-costs.c +index 6891156b5..aeda6588b 100644 +--- a/gcc/ira-costs.c ++++ b/gcc/ira-costs.c +@@ -781,7 +781,8 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops, + + case CT_SPECIAL_MEMORY: + insn_allows_mem[i] = allows_mem[i] = 1; +- if (MEM_P (op) && constraint_satisfied_p (op, cn)) ++ if (MEM_P (extract_mem_from_operand (op)) ++ && constraint_satisfied_p (op, cn)) + win = 1; + break; + +@@ -1397,15 +1398,16 @@ record_operand_costs (rtx_insn *insn, enum reg_class *pref) + commutative. */ + for (i = 0; i < recog_data.n_operands; i++) + { ++ rtx op_mem = extract_mem_from_operand (recog_data.operand[i]); + memcpy (op_costs[i], init_cost, struct_costs_size); + + if (GET_CODE (recog_data.operand[i]) == SUBREG) + recog_data.operand[i] = SUBREG_REG (recog_data.operand[i]); + +- if (MEM_P (recog_data.operand[i])) +- record_address_regs (GET_MODE (recog_data.operand[i]), +- MEM_ADDR_SPACE (recog_data.operand[i]), +- XEXP (recog_data.operand[i], 0), ++ if (MEM_P (op_mem)) ++ record_address_regs (GET_MODE (op_mem), ++ MEM_ADDR_SPACE (op_mem), ++ XEXP (op_mem, 0), + 0, MEM, SCRATCH, frequency * 2); + else if (constraints[i][0] == 'p' + || (insn_extra_address_constraint +diff --git a/gcc/ira.c b/gcc/ira.c +index 681ec2f46..c13650229 100644 +--- a/gcc/ira.c ++++ b/gcc/ira.c +@@ -1868,7 +1868,7 @@ ira_setup_alts (rtx_insn *insn) + + case CT_MEMORY: + case CT_SPECIAL_MEMORY: +- if (MEM_P (op)) ++ if (MEM_P (extract_mem_from_operand (op))) + goto op_success; + win_p = true; + break; +diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c +index 7cc479b30..df75c7b94 100644 +--- a/gcc/lra-constraints.c ++++ b/gcc/lra-constraints.c +@@ -409,14 +409,34 @@ valid_address_p (rtx op, struct address_info *ad, + return valid_address_p (ad->mode, *ad->outer, ad->as); + } + ++/* For special_memory_operand, it could be false for MEM_P (op), ++ i.e. bcst_mem_operand in i386 backend. ++ Extract and return real memory operand or op. */ ++rtx ++extract_mem_from_operand (rtx op) ++{ ++ for (rtx x = op;; x = XEXP (x, 0)) ++ { ++ if (MEM_P (x)) ++ return x; ++ if (GET_RTX_LENGTH (GET_CODE (x)) != 1 ++ || GET_RTX_FORMAT (GET_CODE (x))[0] != 'e') ++ break; ++ } ++ return op; ++} ++ + /* Return true if the eliminated form of memory reference OP satisfies + extra (special) memory constraint CONSTRAINT. 
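+
+ For instance (assumed from the i386 bcst_mem_operand case named in
+ the commit message), an operand such as
+
+ (vec_duplicate:V8SF (mem:SF (reg:DI r)))
+
+ is not itself a MEM, so the MEM is first dug out of the unary
+ wrapper before its address is decomposed and eliminated.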
*/
+ static bool
+ satisfies_memory_constraint_p (rtx op, enum constraint_num constraint)
+ {
+ struct address_info ad;
++ rtx mem = extract_mem_from_operand (op);
++ if (!MEM_P (mem))
++ return false;
+
+- decompose_mem_address (&ad, op);
++ decompose_mem_address (&ad, mem);
+ address_eliminator eliminator (&ad);
+ return constraint_satisfied_p (op, constraint);
+ }
+@@ -2344,8 +2364,7 @@ process_alt_operands (int only_alternative)
+ break;
+
+ case CT_MEMORY:
+- if (MEM_P (op)
+- && satisfies_memory_constraint_p (op, cn))
++ if (satisfies_memory_constraint_p (op, cn))
+ win = true;
+ else if (spilled_pseudo_p (op))
+ win = true;
+@@ -2386,8 +2405,7 @@ process_alt_operands (int only_alternative)
+ break;
+
+ case CT_SPECIAL_MEMORY:
+- if (MEM_P (op)
+- && satisfies_memory_constraint_p (op, cn))
++ if (satisfies_memory_constraint_p (op, cn))
+ win = true;
+ else if (spilled_pseudo_p (op))
+ win = true;
+diff --git a/gcc/recog.c b/gcc/recog.c
+index 2720aaaac..8674054b9 100644
+--- a/gcc/recog.c
++++ b/gcc/recog.c
+@@ -1798,7 +1798,8 @@ asm_operand_ok (rtx op, const char *constraint, const char **constraints)
+ case CT_MEMORY:
+ case CT_SPECIAL_MEMORY:
+ /* Every memory operand can be reloaded to fit. */
+- result = result || memory_operand (op, VOIDmode);
++ result = result || memory_operand (extract_mem_from_operand (op),
++ VOIDmode);
+ break;
+
+ case CT_ADDRESS:
+@@ -2584,7 +2585,9 @@ constrain_operands (int strict, alternative_mask alternatives)
+
+ /* A unary operator may be accepted by the predicate, but it
+ is irrelevant for matching constraints. */
+- if (UNARY_P (op))
++ /* For special_memory_operand, there could be a memory operand inside,
++ and it would cause a mismatch for constraint_satisfied_p. */
++ if (UNARY_P (op) && op == extract_mem_from_operand (op))
+ op = XEXP (op, 0);
+
+ if (GET_CODE (op) == SUBREG)
+diff --git a/gcc/rtl.h b/gcc/rtl.h
+index b29afca8d..35fb6ba73 100644
+--- a/gcc/rtl.h
++++ b/gcc/rtl.h
+@@ -4323,6 +4323,7 @@ extern rtx gen_hard_reg_clobber (machine_mode, unsigned int);
+ extern rtx get_reg_known_value (unsigned int);
+ extern bool get_reg_known_equiv_p (unsigned int);
+ extern rtx get_reg_base_value (unsigned int);
++extern rtx extract_mem_from_operand (rtx);
+
+ #ifdef STACK_REGS
+ extern int stack_regs_mentioned (const_rtx insn);
+--
+2.27.0.windows.1
+
diff --git a/0044-Backport-ira-Fix-unnecessary-register-spill.patch b/0044-Backport-ira-Fix-unnecessary-register-spill.patch
new file mode 100644
index 0000000000000000000000000000000000000000..63103fe82dc20c9802fdd80623dd9bcb6fec48a0
--- /dev/null
+++ b/0044-Backport-ira-Fix-unnecessary-register-spill.patch
@@ -0,0 +1,73 @@
+From 95d8a6545bef39f5deff376c60c38e4e3c13c8f5 Mon Sep 17 00:00:00 2001
+From: zhaowenyu <804544223@qq.com>
+Date: Sat, 25 Jun 2022 00:45:24 +0800
+Subject: [PATCH 10/12] [Backport] ira: Fix unnecessary register spill
+
+Reference: https://gcc.gnu.org/git/gitweb.cgi?p=gcc.git;h=edf95e51e53697f3050f076675c26a4cece17741
+
+The variables first_moveable_pseudo and last_moveable_pseudo aren't reset after compiling a function,
+which means they leak into the first scheduler pass of the following function. In some cases, this
+can cause an extra spill during register allocation of the second function.
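+
+A minimal sketch of the failure mode (illustrative only, not GCC code;
+names assumed): per-function cursors kept in globals must be cleared
+once a function is compiled, which is what the one-line fix does at the
+end of move_unallocated_pseudos ():
+
+ static int first_moveable_pseudo, last_moveable_pseudo;
+
+ void finish_one_function (void)
+ {
+ /* Stale values would mark pseudos of the next function, which
+ reuses the same numbering, as moveable and can force a spill. */
+ first_moveable_pseudo = last_moveable_pseudo = 0;
+ }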
+---
+ gcc/ira.c                                  |  2 ++
+ gcc/testsuite/gcc.target/aarch64/nospill.c | 35 ++++++++++++++++++++++
+ 2 files changed, 37 insertions(+)
+ create mode 100644 gcc/testsuite/gcc.target/aarch64/nospill.c
+
+diff --git a/gcc/ira.c b/gcc/ira.c
+index 681ec2f46..77e4bb988 100644
+--- a/gcc/ira.c
++++ b/gcc/ira.c
+@@ -5130,6 +5130,8 @@ move_unallocated_pseudos (void)
+ 	      INSN_UID (newinsn), i);
+ 	  SET_REG_N_REFS (i, 0);
+ 	}
++
++  first_moveable_pseudo = last_moveable_pseudo = 0;
+ }
+
+ /* If the backend knows where to allocate pseudos for hard
+diff --git a/gcc/testsuite/gcc.target/aarch64/nospill.c b/gcc/testsuite/gcc.target/aarch64/nospill.c
+new file mode 100644
+index 000000000..968a4267e
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/nospill.c
+@@ -0,0 +1,35 @@
++/* { dg-do compile } */
++/* { dg-options "-O3" } */
++
++/* The pseudo for P is marked as moveable in the IRA pass.  */
++float
++func_0 (float a, float b, float c)
++{
++  float p = c / a;
++
++  if (b > 1)
++    {
++      b /= p;
++      if (c > 2)
++	a /= 3;
++    }
++
++  return b / c * a;
++}
++
++/* If first_moveable_pseudo and last_moveable_pseudo are not reset correctly,
++   they will carry over and spill the pseudo for Q.  */
++float
++func_1 (float a, float b, float c)
++{
++  float q = a + b;
++
++  c *= a / (b + b);
++  if (a > 0)
++    c *= q;
++
++  return a * b * c;
++}
++
++/* We have plenty of spare registers, so check nothing has been spilled.  */
++/* { dg-final { scan-assembler-not "\tstr\t" } } */
+--
+2.27.0.windows.1
+
diff --git a/0045-Transposed-SLP-Enable-Transposed-SLP.patch b/0045-Transposed-SLP-Enable-Transposed-SLP.patch
new file mode 100644
index 0000000000000000000000000000000000000000..6323e8d915b6e25cbc3b1e905930dabc5683da80
--- /dev/null
+++ b/0045-Transposed-SLP-Enable-Transposed-SLP.patch
@@ -0,0 +1,3009 @@
+From 639b5248cbab1806618545fc30215ed9d1a019e7 Mon Sep 17 00:00:00 2001
+From: luohailing
+Date: Fri, 17 Jun 2022 22:38:55 +0800
+Subject: [PATCH 11/12] [Transposed SLP] Enable Transposed SLP
+
+Enable Transposed SLP when memory accesses are not contiguous, with
+-ftree-slp-transpose-vectorize.
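+
+The shape of kernel this targets looks like the following sketch (the
+shipped transpose-*.c tests below exercise the same pattern); each loop
+iteration stores one element into several distinct arrays, so the store
+group is "transposed" relative to memory order and ordinary basic-block
+SLP cannot treat it as one contiguous access:
+
+  void
+  kernel (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2,
+          unsigned c0[4], unsigned c1[4], unsigned c2[4], unsigned c3[4])
+  {
+    for (int i = 0; i < 4; i++, pix1 += i_pix1, pix2 += i_pix2)
+      {
+        c0[i] = pix1[0] - pix2[0];
+        c1[i] = pix1[1] - pix2[1];
+        c2[i] = pix1[2] - pix2[2];
+        c3[i] = pix1[3] - pix2[3];
+      }
+  }
+
+With -ftree-slp-transpose-vectorize the grouped stores c0[i] .. c3[i] are
+merged into one group and transposed in registers, so that both the strided
+loads and the per-array stores become vectorizable.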
+ +--- + gcc/common.opt | 4 + + gcc/testsuite/gcc.dg/vect/transpose-1.c | 53 ++ + gcc/testsuite/gcc.dg/vect/transpose-2.c | 50 ++ + gcc/testsuite/gcc.dg/vect/transpose-3.c | 54 ++ + gcc/testsuite/gcc.dg/vect/transpose-4.c | 53 ++ + gcc/testsuite/gcc.dg/vect/transpose-5.c | 73 ++ + gcc/testsuite/gcc.dg/vect/transpose-6.c | 67 ++ + gcc/testsuite/gcc.dg/vect/transpose-7.c | 53 ++ + gcc/testsuite/gcc.dg/vect/transpose-8.c | 53 ++ + gcc/testsuite/gcc.dg/vect/vect.exp | 7 + + gcc/tree-vect-data-refs.c | 236 +++++ + gcc/tree-vect-slp.c | 1090 ++++++++++++++++++++++- + gcc/tree-vect-stmts.c | 763 +++++++++++++++- + gcc/tree-vectorizer.h | 89 ++ + 14 files changed, 2641 insertions(+), 4 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-1.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-2.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-3.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-4.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-5.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-6.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-7.c + create mode 100644 gcc/testsuite/gcc.dg/vect/transpose-8.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index 24834cf60..d38401b71 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -3049,6 +3049,10 @@ ftree-vect-analyze-slp-group + Common Report Var(flag_tree_slp_group) Init(0) + Disable SLP vectorization for reduction chain on tree. + ++ftree-slp-transpose-vectorize ++Common Report Var(flag_tree_slp_transpose_vectorize) Optimization Init(0) ++Enable basic block vectorization (SLP) for transposed stores and loads on trees. ++ + fvect-cost-model= + Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization + -fvect-cost-model=[unlimited|dynamic|cheap] Specifies the cost model for vectorization. 
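+
+The following standalone C program (not part of the patch; it only mirrors
+the scheme) demonstrates the two-stage permutation that the new function
+vect_transpose_store_chain, added later in this patch, generates.  The
+constants match the worked example in that function's comment: LENGTH = 4
+vectors of NELT = 8 elements, ARRAY_NUM = 4.  Running it checks that the
+first-stage "grouping" masks plus the second-stage "interleaving" masks
+really produce the transposed output:
+
+  #include <assert.h>
+  #include <string.h>
+
+  #define NELT 8
+  #define LENGTH 4
+  #define ARRAY_NUM 4
+  #define GROUP_NUM (NELT / ARRAY_NUM)
+
+  /* res = VEC_PERM_EXPR <a, b, sel>: sel[i] < NELT picks from a,
+     otherwise from b.  */
+  static void
+  vec_perm (const int *a, const int *b, const int *sel, int *res)
+  {
+    for (int i = 0; i < NELT; i++)
+      res[i] = sel[i] < NELT ? a[sel[i]] : b[sel[i] - NELT];
+  }
+
+  int
+  main (void)
+  {
+    int dr[LENGTH][NELT], res[LENGTH][NELT];
+    int hi1[NELT], lo1[NELT], hi[NELT], lo[NELT];
+    int i = 0;
+
+    /* First-stage (grouping) masks, as in vect_indices_encoding_first.  */
+    for (int a = 0; a < ARRAY_NUM / 2; a++)
+      for (int g = 0; g < GROUP_NUM * 2; g++)
+        hi1[i++] = a + ARRAY_NUM * g;
+    i = 0;
+    for (int a = ARRAY_NUM / 2; a < ARRAY_NUM; a++)
+      for (int g = 0; g < GROUP_NUM * 2; g++)
+        lo1[i++] = a + ARRAY_NUM * g;
+
+    /* Following-stage (interleaving) masks, as in vect_indices_encoding.  */
+    i = 0;
+    for (int a = 0; a < ARRAY_NUM / 2; a++)
+      {
+        for (int g = 0; g < GROUP_NUM; g++)
+          hi[i++] = g + GROUP_NUM * a;
+        for (int g = 0; g < GROUP_NUM; g++)
+          hi[i++] = NELT + g + GROUP_NUM * a;
+      }
+    i = 0;
+    for (int a = ARRAY_NUM / 2; a < ARRAY_NUM; a++)
+      {
+        for (int g = 0; g < GROUP_NUM; g++)
+          lo[i++] = g + GROUP_NUM * a;
+        for (int g = 0; g < GROUP_NUM; g++)
+          lo[i++] = NELT + g + GROUP_NUM * a;
+      }
+
+    /* Input: vector K holds elements K*NELT .. K*NELT+7.  */
+    for (int k = 0; k < LENGTH; k++)
+      for (int e = 0; e < NELT; e++)
+        dr[k][e] = k * NELT + e;
+
+    /* log2 (LENGTH) = 2 permute stages, exactly as in the new function.  */
+    for (int stage = 0; stage < 2; stage++)
+      {
+        for (int k = 0; k < LENGTH / 2; k++)
+          {
+            vec_perm (dr[k], dr[k + LENGTH / 2],
+                      stage == 0 ? hi1 : hi, res[2 * k]);
+            vec_perm (dr[k], dr[k + LENGTH / 2],
+                      stage == 0 ? lo1 : lo, res[2 * k + 1]);
+          }
+        memcpy (dr, res, sizeof (dr));
+      }
+
+    /* Transposed result: vector K now holds K, K+4, K+8, ..., K+28.  */
+    for (int k = 0; k < LENGTH; k++)
+      for (int e = 0; e < NELT; e++)
+        assert (dr[k][e] == k + e * LENGTH);
+
+    return 0;
+  }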
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-1.c b/gcc/testsuite/gcc.dg/vect/transpose-1.c
+new file mode 100644
+index 000000000..8237a8b9e
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/transpose-1.c
+@@ -0,0 +1,53 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-require-effective-target vect_int } */
++#include <stdarg.h>
++#include <stdlib.h>
++#include "tree-vect.h"
++
++#define N 4
++#define M 256
++
++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2)
++{
++  int i = 0;
++  int sum = 0;
++  unsigned c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N];
++  for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      c0[i] = pix1[0] - pix2[0];
++      c1[i] = pix1[1] - pix2[1];
++      c2[i] = pix1[2] - pix2[2];
++      c3[i] = pix1[3] - pix2[3];
++      c4[i] = pix1[4] - pix2[4];
++      c5[i] = pix1[5] - pix2[5];
++      c6[i] = pix1[6] - pix2[6];
++      c7[i] = pix1[7] - pix2[7];
++    }
++  for (int i = 0; i < N; i++)
++    {
++      sum += c0[i] + c1[i] + c2[i] + c3[i] + c4[i] + c5[i] + c6[i] + c7[i];
++    }
++  return sum;
++}
++
++int main (int argc, const char* argv[])
++{
++  unsigned char input1[M];
++  unsigned char input2[M];
++  int i1 = 16;
++  int i2 = 8;
++  check_vect ();
++  for (int i = 0; i < M; i++)
++    {
++      input1[i] = i * 2;
++      input2[i] = i;
++    }
++  int sum = foo (input1, i1, input2, i2);
++  if (sum != 1264)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-2.c b/gcc/testsuite/gcc.dg/vect/transpose-2.c
+new file mode 100644
+index 000000000..b01a0410e
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/transpose-2.c
+@@ -0,0 +1,50 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-additional-options "-fno-tree-loop-vectorize" } */
++/* { dg-require-effective-target vect_int } */
++#include <stdarg.h>
++#include <stdlib.h>
++#include "tree-vect.h"
++
++#define N 8
++#define M 256
++
++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2)
++{
++  int i = 0;
++  int sum = 0;
++  unsigned short c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N];
++  for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      c0[i] = pix1[0] - pix2[0];
++      c1[i] = pix1[1] - pix2[1];
++      c2[i] = pix1[2] - pix2[2];
++      c3[i] = pix1[3] - pix2[3];
++    }
++  for (int i = 0; i < N; i++)
++    {
++      sum += c0[i] + c1[i] + c2[i] + c3[i];
++    }
++  return sum;
++}
++
++int main (int argc, const char* argv[])
++{
++  unsigned char input1[M];
++  unsigned char input2[M];
++  int i1 = 5;
++  int i2 = 4;
++  check_vect ();
++  for (int i = 0; i < M; i++)
++    {
++      input1[i] = i * 4;
++      input2[i] = i * 2;
++    }
++  int sum = foo (input1, i1, input2, i2);
++  if (sum != 1440)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-3.c b/gcc/testsuite/gcc.dg/vect/transpose-3.c
+new file mode 100644
+index 000000000..529581c59
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/transpose-3.c
+@@ -0,0 +1,54 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-additional-options "-fno-tree-loop-vectorize" } */
++/* { dg-require-effective-target vect_int } */
++#include <stdarg.h>
++#include <stdlib.h>
++#include "tree-vect.h"
++
++#define N 4
++#define M 256
++
++int foo (unsigned short *pix1, int i_pix1, unsigned short *pix2, int i_pix2)
++{
++  int i = 0;
++  int sum = 0;
++  unsigned c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N];
++  for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      c0[i] = pix1[0] - pix2[0];
++      c1[i] = pix1[1] - pix2[1];
++      c2[i] = pix1[2] - pix2[2];
++      c3[i] = pix1[3] - pix2[3];
++      c4[i] = pix1[4] - pix2[4];
++      c5[i] = pix1[5] - pix2[5];
++      c6[i] = pix1[6] - pix2[6];
++      c7[i] = pix1[7] - pix2[7];
++    }
++  for (int i = 0; i < N; i++)
++    {
++      sum += c0[i] + c1[i] + c2[i] + c3[i] + c4[i] + c5[i] + c6[i] + c7[i];
++    }
++  return sum;
++}
++
++int main (int argc, const char* argv[])
++{
++  unsigned short input1[M];
++  unsigned short input2[M];
++  int i1 = 8;
++  int i2 = 4;
++  check_vect ();
++  for (int i = 0; i < M; i++)
++    {
++      input1[i] = i * 4;
++      input2[i] = i;
++    }
++  int sum = foo (input1, i1, input2, i2);
++  if (sum != 1680)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-4.c b/gcc/testsuite/gcc.dg/vect/transpose-4.c
+new file mode 100644
+index 000000000..0b4adea9b
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/transpose-4.c
+@@ -0,0 +1,53 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-require-effective-target vect_int } */
++#include <stdarg.h>
++#include <stdlib.h>
++#include "tree-vect.h"
++
++#define N 4
++#define M 256
++
++int foo (unsigned *pix1, int i_pix1, unsigned *pix2, int i_pix2)
++{
++  int i = 0;
++  int sum = 0;
++  unsigned c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N];
++  for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      c0[i] = pix1[0] - pix2[0];
++      c1[i] = pix1[1] - pix2[1];
++      c2[i] = pix1[2] - pix2[2];
++      c3[i] = pix1[3] - pix2[3];
++      c4[i] = pix1[4] - pix2[4];
++      c5[i] = pix1[5] - pix2[5];
++      c6[i] = pix1[6] - pix2[6];
++      c7[i] = pix1[7] - pix2[7];
++    }
++  for (int i = 0; i < N; i++)
++    {
++      sum += c0[i] + c1[i] + c2[i] + c3[i] + c4[i] + c5[i] + c6[i] + c7[i];
++    }
++  return sum;
++}
++
++int main (int argc, const char* argv[])
++{
++  unsigned input1[M];
++  unsigned input2[M];
++  int i1 = 12;
++  int i2 = 6;
++  check_vect ();
++  for (int i = 0; i < M; i++)
++    {
++      input1[i] = i * 7;
++      input2[i] = i * 3;
++    }
++  int sum = foo (input1, i1, input2, i2);
++  if (sum != 3616)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-5.c b/gcc/testsuite/gcc.dg/vect/transpose-5.c
+new file mode 100644
+index 000000000..81a248840
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/transpose-5.c
+@@ -0,0 +1,73 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-require-effective-target vect_int } */
++#include <stdarg.h>
++#include <stdlib.h>
++#include <math.h>
++#include "tree-vect.h"
++
++#define N 4
++#define M 256
++#define eps 1e-8
++
++double foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2)
++{
++  unsigned a0[N];
++  unsigned a1[N];
++  unsigned a2[N];
++  unsigned a3[N];
++
++  int b0[N];
++  int b1[N];
++  int b2[N];
++  int b3[N];
++
++  for (int i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      a0[i] = (pix1[0] - pix2[0]) + ((pix1[4] + pix2[4]) << 16);
++      a1[i] = (pix1[1] - pix2[1]) + ((pix1[5] + pix2[5]) << 16);
++      a2[i] = (pix1[2] - pix2[2]) + ((pix1[6] + pix2[6]) << 16);
++      a3[i] = (pix1[3] - pix2[3]) + ((pix1[7] + pix2[7]) << 16);
++    }
++
++  for (int i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      b0[i] = (pix1[0] - pix2[0]) + (pix1[4] + pix2[4]);
++      b1[i] = (pix1[1] - pix2[1]) + (pix1[5] + pix2[5]);
++      b2[i] = (pix1[2] - pix2[2]) + (pix1[6] + pix2[6]);
++      b3[i] = (pix1[3] - pix2[3]) + (pix1[7] + pix2[7]);
++    }
++
++  double sum = 0;
++  for (int i = 0; i < N; i++)
++    {
++      sum += a0[i] + a1[i] + a2[i] + a3[i] + b0[i] + b1[i] + b2[i] + b3[i];
++    }
++  return sum;
++}
++
++int main (int argc, const char* argv[])
++{
++  unsigned char input1[M];
++  unsigned char input2[M];
++  int i1 = 8;
++  int i2 = 3;
++  unsigned char m = 2;
++  unsigned short n = 12;
++  float t = 3.0;
++  double k = 4.2;
++  check_vect ();
++  for (int i = 0; i < M; i++)
++    {
++      input1[i] = i * 6;
++      input2[i] = i * 3;
++    }
++  double sum = foo (input1, i1, input2, i2);
++  if (fabs (sum - 78648144) > eps)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */
++/* { dg-final { scan-tree-dump-times "vectorizable_store for slp transpose" 2 "slp1" } } */
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-6.c b/gcc/testsuite/gcc.dg/vect/transpose-6.c
+new file mode 100644
+index 000000000..3e134ac02
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/transpose-6.c
+@@ -0,0 +1,67 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-require-effective-target vect_int } */
++/* { dg-require-effective-target vect_float } */
++#include <stdarg.h>
++#include <stdlib.h>
++#include <math.h>
++#include "tree-vect.h"
++
++#define N 4
++#define M 256
++#define eps 1e-8
++
++float foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2)
++{
++  unsigned a0[N];
++  unsigned a1[N];
++  unsigned a2[N];
++  unsigned a3[N];
++
++  float c0[N];
++  float c1[N];
++  float c2[N];
++  float c3[N];
++
++  for (int i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      a0[i] = (pix1[0] - pix2[0]) + ((pix1[4] - pix2[4]) << 16);
++      a1[i] = (pix1[1] - pix2[1]) + ((pix1[5] - pix2[5]) << 16);
++      a2[i] = (pix1[2] - pix2[2]) + ((pix1[6] - pix2[6]) << 16);
++      a3[i] = (pix1[3] - pix2[3]) + ((pix1[7] - pix2[7]) << 16);
++
++      c0[i] = (pix1[0] * pix2[0]) + (pix1[4] * pix2[4]);
++      c1[i] = (pix1[1] * pix2[1]) + (pix1[5] * pix2[5]);
++      c2[i] = (pix1[2] * pix2[2]) + (pix1[6] * pix2[6]);
++      c3[i] = (pix1[3] * pix2[3]) + (pix1[7] * pix2[7]);
++    }
++
++  float sum = 0;
++  for (int i = 0; i < N; i++)
++    {
++      sum += a0[i] + a1[i] + a2[i] + a3[i] + c0[i] + c1[i] + c2[i] + c3[i];
++    }
++  return sum;
++}
++
++int main (int argc, const char* argv[])
++{
++  unsigned char input1[M];
++  unsigned char input2[M];
++  int i1 = 18;
++  int i2 = 6;
++  check_vect ();
++  for (int i = 0; i < M; i++)
++    {
++      input1[i] = i * 4;
++      input2[i] = i * 2;
++    }
++  float sum = foo (input1, i1, input2, i2);
++  if (fabs (sum - 106041168) > eps)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */
++/* { dg-final { scan-tree-dump-times "vectorizable_store for slp transpose" 2 "slp1" } } */
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-7.c b/gcc/testsuite/gcc.dg/vect/transpose-7.c
+new file mode 100644
+index 000000000..2074d9aa8
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/transpose-7.c
+@@ -0,0 +1,53 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-additional-options "-fno-tree-loop-vectorize" } */
++/* { dg-require-effective-target vect_int } */
++#include <stdarg.h>
++#include <stdlib.h>
++#include "tree-vect.h"
++
++#define N 16
++#define M 256
++
++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2)
++{
++  int i = 0;
++  int sum = 0;
++  unsigned char c0[N], c1[N];
++  for (int i = 0; i < N/2; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      c0[i] = pix1[0] - pix2[0];
++      c1[i] = pix1[1] - pix2[1];
++    }
++  for (int i = N/2; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      c0[i] = pix1[0] - pix2[0];
++      c1[i] = pix1[1] - pix2[1];
++    }
++  for (int i = 0; i < N; i++)
++    {
++      sum += c0[i] + c1[i];
++    }
++  return sum;
++}
++
++int main (int argc, const char* argv[])
++{
++  unsigned char input1[M];
++  unsigned char input2[M];
++  int i1 = 6;
++  int i2 = 4;
++  check_vect ();
++  for (int i = 0; i < M; i++)
++    {
++      input1[i] = i * 5;
++      input2[i] = i * 2;
++    }
++  int sum = foo (input1, i1, input2, i2);
++  if (sum != 3280)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */
+diff --git a/gcc/testsuite/gcc.dg/vect/transpose-8.c b/gcc/testsuite/gcc.dg/vect/transpose-8.c
+new file mode 100644
+index 000000000..a154f012a
+--- /dev/null
++++ b/gcc/testsuite/gcc.dg/vect/transpose-8.c
+@@ -0,0 +1,53 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-additional-options "-fno-tree-loop-vectorize" } */
++/* { dg-require-effective-target vect_int } */
++#include <stdarg.h>
++#include <stdlib.h>
++#include "tree-vect.h"
++
++#define N 32
++#define M 256
++
++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2)
++{
++  int i = 0;
++  int sum = 0;
++  unsigned char c0[N], c1[N];
++  for (int i = 0; i < N/2; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      c0[i] = pix1[0] - pix2[0];
++      c1[i] = pix1[1] - pix2[1];
++    }
++  for (int i = N/2; i < N; i++, pix1 += i_pix1, pix2 += i_pix2)
++    {
++      c0[i] = pix1[0] - pix2[0];
++      c1[i] = pix1[1] - pix2[1];
++    }
++  for (int i = 0; i < N; i++)
++    {
++      sum += c0[i] + c1[i];
++    }
++  return sum;
++}
++
++int main (int argc, const char* argv[])
++{
++  unsigned char input1[M];
++  unsigned char input2[M];
++  int i1 = 6;
++  int i2 = 4;
++  check_vect ();
++  for (int i = 0; i < M; i++)
++    {
++      input1[i] = i * 5;
++      input2[i] = i * 2;
++    }
++  int sum = foo (input1, i1, input2, i2);
++  if (sum != 7584)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */
+diff --git a/gcc/testsuite/gcc.dg/vect/vect.exp b/gcc/testsuite/gcc.dg/vect/vect.exp
+index efe17ac6f..d92e1ba5b 100644
+--- a/gcc/testsuite/gcc.dg/vect/vect.exp
++++ b/gcc/testsuite/gcc.dg/vect/vect.exp
+@@ -114,6 +114,13 @@ et-dg-runtest dg-runtest [lsort \
+ 	[glob -nocomplain $srcdir/$subdir/no-vfa-*.\[cS\]]] \
+ 	"" $DEFAULT_VECTCFLAGS
+ 
++# -ftree-slp-transpose-vectorize SLP tests
++set VECT_SLP_CFLAGS $SAVED_VECT_SLP_CFLAGS
++lappend VECT_SLP_CFLAGS "-ftree-slp-transpose-vectorize"
++et-dg-runtest dg-runtest [lsort \
++	[glob -nocomplain $srcdir/$subdir/transpose-*.\[cS\]]] \
++	"" "-ftree-slp-transpose-vectorize -fdump-tree-slp-details -O3"
++
+ # -ffast-math tests
+ set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+ lappend DEFAULT_VECTCFLAGS "-ffast-math"
+diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
+index fcc0726bd..d78b06455 100644
+--- a/gcc/tree-vect-data-refs.c
++++ b/gcc/tree-vect-data-refs.c
+@@ -2647,6 +2647,9 @@ vect_analyze_group_access_1 (dr_vec_info *dr_info)
+ 	DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element;
+ 
+       DR_GROUP_SIZE (stmt_info) = groupsize;
++
++      DR_GROUP_SLP_TRANSPOSE (stmt_info) = false;
++
+       if (dump_enabled_p ())
+ 	{
+ 	  dump_printf_loc (MSG_NOTE, vect_location,
+@@ -2676,6 +2679,20 @@
+ 			 DR_GROUP_GAP (stmt_info));
+ 	}
+ 
++      /* SLP: create an SLP data structure for every interleaving group of
++	 loads for further analysis in vect_analyse_slp.  */
++      if (DR_IS_READ (dr) && !slp_impossible)
++	{
++	  if (loop_vinfo)
++	    {
++	      LOOP_VINFO_GROUPED_LOADS (loop_vinfo).safe_push (stmt_info);
++	    }
++	  if (bb_vinfo)
++	    {
++	      BB_VINFO_GROUPED_LOADS (bb_vinfo).safe_push (stmt_info);
++	    }
++	}
++
+       /* SLP: create an SLP data structure for every interleaving group of
+ 	 stores for further analysis in vect_analyse_slp.  */
+       if (DR_IS_WRITE (dr) && !slp_impossible)
+@@ -5413,6 +5430,225 @@ vect_permute_store_chain (vec<tree> dr_chain,
+     }
+ }
+ 
++/* Encoding the PERM_MASK_FIRST.  */
++
++static void
++vect_indices_encoding_first (tree vectype, unsigned int array_num,
++			     tree &perm_mask_high_first,
++			     tree &perm_mask_low_first)
++{
++  unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
++  vec_perm_builder sel (nelt, nelt, 1);
++  sel.quick_grow (nelt);
++  unsigned int group_num = nelt / array_num;
++  unsigned int index = 0;
++  unsigned int array = 0;
++  unsigned int group = 0;
++
++  /* The encoding has 1 pattern in the first stage.  */
++  for (array = 0; array < array_num / 2; array++)
++    {
++      for (group = 0; group < group_num * 2; group++)
++	{
++	  sel[index++] = array + array_num * group;
++	}
++    }
++  vec_perm_indices indices (sel, 2, nelt);
++  perm_mask_high_first = vect_gen_perm_mask_checked (vectype, indices);
++
++  index = 0;
++  for (array = array_num / 2; array < array_num; array++)
++    {
++      for (group = 0; group < group_num * 2; group++)
++	{
++	  sel[index++] = array + array_num * group;
++	}
++    }
++  indices.new_vector (sel, 2, nelt);
++  perm_mask_low_first = vect_gen_perm_mask_checked (vectype, indices);
++}
++
++/* Encoding the PERM_MASK.  */
++
++static void
++vect_indices_encoding (tree vectype, unsigned int array_num,
++		       tree &perm_mask_high, tree &perm_mask_low)
++{
++  unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
++  vec_perm_builder sel (nelt, nelt, 1);
++  sel.quick_grow (nelt);
++  unsigned int group_num = nelt / array_num;
++  unsigned int index = 0;
++  unsigned int array = 0;
++  unsigned int group = 0;
++
++  /* The encoding has 2 patterns in the following stages.  */
++  for (array = 0; array < array_num / 2; array++)
++    {
++      for (group = 0; group < group_num; group++)
++	{
++	  sel[index++] = group + group_num * array;
++	}
++      for (group = 0; group < group_num; group++)
++	{
++	  sel[index++] = nelt + group + group_num * array;
++	}
++    }
++  vec_perm_indices indices (sel, 2, nelt);
++  perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
++
++  index = 0;
++  for (array = array_num / 2; array < array_num; array++)
++    {
++      for (group = 0; group < group_num; group++)
++	{
++	  sel[index++] = group + group_num * array;
++	}
++      for (group = 0; group < group_num; group++)
++	{
++	  sel[index++] = nelt + group + group_num * array;
++	}
++    }
++  indices.new_vector (sel, 2, nelt);
++  perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
++}
++
++/* Function vect_transpose_store_chain.
++
++   Given a chain of interleaved stores in DR_CHAIN, where LENGTH and
++   ARRAY_NUM must be powers of 2, generate interleave_high/low stmts to
++   reorder the data correctly for the stores.  Return the final references
++   for stores in RESULT_CHAIN.  This function is similar to
++   vect_permute_store_chain (); we interleave the contents of the vectors
++   in their order.
++
++   E.g., LENGTH is 4, the scalar type is short (i.e., VF is 8) and ARRAY_NUM
++   is 4.  That is, the input is 4 vectors each containing 8 elements.
++   And 2 (VF / ARRAY_NUM) of 8 elements come from the same array.
++   We interleave the contents of the four vectors in their order.  We assign
++   a number to each element, the input sequence is:
++
++   1st vec:   0  1  2  3  4  5  6  7
++   2nd vec:   8  9 10 11 12 13 14 15
++   3rd vec:  16 17 18 19 20 21 22 23
++   4th vec:  24 25 26 27 28 29 30 31
++
++   The output sequence should be:
++
++   1st vec:  0  4  8 12 16 20 24 28
++   2nd vec:  1  5  9 13 17 21 25 29
++   3rd vec:  2  6 10 14 18 22 26 30
++   4th vec:  3  7 11 15 19 23 27 31
++
++   In our example,
++   we get 2 (VF / ARRAY_NUM) elements together in every vector.
++
++   I1:  0  4  1  5  2  6  3  7
++   I2:  8 12  9 13 10 14 11 15
++   I3: 16 20 17 21 18 22 19 23
++   I4: 24 28 25 29 26 30 27 31
++
++   Then, we use interleave_high/low instructions to create such output.
++   Every 2 (VF / ARRAY_NUM) elements are regarded as a whole.  The
++   permutation is done in log LENGTH stages.
++
++   I1: interleave_high (1st vec, 3rd vec)
++   I2: interleave_low  (1st vec, 3rd vec)
++   I3: interleave_high (2nd vec, 4th vec)
++   I4: interleave_low  (2nd vec, 4th vec)
++
++   The first stage of the sequence should be:
++
++   I1:  0  4 16 20  1  5 17 21
++   I2:  2  6 18 22  3  7 19 23
++   I3:  8 12 24 28  9 13 25 29
++   I4: 10 14 26 30 11 15 27 31
++
++   The following stage sequence should be, i.e. the final result is:
++
++   I1: 0 4  8 12 16 20 24 28
++   I2: 1 5  9 13 17 21 25 29
++   I3: 2 6 10 14 18 22 26 30
++   I4: 3 7 11 15 19 23 27 31.  */
++
++void
++vect_transpose_store_chain (vec<tree> dr_chain, unsigned int length,
++			    unsigned int array_num, stmt_vec_info stmt_info,
++			    gimple_stmt_iterator *gsi, vec<tree> *result_chain)
++{
++  gimple *perm_stmt = NULL;
++  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
++  tree perm_mask_low_first = NULL;
++  tree perm_mask_high_first = NULL;
++  tree perm_mask_low = NULL;
++  tree perm_mask_high = NULL;
++  unsigned int log_length = exact_log2 (length);
++
++  /* Only power of 2 is supported.  */
++  gcc_assert (pow2p_hwi (length));
++
++  /* The encoding has 2 types, one for the grouped pattern in the first stage,
++     another for the interleaved patterns in the following stages.  */
++  gcc_assert (array_num != 0);
++
++  /* Create grouped stmt (in the first stage):
++     group = nelt / array_num;
++     high_first = VEC_PERM_EXPR
++     low_first = VEC_PERM_EXPR  */
++  vect_indices_encoding_first (vectype, array_num, perm_mask_high_first,
++			       perm_mask_low_first);
++
++  /* Create interleaving stmt (in the following stages):
++     high = VEC_PERM_EXPR
++     low = VEC_PERM_EXPR  */
++  vect_indices_encoding (vectype, array_num, perm_mask_high, perm_mask_low);
++
++  for (unsigned int perm_time = 0; perm_time < log_length; perm_time++)
++    {
++      for (unsigned int index = 0; index < length / 2; index++)
++	{
++	  tree vect1 = dr_chain[index];
++	  tree vect2 = dr_chain[index + length / 2];
++
++	  tree high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
++	  perm_stmt = gimple_build_assign (high, VEC_PERM_EXPR, vect1, vect2,
++					   perm_time == 0 ? perm_mask_high_first
++							  : perm_mask_high);
++	  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
++	  (*result_chain)[2 * index] = high;
++
++	  tree low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
++	  perm_stmt = gimple_build_assign (low, VEC_PERM_EXPR, vect1, vect2,
++					   perm_time == 0 ?
perm_mask_low_first
++							  : perm_mask_low);
++	  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
++	  (*result_chain)[2 * index + 1] = low;
++	}
++      memcpy (dr_chain.address (), result_chain->address (),
++	      length * sizeof (tree));
++    }
++}
++
+ /* Function vect_setup_realignment
+ 
+    This function is called when vectorizing an unaligned load using
+diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
+index 476b32370..d30463b96 100644
+--- a/gcc/tree-vect-slp.c
++++ b/gcc/tree-vect-slp.c
+@@ -2414,11 +2414,13 @@ vect_analyze_slp_instance (vec_info *vinfo,
+ 
+   /* For basic block SLP, try to break the group up into multiples of the
+      vector size.  */
++  bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (vinfo);
+   unsigned HOST_WIDE_INT const_nunits;
+   if (is_a <bb_vec_info> (vinfo)
+       && STMT_VINFO_GROUPED_ACCESS (stmt_info)
+       && DR_GROUP_FIRST_ELEMENT (stmt_info)
+-      && nunits.is_constant (&const_nunits))
++      && nunits.is_constant (&const_nunits)
++      && !bb_vinfo->transposed)
+     {
+       /* We consider breaking the group only on VF boundaries from the existing
+ 	 start.  */
+@@ -2455,6 +2457,898 @@
+   return false;
+ }
+ 
++static inline bool
++is_const_assign (stmt_vec_info store_elem)
++{
++  if (store_elem == NULL)
++    {
++      gcc_unreachable ();
++    }
++  gimple *stmt = store_elem->stmt;
++  gimple_rhs_class rhs_class = gimple_assign_rhs_class (stmt);
++  return rhs_class == GIMPLE_SINGLE_RHS
++	 && TREE_CONSTANT (gimple_assign_rhs1 (store_elem->stmt));
++}
++
++/* Push inits to INNERMOST_INITS and check const assign.  */
++
++static bool
++record_innermost (vec<tree> &innermost_inits,
++		  vec<tree> &innermost_offsets,
++		  stmt_vec_info stmt_vinfo)
++{
++  if (!stmt_vinfo)
++    {
++      return false;
++    }
++  stmt_vec_info next_info = stmt_vinfo;
++  while (next_info)
++    {
++      /* No need to vectorize constant assign in a transposed version.  */
++      if (is_const_assign (next_info))
++	{
++	  if (dump_enabled_p ())
++	    {
++	      dump_printf_loc (MSG_NOTE, vect_location,
++			       "no need to vectorize, store is const assign: %G",
++			       next_info->stmt);
++	    }
++	  return false;
++	}
++      innermost_inits.safe_push (STMT_VINFO_DR_INIT (next_info));
++      innermost_offsets.safe_push (STMT_VINFO_DR_OFFSET (next_info));
++      next_info = DR_GROUP_NEXT_ELEMENT (next_info);
++    }
++  return true;
++}
++
++/* Compare inits to INNERMOST_INITS, return FALSE if inits do not match
++   the first grouped_store.  And check const assign meanwhile.  */
++
++static bool
++compare_innermost (const vec<tree> &innermost_inits,
++		   const vec<tree> &innermost_offsets,
++		   stmt_vec_info stmt_vinfo)
++{
++  if (!stmt_vinfo || innermost_inits.length () != stmt_vinfo->size)
++    {
++      return false;
++    }
++  stmt_vec_info next_info = stmt_vinfo;
++  unsigned int i = 0;
++  while (next_info)
++    {
++      if (is_const_assign (next_info))
++	{
++	  if (dump_enabled_p ())
++	    {
++	      dump_printf_loc (MSG_NOTE, vect_location,
++			       "no need to vectorize, store is const "
++			       "assign: %G", next_info->stmt);
++	    }
++	  return false;
++	}
++      if (innermost_inits[i] != STMT_VINFO_DR_INIT (next_info)
++	  || innermost_offsets[i] != STMT_VINFO_DR_OFFSET (next_info))
++	{
++	  return false;
++	}
++      next_info = DR_GROUP_NEXT_ELEMENT (next_info);
++      i++;
++    }
++  return true;
++}
++
++/* Check if grouped stores are of the same type.
++   input: t1/t2 = TREE_TYPE (gimple_assign_lhs (first_element->stmt))
++   output: 0 if same, 1 or -1 otherwise.  */
++
++static int
++tree_type_cmp (const tree t1, const tree t2)
++{
++  gcc_checking_assert (t1 != NULL && t2 != NULL);
++  if (t1 != t2)
++    {
++      if (TREE_CODE (t1) != TREE_CODE (t2))
++	{
++	  return TREE_CODE (t1) > TREE_CODE (t2) ? 1 : -1;
++	}
++      if (TYPE_UNSIGNED (t1) != TYPE_UNSIGNED (t2))
++	{
++	  return TYPE_UNSIGNED (t1) > TYPE_UNSIGNED (t2) ? 1 : -1;
++	}
++      if (TYPE_PRECISION (t1) != TYPE_PRECISION (t2))
++	{
++	  return TYPE_PRECISION (t1) > TYPE_PRECISION (t2) ? 1 : -1;
++	}
++    }
++  return 0;
++}
++
++/* Check if two grouped stores are of the same type, so that we can
++   analyze them in one transpose group.  */
++static int
++check_same_store_type (stmt_vec_info grp1, stmt_vec_info grp2)
++{
++  if (grp1 == grp2)
++    {
++      return 0;
++    }
++  if (grp1->size != grp2->size)
++    {
++      return grp1->size > grp2->size ? 1 : -1;
++    }
++  tree lhs1 = gimple_assign_lhs (grp1->stmt);
++  tree lhs2 = gimple_assign_lhs (grp2->stmt);
++  if (TREE_CODE (lhs1) != TREE_CODE (lhs2))
++    {
++      return TREE_CODE (lhs1) > TREE_CODE (lhs2) ? 1 : -1;
++    }
++  tree grp_type1 = TREE_TYPE (gimple_assign_lhs (grp1->stmt));
++  tree grp_type2 = TREE_TYPE (gimple_assign_lhs (grp2->stmt));
++  int cmp = tree_type_cmp (grp_type1, grp_type2);
++  return cmp;
++}
++
++/* Sort grouped stores according to group_size and store_type.
++   output: 0 if same, 1 if grp1 > grp2, -1 otherwise.  */
++
++static int
++grouped_store_cmp (const void *grp1_, const void *grp2_)
++{
++  stmt_vec_info grp1 = *(stmt_vec_info *) const_cast<void *> (grp1_);
++  stmt_vec_info grp2 = *(stmt_vec_info *) const_cast<void *> (grp2_);
++  return check_same_store_type (grp1, grp2);
++}
++
++/* Transposing is based on permutation in registers.  Permutation requires
++   vector length being power of 2 and satisfying the vector mode.  */
++
++static inline bool
++check_filling_reg (stmt_vec_info current_element)
++{
++  if (current_element->size == 0)
++    {
++      return false;
++    }
++  /* If the gimple STMT was already vectorized in vect pass, it's unable to
++     conduct transpose analysis, skip it.  */
++  bool lhs_vectorized
++    = TREE_CODE (TREE_TYPE (gimple_get_lhs (current_element->stmt)))
++      == VECTOR_TYPE;
++  bool rhs_vectorized
++    = TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (current_element->stmt)))
++      == VECTOR_TYPE;
++  if (lhs_vectorized || rhs_vectorized)
++    {
++      return false;
++    }
++  unsigned int store_precision
++    = TYPE_PRECISION (TREE_TYPE (gimple_get_lhs (current_element->stmt)));
++  auto_vector_modes vector_modes;
++  targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
++  unsigned min_mode_size = -1u;
++  for (unsigned i = 0; i < vector_modes.length (); i++)
++    {
++      unsigned mode_bit_size = (GET_MODE_BITSIZE (vector_modes[i])).coeffs[0];
++      min_mode_size = mode_bit_size < min_mode_size
++		      ? mode_bit_size : min_mode_size;
++    }
++  return store_precision != 0
++	 && pow2p_hwi (current_element->size)
++	 && (current_element->size * store_precision % min_mode_size == 0);
++}
++
++/* Check if previous groups are suitable to transpose, if not, set their
++   group number to -1, reduce grp_num and clear current_groups.
++   Otherwise, just clear current_groups.  */
++
++static void
++check_and_clear_groups (vec<stmt_vec_info> current_groups,
++			unsigned int &grp_num)
++{
++  stmt_vec_info first_element;
++  if (current_groups.length () == 1
++      || (current_groups.length () != 0
++	  && !pow2p_hwi (current_groups.length ())))
++    {
++      while (current_groups.length () != 0)
++	{
++	  first_element = current_groups.pop ();
++	  first_element->group_number = -1;
++	}
++      grp_num--;
++    }
++  else
++    {
++      while (current_groups.length ())
++	{
++	  current_groups.pop ();
++	}
++    }
++}
++
++
++/* Make sure that transpose slp vectorization is conducted only if grouped
++   stores are one-dimensional array refs.  */
++
++static bool
++is_store_one_dim_array (gimple *stmt)
++{
++  tree op = gimple_get_lhs (stmt);
++  if (TREE_CODE (op) != ARRAY_REF)
++    return false;
++  return TREE_OPERAND_LENGTH (op) > 0
++	 && TREE_OPERAND_LENGTH (TREE_OPERAND (op, 0)) == 0;
++}
++
++/* Set grouped_stores with similar MEM_REF to the same group and mark their
++   grp_num.  Groups with the same grp_num form the minimum unit for
++   transpose analysis.  Return the number of such units.  */
++
++static unsigned
++vect_prepare_transpose (bb_vec_info bb_vinfo)
++{
++  stmt_vec_info current_element = NULL;
++  stmt_vec_info first_element = NULL;
++  unsigned int i = 0;
++  unsigned int grp_num = 0;
++  /* Use arrays to record MEM_REF data in different GROUPED_STORES.  */
++  auto_vec<tree> innermost_inits;
++  auto_vec<tree> innermost_offsets;
++
++  /* A set of stmt_vec_info with same store type.  Analyze them if their size
++     is suitable to transpose.  */
++  auto_vec<stmt_vec_info> current_groups;
++
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, i, current_element)
++    {
++      /* Compare current grouped_store to the first one if first_element exists,
++	 push current_element to current_groups if they are similar on innermost
++	 behavior of MEM_REF.  */
++      if (first_element != NULL
++	  && !check_same_store_type (first_element, current_element)
++	  && compare_innermost (innermost_inits, innermost_offsets,
++				current_element))
++	{
++	  current_groups.safe_push (current_element);
++	  current_element->group_number = grp_num;
++	  /* If current_element is the last element in grouped_stores, continue
++	     will exit the loop and leave the last group unanalyzed.  */
++	  if (i == bb_vinfo->grouped_stores.length () - 1)
++	    {
++	      check_and_clear_groups (current_groups, grp_num);
++	    }
++	  continue;
++	}
++      check_and_clear_groups (current_groups, grp_num);
++      innermost_inits.release ();
++      innermost_offsets.release ();
++      /* Beginning of a new group to analyze whether they are able to form
++	 a unit for transpose analysis.  */
++      first_element = NULL;
++      if (is_store_one_dim_array (current_element->stmt)
++	  && check_filling_reg (current_element)
++	  && record_innermost (innermost_inits, innermost_offsets,
++			       current_element))
++	{
++	  first_element = current_element;
++	  current_groups.safe_push (current_element);
++	  current_element->group_number = ++grp_num;
++	  if (i == bb_vinfo->grouped_stores.length () - 1)
++	    {
++	      check_and_clear_groups (current_groups, grp_num);
++	    }
++	  continue;
++	}
++      current_element->group_number = -1;
++    }
++  return grp_num;
++}
++
++/* Return a flag to transpose grouped stores before building slp tree.
++   Add bool may_transpose in class vec_info.  */
++
++static bool
++vect_may_transpose (bb_vec_info bb_vinfo)
++{
++  if (targetm.vectorize.vec_perm_const == NULL)
++    {
++      return false;
++    }
++  if (bb_vinfo->grouped_stores.length () < 2)
++    {
++      return false;
++    }
++  DUMP_VECT_SCOPE ("analyze if grouped stores may transpose to slp");
++  /* Sort grouped_stores according to size and type for function
++     vect_prepare_transpose ().  */
++  bb_vinfo->grouped_stores.qsort (grouped_store_cmp);
++
++  int groups = vect_prepare_transpose (bb_vinfo);
++  BB_VINFO_TRANS_GROUPS (bb_vinfo) = groups;
++  if (dump_enabled_p ())
++    dump_printf_loc (MSG_NOTE, vect_location,
++		     "%d groups to analyze transposed slp.\n", groups);
++  return groups != 0;
++}
++
++/* Get the base address of STMT_INFO.  */
++
++static tree
++get_op_base_address (stmt_vec_info stmt_info)
++{
++  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
++  tree op = DR_BASE_ADDRESS (dr);
++  while (TREE_OPERAND_LENGTH (op) > 0)
++    {
++      op = TREE_OPERAND (op, 0);
++    }
++  return op;
++}
++
++/* Compare the UID of the two stmt_info STMTINFO_A and STMTINFO_B.
++   Sorting them in ascending order.  */
++
++static int
++dr_group_cmp (const void *stmtinfo_a_, const void *stmtinfo_b_)
++{
++  stmt_vec_info stmtinfo_a
++    = *(stmt_vec_info *) const_cast<void *> (stmtinfo_a_);
++  stmt_vec_info stmtinfo_b
++    = *(stmt_vec_info *) const_cast<void *> (stmtinfo_b_);
++
++  /* Stabilize sort.  */
++  if (stmtinfo_a == stmtinfo_b)
++    {
++      return 0;
++    }
++  return gimple_uid (stmtinfo_a->stmt) < gimple_uid (stmtinfo_b->stmt) ? -1 : 1;
++}
++
++/* Find the first elements of the grouped loads which are required to merge.  */
++
++static void
++vect_slp_grouped_load_find (bb_vec_info bb_vinfo, vec<bool> &visited,
++			    vec<stmt_vec_info> &res)
++{
++  unsigned int i = 0;
++  stmt_vec_info merge_first_element = NULL;
++  stmt_vec_info first_element = NULL;
++  tree opa = NULL;
++  unsigned int grp_size_a = 0;
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_loads, i, first_element)
++    {
++      if (visited[i])
++	{
++	  continue;
++	}
++      if (!STMT_VINFO_GROUPED_ACCESS (first_element)
++	  || !pow2p_hwi (DR_GROUP_SIZE (first_element)))
++	{
++	  /* Non-conforming grouped load should be grouped separately.  */
++	  if (merge_first_element == NULL)
++	    {
++	      visited[i] = true;
++	      res.safe_push (first_element);
++	      return;
++	    }
++	}
++      if (merge_first_element == NULL)
++	{
++	  merge_first_element = first_element;
++	  opa = get_op_base_address (first_element);
++	  grp_size_a = DR_GROUP_SIZE (first_element);
++	  res.safe_push (first_element);
++	  visited[i] = true;
++	  continue;
++	}
++
++      /* If the two first elements are of the same base address and group size,
++	 these two grouped loads need to be merged.  */
++      tree opb = get_op_base_address (first_element);
++      unsigned int grp_size_b = DR_GROUP_SIZE (first_element);
++      if (opa == opb && grp_size_a == grp_size_b)
++	{
++	  res.safe_push (first_element);
++	  visited[i] = true;
++	}
++    }
++}
++
++/* Merge the grouped loads that are found from
++   vect_slp_grouped_load_find ().  */
++
++static stmt_vec_info
++vect_slp_grouped_load_merge (vec<stmt_vec_info> res)
++{
++  stmt_vec_info stmt_info = res[0];
++  if (res.length () == 1)
++    {
++      return stmt_info;
++    }
++  unsigned int i = 0;
++  unsigned int size = DR_GROUP_SIZE (res[0]);
++  unsigned int new_group_size = size * res.length ();
++  stmt_vec_info first_element = NULL;
++  stmt_vec_info merge_first_element = NULL;
++  stmt_vec_info last_element = NULL;
++  FOR_EACH_VEC_ELT (res, i, first_element)
++    {
++      if (merge_first_element == NULL)
++	{
++	  merge_first_element = first_element;
++	  last_element = merge_first_element;
++	  size = DR_GROUP_SIZE (merge_first_element);
++	}
++
++      if (last_element != first_element
++	  && !DR_GROUP_NEXT_ELEMENT (last_element))
++	{
++	  DR_GROUP_NEXT_ELEMENT (last_element) = first_element;
++	  /* Store the gap from the previous member of the group.  If there is
++	     no gap in the access, DR_GROUP_GAP is always 1.  */
++	  DR_GROUP_GAP_TRANS (first_element) = DR_GROUP_GAP (first_element);
++	  DR_GROUP_GAP (first_element) = 1;
++	}
++      for (stmt_info = first_element; stmt_info;
++	   stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
++	{
++	  DR_GROUP_FIRST_ELEMENT (stmt_info) = merge_first_element;
++	  DR_GROUP_SIZE_TRANS (stmt_info) = DR_GROUP_SIZE (stmt_info);
++	  DR_GROUP_SIZE (stmt_info) = new_group_size;
++	  last_element = stmt_info;
++	}
++    }
++  DR_GROUP_SIZE (merge_first_element) = new_group_size;
++  DR_GROUP_SLP_TRANSPOSE (merge_first_element) = true;
++  DR_GROUP_NEXT_ELEMENT (last_element) = NULL;
++  return merge_first_element;
++}
++
++/* Merge the grouped loads which have the same base address and group size.
++   For example, for grouped loads (opa_1, opa_2, opb_1, opb_2):
++     opa_1: a0->a1->a2->a3
++     opa_2: a8->a9->a10->a11
++     opb_1: b0->b1
++     opb_2: b16->b17
++   we can probably get two merged grouped loads:
++     opa: a0->a1->a2->a3->a8->a9->a10->a11
++     opb: b0->b1->b16->b17.  */
++
++static bool
++vect_merge_slp_grouped_loads (bb_vec_info bb_vinfo)
++{
++  if (bb_vinfo->grouped_loads.length () <= 0)
++    {
++      if (dump_enabled_p ())
++	{
++	  dump_printf_loc (MSG_NOTE, vect_location,
++			   "The number of grouped loads is 0.\n");
++	}
++      return false;
++    }
++  bb_vinfo->grouped_loads.qsort (dr_group_cmp);
++  auto_vec<bool> visited (bb_vinfo->grouped_loads.length ());
++  auto_vec<stmt_vec_info> grouped_loads_merge;
++  for (unsigned int i = 0; i < bb_vinfo->grouped_loads.length (); i++)
++    {
++      visited.safe_push (false);
++    }
++  while (1)
++    {
++      /* Find grouped loads which are required to merge.  */
++      auto_vec<stmt_vec_info> res;
++      vect_slp_grouped_load_find (bb_vinfo, visited, res);
++      if (res.is_empty ())
++	{
++	  break;
++	}
++      /* Merge the required grouped loads into one group.  */
++      grouped_loads_merge.safe_push (vect_slp_grouped_load_merge (res));
++    }
++  if (grouped_loads_merge.length () == bb_vinfo->grouped_loads.length ())
++    {
++      if (dump_enabled_p ())
++	{
++	  dump_printf_loc (MSG_NOTE, vect_location,
++			   "No grouped loads need to be merged.\n");
++	}
++      return false;
++    }
++  if (dump_enabled_p ())
++    {
++      dump_printf_loc (MSG_NOTE, vect_location,
++		       "Merging grouped loads successfully.\n");
++    }
++  BB_VINFO_GROUPED_LOADS (bb_vinfo).release ();
++  for (unsigned int i = 0; i < grouped_loads_merge.length (); i++)
++    {
++      BB_VINFO_GROUPED_LOADS (bb_vinfo).safe_push (grouped_loads_merge[i]);
++    }
++  return true;
++}
++
++/* Find the first elements of the grouped stores
++   which are required to transpose and merge.  */
++
++static void
++vect_slp_grouped_store_find (bb_vec_info bb_vinfo, vec<bool> &visited,
++			     vec<stmt_vec_info> &res)
++{
++  stmt_vec_info first_element = NULL;
++  stmt_vec_info merge_first_element = NULL;
++  unsigned int k = 0;
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, k, first_element)
++    {
++      if (visited[k])
++	{
++	  continue;
++	}
++      /* Non-conforming grouped store should be grouped separately.  */
++      if (!STMT_VINFO_GROUPED_ACCESS (first_element)
++	  || first_element->group_number == -1)
++	{
++	  if (merge_first_element == NULL)
++	    {
++	      visited[k] = true;
++	      res.safe_push (first_element);
++	      return;
++	    }
++	}
++      if (first_element->group_number != -1
++	  && merge_first_element == NULL)
++	{
++	  merge_first_element = first_element;
++	}
++      if (merge_first_element->group_number == first_element->group_number)
++	{
++	  visited[k] = true;
++	  res.safe_push (first_element);
++	}
++    }
++}
++
++/* Transpose and merge the grouped stores that are found from
++   vect_slp_grouped_store_find ().  */
++
++static stmt_vec_info
++vect_slp_grouped_store_transform (vec<stmt_vec_info> res)
++{
++  stmt_vec_info stmt_info = res[0];
++  if (res.length () == 1)
++    {
++      return stmt_info;
++    }
++  stmt_vec_info rearrange_first_element = stmt_info;
++  stmt_vec_info last_element = rearrange_first_element;
++
++  unsigned int size = DR_GROUP_SIZE (rearrange_first_element);
++  unsigned int new_group_size = size * res.length ();
++  for (unsigned int i = 1; i < res.length (); i++)
++    {
++      /* Store the gap from the previous member of the group.  If there is no
++	 gap in the access, DR_GROUP_GAP is always 1.  */
++      DR_GROUP_GAP_TRANS (res[i]) = DR_GROUP_GAP (res[i]);
++      DR_GROUP_GAP (res[i]) = 1;
++    }
++  while (!res.is_empty ())
++    {
++      stmt_info = res[0];
++      res.ordered_remove (0);
++      if (DR_GROUP_NEXT_ELEMENT (stmt_info))
++	{
++	  res.safe_push (DR_GROUP_NEXT_ELEMENT (stmt_info));
++	}
++      DR_GROUP_FIRST_ELEMENT (stmt_info) = rearrange_first_element;
++      DR_GROUP_NEXT_ELEMENT (last_element) = stmt_info;
++      DR_GROUP_SIZE_TRANS (stmt_info) = DR_GROUP_SIZE (stmt_info);
++      DR_GROUP_SIZE (stmt_info) = new_group_size;
++      last_element = stmt_info;
++    }
++
++  DR_GROUP_SIZE (rearrange_first_element) = new_group_size;
++  DR_GROUP_SLP_TRANSPOSE (rearrange_first_element) = true;
++  DR_GROUP_NEXT_ELEMENT (last_element) = NULL;
++  return rearrange_first_element;
++}
++
++/* Save the STMT_INFO in the grouped stores to BB_VINFO_SCALAR_STORES for
++   transposing back grouped stores.  */
++
++static void
++get_scalar_stores (bb_vec_info bb_vinfo)
++{
++  unsigned int k = 0;
++  stmt_vec_info first_element = NULL;
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, k, first_element)
++    {
++      /* Filter the grouped store which is unnecessary for transposing.  */
++      if (!STMT_VINFO_GROUPED_ACCESS (first_element)
++	  || first_element->group_number == -1)
++	{
++	  continue;
++	}
++      vec<stmt_vec_info> tmp_scalar_store;
++      tmp_scalar_store.create (DR_GROUP_SIZE (first_element));
++      for (stmt_vec_info stmt_info = first_element; stmt_info;
++	   stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
++	{
++	  tmp_scalar_store.safe_push (stmt_info);
++	}
++      BB_VINFO_SCALAR_STORES (bb_vinfo).safe_push (tmp_scalar_store);
++    }
++}
++
++/* Transpose and merge the grouped stores which have the same group number.
++   For example, for grouped stores (opa_0, opa_1, opa_2, opa_3):
++     opa_0: a00->a01->a02->a03
++     opa_1: a10->a11->a12->a13
++     opa_2: a20->a21->a22->a23
++     opa_3: a30->a31->a32->a33
++   we can probably get the merged grouped store:
++     opa: a00->a10->a20->a30
++	  ->a01->a11->a21->a31
++	  ->a02->a12->a22->a32
++	  ->a03->a13->a23->a33.  */
++
++static bool
++vect_transform_slp_grouped_stores (bb_vec_info bb_vinfo)
++{
++  if (bb_vinfo->grouped_stores.length () <= 0)
++    {
++      if (dump_enabled_p ())
++	{
++	  dump_printf_loc (MSG_NOTE, vect_location,
++			   "The number of grouped stores is 0.\n");
++	}
++      return false;
++    }
++
++  bb_vinfo->grouped_stores.qsort (dr_group_cmp);
++  auto_vec<stmt_vec_info> grouped_stores_merge;
++  auto_vec<bool> visited (bb_vinfo->grouped_stores.length ());
++  unsigned int i = 0;
++  for (i = 0; i < bb_vinfo->grouped_stores.length (); i++)
++    {
++      visited.safe_push (false);
++    }
++
++  /* Get scalar stores for the following transposition recovery.  */
++  get_scalar_stores (bb_vinfo);
++
++  while (1)
++    {
++      /* Find grouped stores which are required to transpose and merge.  */
++      auto_vec<stmt_vec_info> res;
++      vect_slp_grouped_store_find (bb_vinfo, visited, res);
++      if (res.is_empty ())
++	{
++	  break;
++	}
++      /* Transpose and merge the required grouped stores into one group.  */
++      grouped_stores_merge.safe_push (vect_slp_grouped_store_transform (res));
++    }
++
++  BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
++  for (i = 0; i < grouped_stores_merge.length (); i++)
++    {
++      BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (grouped_stores_merge[i]);
++    }
++
++  if (dump_enabled_p ())
++    {
++      dump_printf_loc (MSG_NOTE, vect_location,
++		       "Transposing grouped stores successfully.\n");
++    }
++  return true;
++}
++
++/* A helper function for vect_transform_back_slp_grouped_stores ().  */
++
++static auto_vec<stmt_vec_info>
++vect_transform_back_slp_grouped_store (bb_vec_info bb_vinfo,
++				       stmt_vec_info first_stmt_info)
++{
++  auto_vec<stmt_vec_info> grouped_stores_split;
++  for (unsigned int i = 0; i < bb_vinfo->scalar_stores.length (); i++)
++    {
++      vec<stmt_vec_info> scalar_tmp = bb_vinfo->scalar_stores[i];
++      if (scalar_tmp.length () > 1
++	  && scalar_tmp[0]->group_number != first_stmt_info->group_number)
++	{
++	  continue;
++	}
++      stmt_vec_info cur_stmt_info = NULL;
++      stmt_vec_info cur_first_stmt_info = NULL;
++      stmt_vec_info last_stmt_info = NULL;
++      unsigned int k = 0;
++      FOR_EACH_VEC_ELT (scalar_tmp, k, cur_stmt_info)
++	{
++	  if (k == 0)
++	    {
++	      cur_first_stmt_info = cur_stmt_info;
++	      last_stmt_info = cur_stmt_info;
++	    }
++	  DR_GROUP_FIRST_ELEMENT (cur_stmt_info) = cur_first_stmt_info;
++	  DR_GROUP_NEXT_ELEMENT (last_stmt_info) = cur_stmt_info;
++	  last_stmt_info = cur_stmt_info;
++	}
++      DR_GROUP_SIZE (cur_first_stmt_info) = k;
++      DR_GROUP_NEXT_ELEMENT (last_stmt_info) = NULL;
++      if (first_stmt_info != cur_first_stmt_info)
++	{
++	  DR_GROUP_GAP (cur_first_stmt_info)
++	    = DR_GROUP_GAP_TRANS (cur_first_stmt_info);
++	  DR_GROUP_SLP_TRANSPOSE (cur_first_stmt_info) = false;
++	  DR_GROUP_NUMBER (cur_first_stmt_info) = -1;
++	}
++      grouped_stores_split.safe_push (cur_first_stmt_info);
++    }
++  return grouped_stores_split;
++}
++
++/* Transform the grouped store back.  */
++
++void
++vect_transform_back_slp_grouped_stores (bb_vec_info bb_vinfo,
++					stmt_vec_info first_stmt_info)
++{
++  if (first_stmt_info->group_number == -1)
++    {
++      return;
++    }
++  /* Transform back.  */
++  auto_vec<stmt_vec_info> grouped_stores_split
++    = vect_transform_back_slp_grouped_store (bb_vinfo, first_stmt_info);
++
++  /* Add the remaining grouped stores to grouped_stores_split.  */
++  stmt_vec_info first_element = NULL;
++  unsigned int i = 0;
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, i, first_element)
++    {
++      if (first_element->group_number != first_stmt_info->group_number)
++	{
++	  grouped_stores_split.safe_push (first_element);
++	}
++    }
++  DR_GROUP_SLP_TRANSPOSE (first_stmt_info) = false;
++  DR_GROUP_NUMBER (first_stmt_info) = -1;
++  BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
++  for (i = 0; i < grouped_stores_split.length (); i++)
++    {
++      BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (grouped_stores_split[i]);
++    }
++}
++
++/* Function check_for_slp_vectype
++
++   Restriction for grouped stores by checking their vectype.
++   If the vectype of a grouped store is changed, it needs to be transformed
++   back.  If all grouped stores need to be transformed back, return FALSE.  */
++
++static bool
++check_for_slp_vectype (bb_vec_info bb_vinfo)
++{
++  stmt_vec_info first_element = NULL;
++  unsigned int i = 0;
++  int count = 0;
++  auto_vec<stmt_vec_info> grouped_stores_check;
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, i, first_element)
++    {
++      grouped_stores_check.safe_push (first_element);
++    }
++  FOR_EACH_VEC_ELT (grouped_stores_check, i, first_element)
++    {
++      if (STMT_VINFO_GROUPED_ACCESS (first_element)
++	  && first_element->group_number != -1)
++	{
++	  unsigned int group_size_b
++	    = DR_GROUP_SIZE_TRANS (first_element);
++	  tree vectype = STMT_VINFO_VECTYPE (first_element);
++	  poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
++	  if (nunits.to_constant () > group_size_b)
++	    {
++	      count++;
++	      /* If the vectype is changed, this grouped store needs
++		 to be transformed back.  */
++	      vect_transform_back_slp_grouped_stores (bb_vinfo, first_element);
++	      if (dump_enabled_p ())
++		{
++		  dump_printf_loc (MSG_NOTE, vect_location,
++				   "Not supported: only supported when"
++				   " group_size >= nunits.\n");
++		}
++	    }
++	}
++    }
++  if (count == BB_VINFO_TRANS_GROUPS (bb_vinfo))
++    {
++      return false;
++    }
++  return true;
++}
++
++/* Function check_for_dr_alignment
++
++   Check the alignment of the slp instance loads.
++   Return FALSE if a load cannot be vectorized.  */
++
++static bool
++check_for_dr_alignment (slp_instance instance)
++{
++  slp_tree node = NULL;
++  unsigned int i = 0;
++  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
++    {
++      stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
++      dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
++      enum dr_alignment_support supportable_dr_alignment
++	= vect_supportable_dr_alignment (first_dr_info, false);
++      if (supportable_dr_alignment == dr_explicit_realign_optimized
++	  || supportable_dr_alignment == dr_explicit_realign)
++	{
++	  return false;
++	}
++    }
++  return true;
++}
++
++/* Initialize slp_transpose flag before transposing.  */
++
++static void
++init_stmt_info_slp_transpose (bb_vec_info bb_vinfo)
++{
++  stmt_vec_info first_element = NULL;
++  unsigned int k = 0;
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, k, first_element)
++    {
++      if (STMT_VINFO_GROUPED_ACCESS (first_element))
++	{
++	  DR_GROUP_SLP_TRANSPOSE (first_element) = false;
++	}
++    }
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_loads, k, first_element)
++    {
++      if (STMT_VINFO_GROUPED_ACCESS (first_element))
++	{
++	  DR_GROUP_SLP_TRANSPOSE (first_element) = false;
++	}
++    }
++}
++
++/* Analyze and transpose the stmts before building the SLP tree.
*/ ++ ++static bool ++vect_analyze_transpose (bb_vec_info bb_vinfo) ++{ ++ DUMP_VECT_SCOPE ("vect_analyze_transpose"); ++ ++ if (!vect_may_transpose (bb_vinfo)) ++ { ++ return false; ++ } ++ ++ /* For basic block SLP, try to merge the grouped stores and loads ++ into one group. */ ++ init_stmt_info_slp_transpose (bb_vinfo); ++ if (vect_transform_slp_grouped_stores (bb_vinfo) ++ && vect_merge_slp_grouped_loads (bb_vinfo)) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Analysis succeeded with SLP transposed.\n"); ++ } ++ return true; ++ } ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Analysis failed with SLP transposed.\n"); ++ } ++ return false; ++} + + /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP + trees of packed scalar stmts if SLP is possible. */ +@@ -3124,7 +4018,11 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo) + + vec_outside_cost = vec_prologue_cost + vec_epilogue_cost; + +- if (dump_enabled_p ()) ++ BB_VINFO_VEC_INSIDE_COST (bb_vinfo) = vec_inside_cost; ++ BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo) = vec_outside_cost; ++ BB_VINFO_SCALAR_COST (bb_vinfo) = scalar_cost; ++ ++ if (!unlimited_cost_model (NULL) && dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n"); + dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n", +@@ -3239,6 +4137,22 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal) + + vect_pattern_recog (bb_vinfo); + ++ /* Transpose grouped stores and loads for better vectorizable version. */ ++ if (bb_vinfo->transposed) ++ { ++ if (!vect_analyze_transpose (bb_vinfo)) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "not vectorized: unhandled slp transposed in " ++ "basic block.\n"); ++ } ++ return false; ++ } ++ } ++ bb_vinfo->before_slp = true; ++ + /* Check the SLP opportunities in the basic block, analyze and build SLP + trees. */ + if (!vect_analyze_slp (bb_vinfo, n_stmts)) +@@ -3254,6 +4168,20 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal) + return false; + } + ++ /* Check if the vectype is suitable for SLP transposed. */ ++ if (bb_vinfo->transposed && !check_for_slp_vectype (bb_vinfo)) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "Failed to SLP transposed in the basic block.\n"); ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "not vectorized: vectype is not suitable for " ++ "SLP transposed in basic block.\n"); ++ } ++ return false; ++ } ++ + vect_record_base_alignments (bb_vinfo); + + /* Analyze and verify the alignment of data references and the +@@ -3286,6 +4214,27 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal) + if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ()) + return false; + ++ /* Check if the alignment is suitable for SLP transposed. 
*/
++  if (bb_vinfo->transposed)
++    {
++      for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); i++)
++	{
++	  if (!check_for_dr_alignment (instance))
++	    {
++	      if (dump_enabled_p ())
++		{
++		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
++				   "Failed to SLP transposed in the basic "
++				   "block.\n");
++		  dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
++				   "not vectorized: alignment is not suitable "
++				   "for SLP transposed in basic block.\n");
++		}
++	      return false;
++	    }
++	}
++    }
++
+   if (!vect_slp_analyze_operations (bb_vinfo))
+     {
+       if (dump_enabled_p ())
+@@ -3311,6 +4260,83 @@
+   return true;
+ }
+ 
++static bool
++may_new_transpose_bbvinfo (bb_vec_info bb_vinfo_ori, bool res_ori)
++{
++  /* If the flag is false or the slp analysis is broken before
++     vect_analyze_slp, we don't try to analyze the transposed SLP version.  */
++  if (!flag_tree_slp_transpose_vectorize
++      || !BB_VINFO_BEFORE_SLP (bb_vinfo_ori))
++    {
++      return false;
++    }
++
++  /* If the original bb_vinfo can't be vectorized, try to create a bb_vinfo
++     of the transposed version.  */
++  if (!res_ori)
++    {
++      return true;
++    }
++
++  /* Calculate the cost of the original bb_vinfo.  */
++  if (unlimited_cost_model (NULL))
++    {
++      vect_bb_vectorization_profitable_p (bb_vinfo_ori);
++    }
++  /* If the vec cost and scalar cost are not much different (here we set the
++     threshold to 4), we try to create a bb_vinfo of the transposed
++     version.  */
++  if (BB_VINFO_SCALAR_COST (bb_vinfo_ori)
++      < 4 * (BB_VINFO_VEC_INSIDE_COST (bb_vinfo_ori)
++	     + BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo_ori)))
++    {
++      return true;
++    }
++  return false;
++}
++
++static bool
++may_choose_transpose_bbvinfo (bb_vec_info bb_vinfo_trans, bool res_trans,
++			      bb_vec_info bb_vinfo_ori, bool res_ori)
++{
++  /* The original bb_vinfo is chosen if the transposed bb_vinfo
++     can't be vectorized.  */
++  if (!res_trans)
++    {
++      return false;
++    }
++  /* Calculate the cost of the transposed bb_vinfo.  */
++  if (unlimited_cost_model (NULL))
++    {
++      vect_bb_vectorization_profitable_p (bb_vinfo_trans);
++    }
++  int diff_bb_cost = -1;
++  int diff_bb_cost_trans = -1;
++  if (res_ori)
++    {
++      diff_bb_cost = BB_VINFO_SCALAR_COST (bb_vinfo_ori)
++		     - BB_VINFO_VEC_INSIDE_COST (bb_vinfo_ori)
++		     - BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo_ori);
++    }
++  if (res_trans)
++    {
++      diff_bb_cost_trans = BB_VINFO_SCALAR_COST (bb_vinfo_trans)
++			   - BB_VINFO_VEC_INSIDE_COST (bb_vinfo_trans)
++			   - BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo_trans);
++    }
++  /* The original bb_vinfo is chosen when one of the following conditions
++     is satisfied:
++     1) The cost of the original version is better than the transposed one.
++     2) The vec cost is similar to the scalar cost in the transposed
++	version.  */
++  if ((res_ori && res_trans && diff_bb_cost >= diff_bb_cost_trans)
++      || (res_trans && BB_VINFO_SCALAR_COST (bb_vinfo_trans)
++		       <= (BB_VINFO_VEC_INSIDE_COST (bb_vinfo_trans)
++			   + BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo_trans))))
++    {
++      return false;
++    }
++  return true;
++}
++
+ /* Subroutine of vect_slp_bb.  Try to vectorize the statements between
+    REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true
+    on success.  The region has N_STMTS statements and has the datarefs
+@@ -3323,6 +4349,7 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin,
+ 		    unsigned int n_stmts)
+ {
+   bb_vec_info bb_vinfo;
++  bb_vec_info bb_vinfo_trans = NULL;
+   auto_vector_modes vector_modes;
+ 
+   /* Autodetect first vector size we try.  */
*/ +@@ -3337,6 +4364,10 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin, + { + bool vectorized = false; + bool fatal = false; ++ bool res_bb_vinfo_ori = false; ++ bool res_bb_vinfo_trans = false; ++ ++ /* New a bb_vinfo of the original version. */ + bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared); + + bool first_time_p = shared.datarefs.is_empty (); +@@ -3346,8 +4377,57 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin, + else + bb_vinfo->shared->check_datarefs (); + bb_vinfo->vector_mode = next_vector_mode; ++ bb_vinfo->transposed = false; ++ bb_vinfo->before_slp = false; ++ ++ res_bb_vinfo_ori = vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal); ++ /* Analyze and new a transposed bb_vinfo. */ ++ if (may_new_transpose_bbvinfo (bb_vinfo, res_bb_vinfo_ori)) ++ { ++ bool fatal_trans = false; ++ bb_vinfo_trans ++ = new _bb_vec_info (region_begin, region_end, &shared); ++ bool first_time_p = shared.datarefs.is_empty (); ++ BB_VINFO_DATAREFS (bb_vinfo_trans) = datarefs; ++ if (first_time_p) ++ { ++ bb_vinfo_trans->shared->save_datarefs (); ++ } ++ else ++ { ++ bb_vinfo_trans->shared->check_datarefs (); ++ } ++ bb_vinfo_trans->vector_mode = next_vector_mode; ++ bb_vinfo_trans->transposed = true; ++ bb_vinfo_trans->before_slp = false; ++ ++ res_bb_vinfo_trans ++ = vect_slp_analyze_bb_1 (bb_vinfo_trans, n_stmts, fatal_trans); ++ if (may_choose_transpose_bbvinfo (bb_vinfo_trans, ++ res_bb_vinfo_trans, ++ bb_vinfo, res_bb_vinfo_ori)) ++ { ++ bb_vinfo = bb_vinfo_trans; ++ fatal = fatal_trans; ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Basic block part vectorized " ++ "using transposed version.\n"); ++ } ++ } ++ else ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Basic block part vectorized " ++ "using original version.\n"); ++ } ++ } ++ } + +- if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal) ++ if ((res_bb_vinfo_ori || res_bb_vinfo_trans) + && dbg_cnt (vect_slp)) + { + if (dump_enabled_p ()) +@@ -3400,6 +4480,10 @@ vect_slp_bb_region (gimple_stmt_iterator region_begin, + } + + delete bb_vinfo; ++ if (bb_vinfo_trans) ++ { ++ bb_vinfo_trans = NULL; ++ } + + if (mode_i < vector_modes.length () + && VECTOR_MODE_P (autodetected_vector_mode) +diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +index 6418edb52..b872cfc8d 100644 +--- a/gcc/tree-vect-stmts.c ++++ b/gcc/tree-vect-stmts.c +@@ -7329,6 +7329,153 @@ vectorizable_scan_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + return true; + } + ++/* Function vect_permute_store_chains ++ ++ Call function vect_permute_store_chain (). ++ Given a chain of interleaved stores in DR_CHAIN, generate ++ interleave_high/low stmts to reorder the data correctly. ++ Return the final references for stores in RESULT_CHAIN. */ ++ ++static void ++vect_permute_store_chains (vec dr_chain, unsigned int num_each, ++ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ++ vec *result_chain, unsigned int group) ++{ ++ unsigned int k = 0; ++ unsigned int t = 0; ++ ++ /* Divide vectors into GROUP parts. And permute every NUM_EACH vectors ++ together. 
*/
++  for (k = 0; k < group; k++)
++    {
++      auto_vec dr_chain_transposed (num_each);
++      auto_vec result_chain_transposed (num_each);
++      for (t = k; t < dr_chain.length (); t = t + group)
++	{
++	  dr_chain_transposed.quick_push (dr_chain[t]);
++	}
++      vect_permute_store_chain (dr_chain_transposed, num_each, stmt_info,
++				gsi, &result_chain_transposed);
++      for (t = 0; t < num_each; t++)
++	{
++	  result_chain->quick_push (result_chain_transposed[t]);
++	}
++    }
++}
++
++/* Function transpose_oprnd_store
++
++   Calculate the transposed results from VEC_OPRNDS (VEC_STMT)
++   for vectorizable_store.  */
++
++static void
++transpose_oprnd_store (vec vec_oprnds, vec *result_chain,
++		       unsigned int vec_num, unsigned int const_nunits,
++		       unsigned int array_num, stmt_vec_info first_stmt_info,
++		       gimple_stmt_iterator *gsi)
++{
++  unsigned int group_for_transform = 0;
++  unsigned int num_each = 0;
++
++  /* Transpose back for vec_oprnds.  */
++  /* vec = {vec1, vec2, ...}  */
++  if (array_num < const_nunits
++      && const_nunits % array_num == 0)
++    {
++      vect_transpose_store_chain (vec_oprnds,
++				  vec_num, array_num,
++				  first_stmt_info,
++				  gsi, result_chain);
++    }
++  /* vec1 = {vec_part1}, vec2 = {vec_part2}, ...  */
++  else if (array_num >= const_nunits
++	   && array_num % const_nunits == 0)
++    {
++      group_for_transform = array_num / const_nunits;
++      num_each = vec_oprnds.length () / group_for_transform;
++      vect_permute_store_chains (vec_oprnds,
++				 num_each, first_stmt_info,
++				 gsi, result_chain,
++				 group_for_transform);
++    }
++  else
++    {
++      gcc_unreachable ();
++    }
++}
++
++static dr_vec_info *
++get_dr_info (stmt_vec_info stmt_info)
++{
++  dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info);
++  if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED)
++    {
++      SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN);
++    }
++  return dr_info;
++}
++
++static unsigned
++dr_align_vect_store (dr_vec_info *cur_first_dr_info,
++		     unsigned HOST_WIDE_INT &align)
++{
++  unsigned misalign = 0;
++  align = known_alignment (DR_TARGET_ALIGNMENT (cur_first_dr_info));
++  if (aligned_access_p (cur_first_dr_info))
++    {
++      return misalign;
++    }
++  else if (DR_MISALIGNMENT (cur_first_dr_info) == -1)
++    {
++      align = dr_alignment (vect_dr_behavior (cur_first_dr_info));
++    }
++  else
++    {
++      misalign = DR_MISALIGNMENT (cur_first_dr_info);
++    }
++  return misalign;
++}
++
++static stmt_vec_info
++add_new_stmt_vect_store (tree vectype, tree dataref_ptr, tree dataref_offset,
++			 tree ref_type, dr_vec_info *cur_first_dr_info,
++			 tree vec_oprnd, gimple_stmt_iterator *gsi,
++			 stmt_vec_info stmt_info)
++{
++  /* Data align.  */
++  unsigned HOST_WIDE_INT align;
++  unsigned misalign = dr_align_vect_store (cur_first_dr_info, align);
++
++  if (dataref_offset == NULL_TREE && TREE_CODE (dataref_ptr) == SSA_NAME)
++    {
++      set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, misalign);
++    }
++
++  /* Get data_ref.  */
++  tree offset = dataref_offset ? dataref_offset : build_int_cst (ref_type, 0);
++  tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr, offset);
++  if (aligned_access_p (cur_first_dr_info))
++    {
++      ;
++    }
++  else if (DR_MISALIGNMENT (cur_first_dr_info) == -1)
++    {
++      TREE_TYPE (data_ref) = build_aligned_type (TREE_TYPE (data_ref),
++						 align * BITS_PER_UNIT);
++    }
++  else
++    {
++      tree elem_type = TREE_TYPE (vectype);
++      TREE_TYPE (data_ref) = build_aligned_type (TREE_TYPE (data_ref),
++						 TYPE_ALIGN (elem_type));
++    }
++  /* Add new stmt.
*/
++  vect_copy_ref_info (data_ref, DR_REF (cur_first_dr_info->dr));
++  gassign *new_stmt = gimple_build_assign (data_ref, vec_oprnd);
++  stmt_vec_info new_stmt_info
++    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
++  return new_stmt_info;
++}
+ 
+ /* Function vectorizable_store.
+ 
+@@ -8208,6 +8355,16 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+       else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info))
+ 	vect_get_gather_scatter_ops (loop, stmt_info, &gs_info,
+ 				     &dataref_ptr, &vec_offset);
++      /* If the stmt_info needs transposed recovery, dataref_ptr
++	 will be calculated later.  */
++      else if (memory_access_type == VMAT_CONTIGUOUS
++	       && is_a (vinfo)
++	       && STMT_VINFO_GROUPED_ACCESS (stmt_info)
++	       && DR_GROUP_SLP_TRANSPOSE (
++		    DR_GROUP_FIRST_ELEMENT (stmt_info)))
++	{
++	  dataref_ptr = NULL_TREE;
++	}
+       else
+ 	dataref_ptr
+ 	  = vect_create_data_ref_ptr (first_stmt_info, aggr_type,
+@@ -8299,6 +8456,75 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+     }
+   else
+     {
++      /* group_size: the size of the group after transposing and merging.
++	 group_size_b: the size of the group before transposing and merging,
++	 and only group_size_b >= const_nunits is supported.
++	 array_num: the number of arrays.
++	 const_nunits: TYPE_VECTOR_SUBPARTS (vectype).
++	 ncontinues: group_size_b / const_nunits, i.e. the number of
++	 times an array is stored in memory.  */
++      if (slp && is_a (vinfo)
++	  && STMT_VINFO_GROUPED_ACCESS (stmt_info)
++	  && DR_GROUP_SLP_TRANSPOSE (DR_GROUP_FIRST_ELEMENT (stmt_info)))
++	{
++	  if (dump_enabled_p ())
++	    {
++	      dump_printf_loc (MSG_NOTE, vect_location,
++			       "vectorizable_store for slp transpose.\n");
++	    }
++	  /* Transpose back for grouped stores.  */
++	  vect_transform_back_slp_grouped_stores (bb_vinfo,
++						  first_stmt_info);
++
++	  result_chain.create (vec_oprnds.length ());
++	  unsigned int const_nunits = nunits.to_constant ();
++	  unsigned int group_size_b = DR_GROUP_SIZE_TRANS (first_stmt_info);
++	  unsigned int array_num = group_size / group_size_b;
++	  transpose_oprnd_store (vec_oprnds, &result_chain, vec_num,
++				 const_nunits, array_num,
++				 first_stmt_info, gsi);
++
++	  /* For every store group, not for every vec, because transposing
++	     and merging have changed the data reference access.  */
++	  gcc_assert (group_size_b >= const_nunits);
++	  unsigned int ncontinues = group_size_b / const_nunits;
++
++	  unsigned int k = 0;
++	  for (i = 0; i < array_num; i++)
++	    {
++	      stmt_vec_info first_stmt_b;
++	      BB_VINFO_GROUPED_STORES (vinfo).iterate (i, &first_stmt_b);
++	      bool simd_lane_access_p
++		= STMT_VINFO_SIMD_LANE_ACCESS_P (first_stmt_b) != 0;
++	      tree ref_type = get_group_alias_ptr_type (first_stmt_b);
++	      dataref_ptr = vect_create_data_ref_ptr (
++			      first_stmt_b, aggr_type,
++			      simd_lane_access_p ? loop : NULL,
++			      offset, &dummy, gsi, &ptr_incr,
++			      simd_lane_access_p, NULL_TREE, bump);
++	      dr_vec_info *cur_first_dr_info = get_dr_info (first_stmt_b);
++	      for (unsigned int t = 0; t < ncontinues; t++)
++		{
++		  vec_oprnd = result_chain[k];
++		  k++;
++		  if (t > 0)
++		    {
++		      /* Bump the vector pointer.
*/
++		      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr,
++						     gsi, first_stmt_b,
++						     bump);
++		    }
++		  new_stmt_info = add_new_stmt_vect_store (
++				    vectype, dataref_ptr, dataref_offset,
++				    ref_type, cur_first_dr_info, vec_oprnd,
++				    gsi, first_stmt_b);
++		}
++	    }
++	  oprnds.release ();
++	  result_chain.release ();
++	  vec_oprnds.release ();
++	  return true;
++	}
+       new_stmt_info = NULL;
+       if (grouped_store)
+ 	{
+@@ -8557,6 +8783,447 @@ hoist_defs_of_uses (stmt_vec_info stmt_info, class loop *loop)
+   return true;
+ }
+ 
++static tree
++calculate_new_type (tree vectype, unsigned int const_nunits,
++		    unsigned int group_size_b, unsigned int &nloads,
++		    unsigned int &ncontinues, tree &lvectype)
++{
++  tree ltype = TREE_TYPE (vectype);
++  /* nloads is the number of ARRAYs in a vector.
++     vectemp = {a[], b[], ...}  */
++  if (group_size_b < const_nunits)
++    {
++      tree ptype;
++      tree vtype
++	= vector_vector_composition_type (vectype,
++					  const_nunits / group_size_b,
++					  &ptype);
++      if (vtype != NULL_TREE)
++	{
++	  nloads = const_nunits / group_size_b;
++	  lvectype = vtype;
++	  ltype = ptype;
++	  ncontinues = 1;
++	}
++    }
++  /* ncontinues is the number of vectors from an ARRAY.
++     vectemp1 = {a[0], a[1], ...}
++     ...
++     vectempm = {a[k], a[k+1], ...}  */
++  else
++    {
++      nloads = 1;
++      ltype = vectype;
++      ncontinues = group_size_b / const_nunits;
++    }
++  ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype)));
++  return ltype;
++}
++
++static void
++generate_old_load_permutations (slp_tree slp_node, unsigned int group_size,
++				vec &old_load_permutation)
++{
++  /* Generate the old load permutations from the slp_node.  */
++  unsigned i = 0;
++  unsigned k = 0;
++
++  /* If SLP_NODE has load_permutation, we copy it to old_load_permutation.
++     Otherwise, we generate a permutation sequentially.  */
++  if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ())
++    {
++      FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), i, k)
++	{
++	  old_load_permutation.safe_push (k);
++	}
++    }
++  else
++    {
++      for (unsigned i = 0; i < group_size; i++)
++	{
++	  old_load_permutation.safe_push (i);
++	}
++    }
++}
++
++static void
++generate_new_load_permutation_mapping (unsigned slp_node_length,
++				       vec &group_idx,
++				       const vec &load_permutation,
++				       unsigned int group_size_b,
++				       unsigned &new_group_size,
++				       vec &group_from)
++{
++  /* group_num_vec: only stores the group_loads IDs which are calculated from
++     load_permutation.  */
++  auto_vec group_num_vec;
++
++  /* Calculate which group_loads the stmts in SLP_NODE come from.  */
++  unsigned i = 0;
++  unsigned k = 0;
++  FOR_EACH_VEC_ELT (load_permutation, i, k)
++    {
++      unsigned int t0 = k / group_size_b;
++      if (!group_num_vec.contains (t0))
++	{
++	  group_num_vec.safe_push (t0);
++	}
++      group_from.safe_push (t0);
++    }
++  group_num_vec.qsort (cmp_for_group_num);
++  /* n_groups: the number of group_loads.  */
++  unsigned int n_groups = group_num_vec.length ();
++  new_group_size = n_groups * group_size_b;
++  for (i = 0; i < n_groups; i++)
++    {
++      group_idx.safe_push (group_num_vec[i] * group_size_b);
++    }
++  /* A new mapping from group_ind_vec to group_from.
++     For example:
++     Origin:        group_from = {1,1,3,3,5,5,7,7};
++     After mapping: group_from = {0,0,1,1,2,2,3,3};  */
++  auto_vec group_ind_vec (n_groups);
++  for (k = 0; k < n_groups; k++)
++    {
++      group_ind_vec.safe_push (k);
++    }
++  for (i = 0; i < slp_node_length; i++)
++    {
++      for (k = 0; k < n_groups; k++)
++	{
++	  if (group_from[i] == group_num_vec[k])
++	    {
++	      group_from[i] = group_ind_vec[k];
++	      break;
++	    }
++	}
++    }
++}
++
++static void
++generate_new_load_permutation (vec &new_load_permutation,
++			       const vec &old_load_permutation,
++			       slp_tree slp_node, bool &this_load_permuted,
++			       const vec &group_from,
++			       unsigned int group_size_b)
++{
++  unsigned slp_node_length = SLP_TREE_SCALAR_STMTS (slp_node).length ();
++  /* Generate the new load permutation from the new mapping.  */
++  new_load_permutation.create (slp_node_length);
++  unsigned i = 0;
++  unsigned k = 0;
++  FOR_EACH_VEC_ELT (old_load_permutation, i, k)
++    {
++      /* t1 is the new permutation of k in the old permutation.
++	 t1 = base_address + offset:
++	 base_address = group_from[i] * group_size_b;
++	 offset = k % group_size_b.  */
++      unsigned int t1
++	= group_from[i] * group_size_b + k % group_size_b;
++      new_load_permutation.safe_push (t1);
++      if (t1 != k)
++	{
++	  this_load_permuted = true;
++	}
++    }
++}
++
++static bool
++is_slp_perm (bool slp_perm, bool this_load_permuted, poly_uint64 nunits,
++	     unsigned int group_size, stmt_vec_info first_stmt_info)
++{
++  /* Calculate the unrolling factor based on the smallest type.  */
++  poly_uint64 unrolling_factor
++    = exact_div (common_multiple (nunits, group_size), group_size);
++  /* The load requires permutation when unrolling exposes
++     a gap either because the group is larger than the SLP
++     group-size or because there is a gap between the groups.  */
++  if (!slp_perm && !this_load_permuted
++      && (known_eq (unrolling_factor, 1U)
++	  || (group_size == DR_GROUP_SIZE (first_stmt_info)
++	      && DR_GROUP_GAP (first_stmt_info) == 0)))
++    {
++      return false;
++    }
++  else
++    {
++      return true;
++    }
++}
++
++static void
++generate_load_permutation (slp_tree slp_node, unsigned &new_group_size,
++			   unsigned int group_size, unsigned int group_size_b,
++			   bool &this_load_permuted, vec &group_idx,
++			   vec &new_load_permutation)
++{
++  /* Generate the old load permutations from SLP_NODE.  */
++  vec old_load_permutation;
++  old_load_permutation.create (group_size);
++  generate_old_load_permutations (slp_node, group_size, old_load_permutation);
++
++  /* Calculate which group_loads the stmts in SLP_NODE come from.  */
++  unsigned slp_node_length = SLP_TREE_SCALAR_STMTS (slp_node).length ();
++  /* group_from: stores the group_loads ID for every stmt in SLP_NODE.  */
++  vec group_from;
++  group_from.create (slp_node_length);
++  generate_new_load_permutation_mapping (slp_node_length, group_idx,
++					 old_load_permutation,
++					 group_size_b, new_group_size,
++					 group_from);
++
++  /* Generate the new load permutation from the new mapping and calculate
++     the this_load_permuted flag.  If this_load_permuted is true, we need
++     to execute the SLP permutation using the new load permutation.
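++     As a concrete check of the scheme above (illustrative numbers): with
++     group_size_b = 4 and an old permutation {4,5,12,13}, the groups used
++     are {1,3}, which map to {0,1}; the new permutation then becomes
++     {0,1,4,5} and new_group_size = 2 * 4 = 8.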
*/ ++ generate_new_load_permutation (new_load_permutation, old_load_permutation, ++ slp_node, this_load_permuted, group_from, ++ group_size_b); ++ old_load_permutation.release (); ++ group_from.release (); ++} ++ ++static unsigned int ++dr_align_vect_load (dr_vec_info *cur_first_dr_info, ++ unsigned HOST_WIDE_INT &align, ++ enum dr_alignment_support alignment_support_scheme) ++{ ++ unsigned int misalign = 0; ++ ++ align = known_alignment (DR_TARGET_ALIGNMENT (cur_first_dr_info)); ++ if (alignment_support_scheme == dr_aligned) ++ { ++ gcc_assert (aligned_access_p (cur_first_dr_info)); ++ } ++ else if (DR_MISALIGNMENT (cur_first_dr_info) == -1) ++ { ++ align = dr_alignment (vect_dr_behavior (cur_first_dr_info)); ++ } ++ else ++ { ++ misalign = DR_MISALIGNMENT (cur_first_dr_info); ++ } ++ return misalign; ++} ++ ++static stmt_vec_info ++add_new_stmt_vect_load (tree vectype, tree dataref_ptr, tree dataref_offset, ++ tree ref_type, tree ltype, gassign *(&new_stmt), ++ dr_vec_info *cur_first_dr_info, ++ gimple_stmt_iterator *gsi, stmt_vec_info stmt_info) ++{ ++ /* Data align. */ ++ enum dr_alignment_support alignment_support_scheme ++ = vect_supportable_dr_alignment (cur_first_dr_info, false); ++ unsigned HOST_WIDE_INT align; ++ unsigned int misalign = dr_align_vect_load (cur_first_dr_info, align, ++ alignment_support_scheme); ++ if (dataref_offset == NULL_TREE && TREE_CODE (dataref_ptr) == SSA_NAME) ++ { ++ set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, misalign); ++ } ++ ++ /* Get data_ref. */ ++ tree offset = dataref_offset ? dataref_offset : build_int_cst (ref_type, 0); ++ tree data_ref = fold_build2 (MEM_REF, ltype, dataref_ptr, offset); ++ if (alignment_support_scheme == dr_aligned) ++ { ++ ; ++ } ++ else if (DR_MISALIGNMENT (cur_first_dr_info) == -1) ++ { ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), align * BITS_PER_UNIT); ++ } ++ else ++ { ++ tree elem_type = TREE_TYPE (vectype); ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), TYPE_ALIGN (elem_type)); ++ } ++ ++ /* Add new stmt. 
*/
++  vect_copy_ref_info (data_ref, DR_REF (cur_first_dr_info->dr));
++  new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref);
++  stmt_vec_info new_stmt_info
++    = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
++  return new_stmt_info;
++}
++
++static void
++push_new_stmt_to_dr_chain (bool slp_perm, stmt_vec_info new_stmt_info,
++			   vec &dr_chain, slp_tree slp_node)
++{
++  if (slp_perm)
++    {
++      dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt));
++    }
++  else
++    {
++      SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
++    }
++}
++
++static stmt_vec_info
++get_first_stmt_info_before_transpose (stmt_vec_info first_stmt_info,
++				      unsigned int group_el,
++				      unsigned int group_size)
++{
++  stmt_vec_info last_stmt_info = first_stmt_info;
++  unsigned int count = 0;
++  gcc_assert (group_el < group_size);
++  while (count < group_el)
++    {
++      last_stmt_info = DR_GROUP_NEXT_ELEMENT (last_stmt_info);
++      count++;
++    }
++  return last_stmt_info;
++}
++
++static stmt_vec_info
++add_new_stmt_for_nloads_greater_than_one (tree lvectype, tree vectype,
++					  vec *v,
++					  stmt_vec_info stmt_info,
++					  gimple_stmt_iterator *gsi)
++{
++  tree vec_inv = build_constructor (lvectype, v);
++  tree new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi);
++  vec_info *vinfo = stmt_info->vinfo;
++  stmt_vec_info new_stmt_info = vinfo->lookup_def (new_temp);
++  if (lvectype != vectype)
++    {
++      gassign *new_stmt = gimple_build_assign (make_ssa_name (vectype),
++					       VIEW_CONVERT_EXPR,
++					       build1 (VIEW_CONVERT_EXPR,
++						       vectype, new_temp));
++      new_stmt_info = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
++    }
++  return new_stmt_info;
++}
++
++/* Function new_vect_stmt_for_nloads.
++
++   Create a new VEC_STMT when NLOADS arrays are merged into a vector.
++
++   ncopies is the number of vectors that need to be loaded from memory.
++   nloads is the number of ARRAYs in a vector.
++   vectemp = {a[], b[], ...}  */
++
++static void
++new_vect_stmt_for_nloads (unsigned int ncopies, unsigned int nloads,
++			  vec group_idx, stmt_vec_info stmt_info,
++			  offset_info *offset_info, vectype_info *vectype_info,
++			  vect_memory_access_type memory_access_type,
++			  bool slp_perm, vec& dr_chain, slp_tree slp_node,
++			  gimple_stmt_iterator *gsi)
++{
++  vec *v = NULL;
++  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
++  unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
++  stmt_vec_info first_stmt_info_b = NULL;
++  stmt_vec_info new_stmt_info = NULL;
++  tree dataref_ptr = NULL_TREE;
++  tree dummy;
++  gimple *ptr_incr = NULL;
++  unsigned int n = 0;
++  for (unsigned int i = 0; i < ncopies; i++)
++    {
++      vec_alloc (v, nloads);
++      for (unsigned int t = 0; t < nloads; t++)
++	{
++	  first_stmt_info_b = get_first_stmt_info_before_transpose (
++				first_stmt_info, group_idx[n++], group_size);
++	  dr_vec_info* cur_first_dr_info = get_dr_info (first_stmt_info_b);
++	  tree bump = vect_get_data_ptr_increment (cur_first_dr_info,
++						   vectype_info->ltype,
++						   memory_access_type);
++	  bool simd_lane_access_p
++	    = STMT_VINFO_SIMD_LANE_ACCESS_P (first_stmt_info_b) != 0;
++
++	  /* Create dataref_ptr, which points to init_address.
*/
++	  dataref_ptr = vect_create_data_ref_ptr (
++			  first_stmt_info_b, vectype_info->ltype, NULL,
++			  offset_info->offset, &dummy, gsi, &ptr_incr,
++			  simd_lane_access_p, offset_info->byte_offset, bump);
++
++	  gassign *new_stmt = NULL;
++	  new_stmt_info = add_new_stmt_vect_load (
++			    vectype_info->vectype, dataref_ptr,
++			    offset_info->dataref_offset, vectype_info->ref_type,
++			    vectype_info->ltype, new_stmt, cur_first_dr_info,
++			    gsi, first_stmt_info_b);
++
++	  CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, gimple_assign_lhs (new_stmt));
++	}
++      new_stmt_info = add_new_stmt_for_nloads_greater_than_one (
++			vectype_info->lvectype, vectype_info->vectype,
++			v, first_stmt_info_b, gsi);
++      push_new_stmt_to_dr_chain (slp_perm, new_stmt_info,
++				 dr_chain, slp_node);
++    }
++}
++
++/* Function new_vect_stmt_for_ncontinues.
++
++   Create new VEC_STMTs when an ARRAY is divided into several vectors.
++
++   n_groups is the number of ARRAYs.
++   ncontinues is the number of vectors from an ARRAY.
++   vectemp1 = {a[0], a[1], ...}
++   ...
++   vectempm = {a[k], a[k+1], ...}  */
++
++static void
++new_vect_stmt_for_ncontinues (unsigned int ncontinues, vec group_idx,
++			      stmt_vec_info stmt_info, offset_info* offset_info,
++			      vectype_info* vectype_info,
++			      vect_memory_access_type memory_access_type,
++			      bool slp_perm, vec& dr_chain,
++			      slp_tree slp_node,
++			      gimple_stmt_iterator *gsi)
++{
++  stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
++  unsigned int group_size = DR_GROUP_SIZE (first_stmt_info);
++  stmt_vec_info new_stmt_info = NULL;
++  tree dataref_ptr = NULL_TREE;
++  tree dummy;
++  gimple *ptr_incr = NULL;
++  unsigned int n_groups = group_idx.length ();
++  for (unsigned int i = 0; i < n_groups; i++)
++    {
++      stmt_vec_info first_stmt_info_b = get_first_stmt_info_before_transpose (
++					  first_stmt_info, group_idx[i],
++					  group_size);
++      dr_vec_info* cur_first_dr_info = get_dr_info (first_stmt_info_b);
++      tree bump = vect_get_data_ptr_increment (cur_first_dr_info,
++					       vectype_info->ltype,
++					       memory_access_type);
++      bool simd_lane_access_p
++	= STMT_VINFO_SIMD_LANE_ACCESS_P (first_stmt_info_b) != 0;
++      for (unsigned int k = 0; k < ncontinues; k++)
++	{
++	  /* Create dataref_ptr, which points to init_address.  */
++	  if (k == 0)
++	    {
++	      dataref_ptr = vect_create_data_ref_ptr (
++			      first_stmt_info_b, vectype_info->ltype, NULL,
++			      offset_info->offset, &dummy, gsi, &ptr_incr,
++			      simd_lane_access_p, offset_info->byte_offset,
++			      bump);
++	    }
++	  else
++	    {
++	      dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr,
++					     gsi, first_stmt_info_b, bump);
++	    }
++	  gassign *new_stmt = NULL;
++	  new_stmt_info = add_new_stmt_vect_load (
++			    vectype_info->vectype, dataref_ptr,
++			    offset_info->dataref_offset, vectype_info->ref_type,
++			    vectype_info->ltype, new_stmt, cur_first_dr_info,
++			    gsi, first_stmt_info_b);
++	  push_new_stmt_to_dr_chain (slp_perm, new_stmt_info,
++				     dr_chain, slp_node);
++	}
++    }
++}
++
+ /* vectorizable_load.
+ 
+    Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure)
+@@ -9364,6 +10031,9 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+   tree vec_mask = NULL_TREE;
+   prev_stmt_info = NULL;
+   poly_uint64 group_elt = 0;
++  unsigned new_group_size = 0;
++  vec new_load_permutation;
++
+   for (j = 0; j < ncopies; j++)
+     {
+       stmt_vec_info new_stmt_info = NULL;
+@@ -9385,6 +10055,15 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+ 	  dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr));
+ 	  dataref_offset = build_int_cst (ref_type, 0);
+ 	}
++      /* If the stmt_info needs transposed recovery, dataref_ptr
++	 will be calculated later.  */
++      else if (slp && is_a (vinfo)
++	       && STMT_VINFO_GROUPED_ACCESS (stmt_info)
++	       && DR_GROUP_SLP_TRANSPOSE (
++		    DR_GROUP_FIRST_ELEMENT (stmt_info)))
++	{
++	  dataref_ptr = NULL_TREE;
++	}
+       else if (diff_first_stmt_info)
+ 	{
+ 	  dataref_ptr
+@@ -9501,6 +10180,63 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+ 	      /* Record that VEC_ARRAY is now dead.  */
+ 	      vect_clobber_variable (stmt_info, gsi, vec_array);
+ 	    }
++	  else if (slp && is_a (vinfo)
++		   && STMT_VINFO_GROUPED_ACCESS (stmt_info)
++		   && DR_GROUP_SLP_TRANSPOSE (DR_GROUP_FIRST_ELEMENT (stmt_info)))
++	    {
++	      if (dump_enabled_p ())
++		{
++		  dump_printf_loc (MSG_NOTE, vect_location,
++				   "vectorizable_load for slp transpose.\n");
++		}
++	      /* group_size: the size of the group after merging.
++		 group_size_b: the size of the group before merging.
++		 const_nunits: TYPE_VECTOR_SUBPARTS (vectype), it is the number
++		 of elements in a vector.
++		 nloads: const_nunits / group_size_b or 1, it means the number
++		 of ARRAYs in a vector.
++		 ncontinues: group_size_b / const_nunits or 1, it means the
++		 number of vectors from an ARRAY.  */
++	      unsigned int group_size_b = DR_GROUP_SIZE_TRANS (first_stmt_info);
++	      unsigned int const_nunits = nunits.to_constant ();
++	      unsigned int nloads = const_nunits;
++	      unsigned int ncontinues = group_size_b;
++	      tree lvectype = vectype;
++	      tree ltype = calculate_new_type (vectype, const_nunits,
++					       group_size_b, nloads,
++					       ncontinues, lvectype);
++	      bool this_load_permuted = false;
++	      auto_vec group_idx;
++	      generate_load_permutation (slp_node, new_group_size, group_size,
++					 group_size_b, this_load_permuted,
++					 group_idx, new_load_permutation);
++	      slp_perm = is_slp_perm (slp_perm, this_load_permuted, nunits,
++				      group_size, first_stmt_info);
++
++	      /* ncopies: the number of vectors that need to be loaded from
++		 memory.  */
++	      unsigned int ncopies = new_group_size / const_nunits;
++	      offset_info offset_info = {offset, byte_offset, dataref_offset};
++	      vectype_info vectype_info = {vectype, ltype, lvectype, ref_type};
++	      if (slp_perm)
++		{
++		  dr_chain.create (ncopies);
++		}
++	      if (nloads > 1 && ncontinues == 1)
++		{
++		  new_vect_stmt_for_nloads (ncopies, nloads, group_idx, stmt_info,
++					    &offset_info, &vectype_info,
++					    memory_access_type, slp_perm, dr_chain,
++					    slp_node, gsi);
++		}
++	      else
++		{
++		  new_vect_stmt_for_ncontinues (ncontinues, group_idx, stmt_info,
++						&offset_info, &vectype_info,
++						memory_access_type, slp_perm,
++						dr_chain, slp_node, gsi);
++		}
++	    }
+ 	  else
+ 	    {
+ 	      for (i = 0; i < vec_num; i++)
+@@ -9840,7 +10576,32 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
+       if (slp && !slp_perm)
+ 	continue;
+ 
+-      if (slp_perm)
++      /* Using the new load permutation to generate vector permute statements
++	 from a list of loads in DR_CHAIN.
*/ ++ if (slp && slp_perm && is_a (vinfo) ++ && STMT_VINFO_GROUPED_ACCESS (stmt_info) ++ && DR_GROUP_SLP_TRANSPOSE (DR_GROUP_FIRST_ELEMENT (stmt_info))) ++ { ++ unsigned n_perms; ++ stmt_vec_info stmt_info_ = SLP_TREE_SCALAR_STMTS (slp_node)[0]; ++ unsigned int old_size = DR_GROUP_SIZE (stmt_info); ++ DR_GROUP_SIZE (stmt_info_) = new_group_size; ++ vec old_load_permutation ++ = SLP_TREE_LOAD_PERMUTATION (slp_node); ++ SLP_TREE_LOAD_PERMUTATION (slp_node) = new_load_permutation; ++ bool perm_load_success = vect_transform_slp_perm_load ( ++ slp_node, dr_chain, gsi, vf, ++ slp_node_instance, false, &n_perms); ++ DR_GROUP_SIZE (stmt_info_) = old_size; ++ SLP_TREE_LOAD_PERMUTATION (slp_node) = old_load_permutation; ++ new_load_permutation.release (); ++ if (!perm_load_success) ++ { ++ dr_chain.release (); ++ return false; ++ } ++ } ++ else if (slp_perm) + { + unsigned n_perms; + if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf, +diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h +index f7becb34a..1c4a6c421 100644 +--- a/gcc/tree-vectorizer.h ++++ b/gcc/tree-vectorizer.h +@@ -297,6 +297,21 @@ public: + vec ddrs; + }; + ++/* Information about offset in vectorizable_load. */ ++struct offset_info { ++ tree offset; ++ tree byte_offset; ++ tree dataref_offset; ++}; ++ ++/* Information about vectype in vectorizable_load. */ ++struct vectype_info { ++ tree vectype; ++ tree ltype; ++ tree lvectype; ++ tree ref_type; ++}; ++ + /* Vectorizer state common between loop and basic-block vectorization. */ + class vec_info { + public: +@@ -335,6 +350,14 @@ public: + stmt in the chain. */ + auto_vec grouped_stores; + ++ /* All interleaving chains of loads, represented by the first ++ stmt in the chain. */ ++ auto_vec grouped_loads; ++ ++ /* All interleaving chains of stores (before transposed), represented by all ++ stmt in the chain. */ ++ auto_vec > scalar_stores; ++ + /* Cost data used by the target cost model. */ + void *target_cost_data; + +@@ -702,6 +725,8 @@ public: + #define LOOP_VINFO_CHECK_NONZERO(L) (L)->check_nonzero + #define LOOP_VINFO_LOWER_BOUNDS(L) (L)->lower_bounds + #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores ++#define LOOP_VINFO_GROUPED_LOADS(L) (L)->grouped_loads ++#define LOOP_VINFO_SCALAR_STORES(L) (L)->scalar_stores + #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances + #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor + #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions +@@ -764,6 +789,25 @@ public: + basic_block bb; + gimple_stmt_iterator region_begin; + gimple_stmt_iterator region_end; ++ ++ /* True, if bb_vinfo can goto vect_analyze_slp. */ ++ bool before_slp; ++ ++ /* True, if bb_vinfo is a transposed version. */ ++ bool transposed; ++ ++ /* The number of transposed groups. */ ++ int transposed_group; ++ ++ /* The cost of the scalar iterations. */ ++ int scalar_cost; ++ ++ /* The cost of the vector prologue and epilogue, including peeled ++ iterations and set-up code. */ ++ int vec_outside_cost; ++ ++ /* The cost of the vector loop body. 
*/ ++ int vec_inside_cost; + } *bb_vec_info; + + #define BB_VINFO_BB(B) (B)->bb +@@ -772,6 +816,14 @@ public: + #define BB_VINFO_DATAREFS(B) (B)->shared->datarefs + #define BB_VINFO_DDRS(B) (B)->shared->ddrs + #define BB_VINFO_TARGET_COST_DATA(B) (B)->target_cost_data ++#define BB_VINFO_GROUPED_LOADS(B) (B)->grouped_loads ++#define BB_VINFO_SCALAR_STORES(B) (B)->scalar_stores ++#define BB_VINFO_VEC_OUTSIDE_COST(B) (B)->vec_outside_cost ++#define BB_VINFO_VEC_INSIDE_COST(B) (B)->vec_inside_cost ++#define BB_VINFO_SCALAR_COST(B) (B)->scalar_cost ++#define BB_VINFO_SLP_TRANSPOSED(B) (B)->transposed ++#define BB_VINFO_BEFORE_SLP(B) (B)->before_slp ++#define BB_VINFO_TRANS_GROUPS(B) (B)->transposed_group + + static inline bb_vec_info + vec_info_for_bb (basic_block bb) +@@ -1012,6 +1064,17 @@ public: + stmt_vec_info next_element; + /* The size of the group. */ + unsigned int size; ++ ++ /* The size of the group before transposed. */ ++ unsigned int size_before_transpose; ++ ++ /* If true, the stmt_info is slp transposed. */ ++ bool slp_transpose; ++ ++ /* Mark the group store number for rebuild interleaving chain ++ during transpose phase. Value -1 represents unable to transpose. */ ++ int group_number; ++ + /* For stores, number of stores from this group seen. We vectorize the last + one. */ + unsigned int store_count; +@@ -1019,6 +1082,9 @@ public: + is 1. */ + unsigned int gap; + ++ /* The gap before transposed. */ ++ unsigned int gap_before_transpose; ++ + /* The minimum negative dependence distance this stmt participates in + or zero if none. */ + unsigned int min_neg_dist; +@@ -1217,6 +1283,12 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo) + #define STMT_VINFO_REDUC_VECTYPE_IN(S) (S)->reduc_vectype_in + #define STMT_VINFO_SLP_VECT_ONLY(S) (S)->slp_vect_only_p + ++#define DR_GROUP_SLP_TRANSPOSE(S) \ ++ (gcc_checking_assert ((S)->dr_aux.dr), (S)->slp_transpose) ++#define DR_GROUP_SIZE_TRANS(S) \ ++ (gcc_checking_assert ((S)->dr_aux.dr), (S)->size_before_transpose) ++#define DR_GROUP_NUMBER(S) \ ++ (gcc_checking_assert ((S)->dr_aux.dr), (S)->group_number) + #define DR_GROUP_FIRST_ELEMENT(S) \ + (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element) + #define DR_GROUP_NEXT_ELEMENT(S) \ +@@ -1227,6 +1299,8 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo) + (gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count) + #define DR_GROUP_GAP(S) \ + (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap) ++#define DR_GROUP_GAP_TRANS(S) \ ++ (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap_before_transpose) + + #define REDUC_GROUP_FIRST_ELEMENT(S) \ + (gcc_checking_assert (!(S)->dr_aux.dr), (S)->first_element) +@@ -1624,6 +1698,17 @@ vect_get_scalar_dr_size (dr_vec_info *dr_info) + return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_info->dr)))); + } + ++/* Compare two unsigned int A and B. ++ Sorting them in ascending order. */ ++ ++static inline int ++cmp_for_group_num (const void *a_, const void *b_) ++{ ++ unsigned int a = *(unsigned int *)const_cast(a_); ++ unsigned int b = *(unsigned int *)const_cast(b_); ++ return a < b ? -1 : 1; ++} ++ + /* Return true if LOOP_VINFO requires a runtime check for whether the + vector loop is profitable. 
*/
+ 
+@@ -1787,6 +1872,9 @@ extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
+ extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
+ extern void vect_permute_store_chain (vec ,unsigned int, stmt_vec_info,
+ 				      gimple_stmt_iterator *, vec *);
++extern void vect_transpose_store_chain (vec, unsigned int, unsigned int,
++					stmt_vec_info, gimple_stmt_iterator *,
++					vec *);
+ extern tree vect_setup_realignment (stmt_vec_info, gimple_stmt_iterator *,
+ 				    tree *, enum dr_alignment_support, tree,
+ 				    class loop **);
+@@ -1849,6 +1937,7 @@ extern void vect_free_slp_instance (slp_instance, bool);
+ extern bool vect_transform_slp_perm_load (slp_tree, vec ,
+ 					  gimple_stmt_iterator *, poly_uint64,
+ 					  slp_instance, bool, unsigned *);
++extern void vect_transform_back_slp_grouped_stores (bb_vec_info, stmt_vec_info);
+ extern bool vect_slp_analyze_operations (vec_info *);
+ extern void vect_schedule_slp (vec_info *);
+ extern opt_result vect_analyze_slp (vec_info *, unsigned);
+-- 
+2.27.0.windows.1
+ 
diff --git a/0046-ArrayWidenCompare-Add-a-new-optimization-for-array-c.patch b/0046-ArrayWidenCompare-Add-a-new-optimization-for-array-c.patch
new file mode 100644
index 0000000000000000000000000000000000000000..4a3da7836de06a97f866758891d1718eb81f8fe0
--- /dev/null
+++ b/0046-ArrayWidenCompare-Add-a-new-optimization-for-array-c.patch
@@ -0,0 +1,1982 @@
+From 8072fe107c04778de78db90bf6fdb7baf474e24a Mon Sep 17 00:00:00 2001
+From: dingguangya
+Date: Thu, 2 Jun 2022 12:48:17 +0800
+Subject: [PATCH 12/12] [ArrayWidenCompare] Add a new optimization for array
+ comparison scenarios
+
+Add option -farray-widen-compare.
+For an array pointer whose elements are a single-byte type, widening
+the pointer type to a multi-byte type lets several elements be loaded
+and compared at once.
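+
+A rough model of the transformation in plain C (illustrative only; the
+helper name is made up, and locating the first differing byte with
+__builtin_ctzll assumes little-endian byte order, which holds on the
+targeted aarch64 linux configurations):
+
+  #include <stdint.h>
+  #include <string.h>
+
+  static uint32_t
+  widened_compare (const uint8_t *pb, const uint8_t *cur,
+                   uint32_t len, uint32_t len_limit)
+  {
+    /* Main loop: compare 8 bytes per iteration.  */
+    for (; len + sizeof (uint64_t) <= len_limit; len += sizeof (uint64_t))
+      {
+        uint64_t a, b;
+        memcpy (&a, cur + len, sizeof a);  /* Unaligned-safe loads.  */
+        memcpy (&b, pb + len, sizeof b);
+        if (a != b)
+          /* The lowest set bit of a ^ b lies in the first differing
+             byte on a little-endian target; /8 turns bits into bytes.  */
+          return len + __builtin_ctzll (a ^ b) / 8;
+      }
+    /* Epilogue: finish the tail byte by byte.  */
+    while (len != len_limit && pb[len] == cur[len])
+      ++len;
+    return len;
+  }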
+---
+ gcc/Makefile.in | 1 +
+ gcc/common.opt | 5 +
+ gcc/doc/invoke.texi | 13 +-
+ gcc/passes.def | 1 +
+ .../gcc.dg/tree-ssa/awiden-compare-1.c | 19 +
+ .../gcc.dg/tree-ssa/awiden-compare-2.c | 90 +
+ .../gcc.dg/tree-ssa/awiden-compare-3.c | 22 +
+ .../gcc.dg/tree-ssa/awiden-compare-4.c | 22 +
+ .../gcc.dg/tree-ssa/awiden-compare-5.c | 19 +
+ .../gcc.dg/tree-ssa/awiden-compare-6.c | 19 +
+ .../gcc.dg/tree-ssa/awiden-compare-7.c | 22 +
+ .../gcc.dg/tree-ssa/awiden-compare-8.c | 24 +
+ gcc/timevar.def | 1 +
+ gcc/tree-pass.h | 1 +
+ gcc/tree-ssa-loop-array-widen-compare.c | 1555 +++++++++++++++++
+ 15 files changed, 1813 insertions(+), 1 deletion(-)
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c
+ create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c
+ create mode 100644 gcc/tree-ssa-loop-array-widen-compare.c
+
+diff --git a/gcc/Makefile.in b/gcc/Makefile.in
+index 23394c64b..2b2bf474a 100644
+--- a/gcc/Makefile.in
++++ b/gcc/Makefile.in
+@@ -1591,6 +1591,7 @@ OBJS = \
+ 	tree-ssa-loop-ivopts.o \
+ 	tree-ssa-loop-manip.o \
+ 	tree-ssa-loop-niter.o \
++	tree-ssa-loop-array-widen-compare.o \
+ 	tree-ssa-loop-prefetch.o \
+ 	tree-ssa-loop-split.o \
+ 	tree-ssa-loop-unswitch.o \
+diff --git a/gcc/common.opt b/gcc/common.opt
+index 24834cf60..2985a5791 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1060,6 +1060,11 @@ fasynchronous-unwind-tables
+ Common Report Var(flag_asynchronous_unwind_tables) Optimization
+ Generate unwind tables that are exact at each instruction boundary.
+ 
++farray-widen-compare
++Common Report Var(flag_array_widen_compare) Optimization
++Extends types for pointers to arrays to improve array comparison performance.
++In some extreme situations this may result in unsafe behavior.
++
+ fauto-inc-dec
+ Common Report Var(flag_auto_inc_dec) Init(1) Optimization
+ Generate auto-inc/dec instructions.
+diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
+index 4b0fd2ffb..44f1f8a2e 100644
+--- a/gcc/doc/invoke.texi
++++ b/gcc/doc/invoke.texi
+@@ -459,7 +459,7 @@ Objective-C and Objective-C++ Dialects}.
+ -falign-loops[=@var{n}[:@var{m}:[@var{n2}[:@var{m2}]]]] @gol
+ -fno-allocation-dce -fallow-store-data-races @gol
+ -fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol
+--fauto-inc-dec -fbranch-probabilities @gol
++-farray-widen-compare -fauto-inc-dec -fbranch-probabilities @gol
+ -fcaller-saves @gol
+ -fcombine-stack-adjustments -fconserve-stack @gol
+ -fcompare-elim -fcprop-registers -fcrossjumping @gol
+@@ -9710,6 +9710,17 @@ This pass is always skipped on architectures that do not have
+ instructions to support this.  Enabled by default at @option{-O} and
+ higher on architectures that support this.
+ 
++@item -farray-widen-compare
++@opindex farray-widen-compare
++In the narrow-byte array comparison scenario, the types of pointers
++pointing to the array are widened so that multiple elements can be
++loaded at a time when the array is dereferenced through the wide type,
++thereby improving the performance of this comparison scenario.  In some
++extreme situations this may result in unsafe behavior.
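++
++For example, a byte-wise comparison loop of the following shape (as in
++the tests added by this patch) is a candidate for the transformation:
++
++@smallexample
++while (++len != len_limit)
++  if (pb[len] != cur[len])
++    break;
++@end smallexample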
++ ++This option may generate better or worse code; results are highly dependent ++on the structure of loops within the source code. ++ + @item -fdce + @opindex fdce + Perform dead code elimination (DCE) on RTL@. +diff --git a/gcc/passes.def b/gcc/passes.def +index e9c91d26e..797b803ca 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -91,6 +91,7 @@ along with GCC; see the file COPYING3. If not see + NEXT_PASS (pass_dse); + NEXT_PASS (pass_cd_dce); + NEXT_PASS (pass_phiopt, true /* early_p */); ++ NEXT_PASS (pass_array_widen_compare); + NEXT_PASS (pass_tail_recursion); + NEXT_PASS (pass_convert_switch); + NEXT_PASS (pass_cleanup_eh); +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c +new file mode 100644 +index 000000000..27b69b0e9 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-1.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (++len != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c +new file mode 100644 +index 000000000..d102364f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-2.c +@@ -0,0 +1,90 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define EMPTY_HASH_VALUE 0 ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++#define true 1 ++ ++typedef struct { ++ uint32_t len; ++ uint32_t dist; ++} lzma_match; ++ ++ ++lzma_match * ++func ( ++ const uint32_t len_limit, ++ const uint32_t pos, ++ const uint8_t *const cur, ++ uint32_t cur_match, ++ uint32_t depth, ++ uint32_t *const son, ++ const uint32_t cyclic_pos, ++ const uint32_t cyclic_size, ++ lzma_match *matches, ++ uint32_t len_best) ++{ ++ uint32_t *ptr0 = son + (cyclic_pos << 1) + 1; ++ uint32_t *ptr1 = son + (cyclic_pos << 1); ++ ++ uint32_t len0 = 0; ++ uint32_t len1 = 0; ++ ++ while (true) ++ { ++ const uint32_t delta = pos - cur_match; ++ if (depth-- == 0 || delta >= cyclic_size) ++ { ++ *ptr0 = EMPTY_HASH_VALUE; ++ *ptr1 = EMPTY_HASH_VALUE; ++ return matches; ++ } ++ ++ uint32_t *const pair = son + ((cyclic_pos - delta + (delta > cyclic_pos ? 
cyclic_size : 0)) << 1); ++ ++ const uint8_t *const pb = cur -delta; ++ uint32_t len = my_min(len0, len1); ++ ++ if (pb[len] == cur[len]) ++ { ++ while (++len != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ ++ if (len_best < len) ++ { ++ len_best = len; ++ matches->len = len; ++ matches->dist = delta - 1; ++ ++matches; ++ ++ if (len == len_limit) ++ { ++ *ptr1 = pair[0]; ++ *ptr0 = pair[1]; ++ return matches; ++ } ++ } ++ } ++ ++ if (pb[len] < cur[len]) ++ { ++ *ptr1 = cur_match; ++ ptr1 = pair + 1; ++ cur_match = *ptr1; ++ len1 = len; ++ } ++ else ++ { ++ *ptr0 = cur_match; ++ ptr0 = pair; ++ cur_match = *ptr0; ++ len0 = len; ++ } ++ } ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c +new file mode 100644 +index 000000000..52dd6b02b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-3.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len != len_limit) ++ { ++ if (pb[len] != cur[len]) ++ break; ++ len = len + 1; ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 1 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c +new file mode 100644 +index 000000000..d3185d326 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-4.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len != len_limit) ++ { ++ if (pb[len] != cur[len]) ++ break; ++ len = len + 2; ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c +new file mode 100644 +index 000000000..9743dc623 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-5.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? 
(x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (++len != len_limit) ++ if (pb[len] != cur[len-1]) ++ break; ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c +new file mode 100644 +index 000000000..2323d5bf7 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-6.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len++ != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c +new file mode 100644 +index 000000000..33db62fa4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-7.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? (x) : (y)) ++ ++uint32_t ++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur) ++{ ++ uint32_t len = my_min(len0, len1); ++ while (len != len_limit) ++ { ++ len = len + 1; ++ if (pb[len] != cur[len]) ++ break; ++ } ++ return len; ++} ++ ++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c +new file mode 100644 +index 000000000..8c96d24a1 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/awiden-compare-8.c +@@ -0,0 +1,24 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -farray-widen-compare -fdump-tree-awiden_compare-details" } */ ++ ++#include ++#include ++ ++#define my_min(x, y) ((x) < (y) ? 
(x) : (y))
++
++uint32_t
++func (uint32_t len0, uint32_t len1, const uint32_t len_limit, const uint8_t *const pb, const uint8_t *const cur)
++{
++  uint32_t len = my_min(len0, len1);
++  while (++len != len_limit)
++    {
++      if (pb[len] != cur[len])
++	{
++	  len = len - 1;
++	  break;
++	}
++    }
++  return len;
++}
++
++/* { dg-final { scan-tree-dump-times "loop form is success" 0 "awiden_compare"} } */
+diff --git a/gcc/timevar.def b/gcc/timevar.def
+index e873747a8..6d90bb6e1 100644
+--- a/gcc/timevar.def
++++ b/gcc/timevar.def
+@@ -215,6 +215,7 @@ DEFTIMEVAR (TV_TREE_NRV , "tree NRV optimization")
+ DEFTIMEVAR (TV_TREE_COPY_RENAME , "tree rename SSA copies")
+ DEFTIMEVAR (TV_TREE_SSA_VERIFY , "tree SSA verifier")
+ DEFTIMEVAR (TV_TREE_STMT_VERIFY , "tree STMT verifier")
++DEFTIMEVAR (TV_TREE_ARRAY_WIDEN_COMPARE, "tree array widen compare")
+ DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch conversion")
+ DEFTIMEVAR (TV_TREE_SWITCH_LOWERING, "tree switch lowering")
+ DEFTIMEVAR (TV_TREE_RECIP , "gimple CSE reciprocals")
+diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
+index be6387768..aca0b83f2 100644
+--- a/gcc/tree-pass.h
++++ b/gcc/tree-pass.h
+@@ -436,6 +436,7 @@ extern gimple_opt_pass *make_pass_cselim (gcc::context *ctxt);
+ extern gimple_opt_pass *make_pass_phiopt (gcc::context *ctxt);
+ extern gimple_opt_pass *make_pass_forwprop (gcc::context *ctxt);
+ extern gimple_opt_pass *make_pass_phiprop (gcc::context *ctxt);
++extern gimple_opt_pass *make_pass_array_widen_compare (gcc::context *ctxt);
+ extern gimple_opt_pass *make_pass_tree_ifcombine (gcc::context *ctxt);
+ extern gimple_opt_pass *make_pass_dse (gcc::context *ctxt);
+ extern gimple_opt_pass *make_pass_nrv (gcc::context *ctxt);
+diff --git a/gcc/tree-ssa-loop-array-widen-compare.c b/gcc/tree-ssa-loop-array-widen-compare.c
+new file mode 100644
+index 000000000..ba51d785d
+--- /dev/null
++++ b/gcc/tree-ssa-loop-array-widen-compare.c
+@@ -0,0 +1,1555 @@
++/* Array widen compare.
++   Copyright (C) 2022 Free Software Foundation, Inc.
++
++This file is part of GCC.
++
++GCC is free software; you can redistribute it and/or modify it
++under the terms of the GNU General Public License as published by the
++Free Software Foundation; either version 3, or (at your option) any
++later version.
++
++GCC is distributed in the hope that it will be useful, but WITHOUT
++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++for more details.
++
++You should have received a copy of the GNU General Public License
++along with GCC; see the file COPYING3.  If not see
++<http://www.gnu.org/licenses/>.
*/ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "target.h" ++#include "tree.h" ++#include "gimple.h" ++#include "tree-pass.h" ++#include "gimple-ssa.h" ++#include "tree-pretty-print.h" ++#include "fold-const.h" ++#include "gimplify.h" ++#include "gimple-iterator.h" ++#include "tree-ssa-loop-manip.h" ++#include "tree-ssa-loop.h" ++#include "ssa.h" ++#include "tree-into-ssa.h" ++#include "cfganal.h" ++#include "cfgloop.h" ++#include "gimple-pretty-print.h" ++#include "tree-cfg.h" ++#include "cgraph.h" ++#include "print-tree.h" ++#include "cfghooks.h" ++#include "gimple-fold.h" ++ ++/* This pass handles scenarios similar to the following: ++ ++ uint32_t ++ func (uint32_t len0, uint32_t len1, const uint32_t len_limit, ++ const uint8_t *const pb, const uint8_t *const cur) ++ { ++ uint32_t len = my_min (len0, len1); ++ while (++len != len_limit) ++ if (pb[len] != cur[len]) ++ break; ++ return len; ++ } ++ ++ Features of this type of loop: ++ 1) the loop has two exits; ++ 2) One of the exits comes from the comparison result of the array; ++ ++ From the source code point of view, the pass completes the conversion of the ++ above scenario into: ++ ++ uint32_t ++ func (uint32_t len0, uint32_t len1, const uint32_t len_limit, ++ const uint8_t *const pb, const uint8_t *const cur) ++ { ++ uint32_t len = my_min (len0, len1); ++ // align_loop ++ for(++len; len + sizeof(uint64_t) <= len_limit; len += sizeof (uint64_t)) ++ { ++ uint64_t a = *((uint64_t*)(cur+len)); ++ uint64_t b = *((uint64_t*)(pb+len)); ++ if (a != b) ++ { ++ int lz = __builtin_ctzll (a ^ b); ++ len += lz / 8; ++ return len; ++ } ++ } ++ // epilogue_loop ++ for (;len != len_limit; ++len) ++ if (pb[len] != cur[len]) ++ break; ++ return len; ++ } ++ ++ This pass is to complete the conversion of such scenarios from the internal ++ perspective of the compiler: ++ 1) determine_loop_form: The function completes the screening of such ++ scenarios; ++ 2) convert_to_new_loop: The function completes the conversion of ++ origin_loop to new loops, and removes origin_loop; ++ 3) origin_loop_info: The structure is used to record important information ++ of origin_loop: such as loop exit, growth step size ++ of loop induction variable, initial value ++ of induction variable, etc; ++ 4) create_new_loops: The function is used as the key content of the pass ++ to complete the creation of new loops. */ ++ ++/* The useful information of origin loop. */ ++ ++struct origin_loop_info ++{ ++ tree base; /* The initial index of the array in the old loop. */ ++ tree limit; /* The limit index of the array in the old loop. */ ++ tree arr1; /* Array 1 in the old loop. */ ++ tree arr2; /* Array 2 in the old loop. */ ++ edge entry_edge; /* The edge into the old loop. */ ++ basic_block exit_bb1; ++ basic_block exit_bb2; ++ edge exit_e1; ++ edge exit_e2; ++ gimple *cond_stmt1; ++ gimple *cond_stmt2; ++ gimple *update_stmt; ++ bool exist_prolog_assgin; ++ /* Whether the marker has an initial value assigned ++ to the array index. */ ++ unsigned HOST_WIDE_INT step; ++ /* The growth step of the loop induction variable. */ ++}; ++ ++typedef struct origin_loop_info origin_loop_info; ++ ++static origin_loop_info origin_loop; ++hash_map defs_map; ++ ++/* Dump the bb information in a loop. 
*/
++
++static void
++dump_loop_bb (struct loop *loop)
++{
++  basic_block *body = get_loop_body_in_dom_order (loop);
++  basic_block bb = NULL;
++
++  for (unsigned i = 0; i < loop->num_nodes; i++)
++    {
++      bb = body[i];
++      if (bb->loop_father != loop)
++	{
++	  continue;
++	}
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "===== the %dth bb of loop ==========:\n", i);
++	  gimple_dump_bb (dump_file, bb, 0, dump_flags);
++	  fprintf (dump_file, "\n");
++	}
++    }
++  free (body);
++}
++
++/* Return true if the loop has precisely one backedge.  */
++
++static bool
++loop_single_backedge_p (class loop *loop)
++{
++  basic_block latch = loop->latch;
++  if (!single_succ_p (latch))
++    return false;
++
++  edge e = single_succ_edge (latch);
++  edge backedge = find_edge (latch, loop->header);
++
++  if (e != backedge)
++    return false;
++
++  return true;
++}
++
++/* Return true if the loop has precisely one preheader BB.  */
++
++static bool
++loop_single_preheader_bb (class loop *loop)
++{
++  basic_block header = loop->header;
++  if (EDGE_COUNT (header->preds) != 2)
++    return false;
++
++  edge e1 = EDGE_PRED (header, 0);
++  edge e2 = EDGE_PRED (header, 1);
++
++  if ((e1->src == loop->latch && e2->src->loop_father != loop)
++      || (e2->src == loop->latch && e1->src->loop_father != loop))
++    return true;
++
++  return false;
++}
++
++/* Initialize the origin_loop structure.  */
++static void
++init_origin_loop_structure ()
++{
++  origin_loop.base = NULL;
++  origin_loop.limit = NULL;
++  origin_loop.arr1 = NULL;
++  origin_loop.arr2 = NULL;
++  origin_loop.exit_e1 = NULL;
++  origin_loop.exit_e2 = NULL;
++  origin_loop.exit_bb1 = NULL;
++  origin_loop.exit_bb2 = NULL;
++  origin_loop.entry_edge = NULL;
++  origin_loop.cond_stmt1 = NULL;
++  origin_loop.cond_stmt2 = NULL;
++  origin_loop.update_stmt = NULL;
++  origin_loop.exist_prolog_assgin = false;
++  origin_loop.step = 0;
++}
++
++/* Get the edge that first entered the loop.  */
++
++static edge
++get_loop_preheader_edge (class loop *loop)
++{
++  edge e;
++  edge_iterator ei;
++
++  FOR_EACH_EDGE (e, ei, loop->header->preds)
++    if (e->src != loop->latch)
++      break;
++
++  if (!e)
++    {
++      gcc_assert (!loop_outer (loop));
++      return single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
++    }
++
++  return e;
++}
++
++/* Make sure the exit condition stmt satisfies a specific form.  */
++
++static bool
++check_cond_stmt (gimple *stmt)
++{
++  if (!stmt)
++    return false;
++  if (gimple_code (stmt) != GIMPLE_COND)
++    return false;
++
++  if (gimple_cond_code (stmt) != NE_EXPR && gimple_cond_code (stmt) != EQ_EXPR)
++    return false;
++
++  tree lhs = gimple_cond_lhs (stmt);
++  tree rhs = gimple_cond_rhs (stmt);
++
++  /* Cond statements whose operands are not both SSA_NAMEs are not
++     supported, e.g.: if (len_1 != 100).  */
++  if (TREE_CODE (lhs) != SSA_NAME || TREE_CODE (rhs) != SSA_NAME)
++    return false;
++
++  return true;
++}
++
++/* Record the exit information in the original loop including exit edge,
++   exit bb block, exit condition stmt,
++   eg: exit_eX origin_exit_bbX cond_stmtX.
*/ ++ ++static bool ++record_origin_loop_exit_info (class loop *loop) ++{ ++ bool found = false; ++ edge e = NULL; ++ unsigned i = 0; ++ gimple *stmt; ++ ++ if (origin_loop.exit_e1 != NULL || origin_loop.exit_bb1 != NULL ++ || origin_loop.exit_e2 != NULL || origin_loop.exit_bb2 != NULL ++ || origin_loop.cond_stmt1 != NULL || origin_loop.cond_stmt2 != NULL) ++ return false; ++ ++ vec exit_edges = get_loop_exit_edges (loop); ++ if (exit_edges == vNULL) ++ return false; ++ ++ if (exit_edges.length () != 2) ++ goto fail; ++ ++ FOR_EACH_VEC_ELT (exit_edges, i, e) ++ { ++ if (e->src == loop->header) ++ { ++ origin_loop.exit_e1 = e; ++ origin_loop.exit_bb1 = e->dest; ++ stmt = gsi_stmt (gsi_last_bb (e->src)); ++ if (check_cond_stmt (stmt)) ++ origin_loop.cond_stmt1 = stmt; ++ } ++ else ++ { ++ origin_loop.exit_e2 = e; ++ origin_loop.exit_bb2 = e->dest; ++ stmt = gsi_stmt (gsi_last_bb (e->src)); ++ if (check_cond_stmt (stmt)) ++ origin_loop.cond_stmt2 = stmt; ++ } ++ } ++ ++ if (origin_loop.exit_e1 != NULL && origin_loop.exit_bb1 != NULL ++ && origin_loop.exit_e2 != NULL && origin_loop.exit_bb2 != NULL ++ && origin_loop.cond_stmt1 != NULL && origin_loop.cond_stmt2 != NULL) ++ found = true; ++ ++fail: ++ exit_edges.release (); ++ return found; ++} ++ ++/* Returns true if t is SSA_NAME and user variable exists. */ ++ ++static bool ++ssa_name_var_p (tree t) ++{ ++ if (!t || TREE_CODE (t) != SSA_NAME) ++ return false; ++ if (SSA_NAME_VAR (t)) ++ return true; ++ return false; ++} ++ ++/* Returns true if t1 and t2 are SSA_NAME and belong to the same variable. */ ++ ++static bool ++same_ssa_name_var_p (tree t1, tree t2) ++{ ++ if (!ssa_name_var_p (t1) || !ssa_name_var_p (t2)) ++ return false; ++ if (SSA_NAME_VAR (t1) == SSA_NAME_VAR (t2)) ++ return true; ++ return false; ++} ++ ++/* Get origin loop induction variable upper bound. */ ++ ++static bool ++get_iv_upper_bound (gimple *stmt) ++{ ++ if (origin_loop.limit != NULL) ++ return false; ++ ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); ++ ++ if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE ++ || TREE_CODE (TREE_TYPE (rhs)) != INTEGER_TYPE) ++ return false; ++ ++ gimple *g = SSA_NAME_DEF_STMT (rhs); ++ ++ /* TODO: Currently, the input restrictions on lhs and rhs are implemented ++ through PARM_DECL. We may consider releasing the restrictions later, and ++ we need to consider the overall adaptation scenario and adding test ++ cases. */ ++ if (ssa_name_var_p (rhs) && TREE_CODE (SSA_NAME_VAR (rhs)) == PARM_DECL ++ && g && gimple_code (g) == GIMPLE_NOP ++ && (ssa_name_var_p (lhs) && TREE_CODE (SSA_NAME_VAR (lhs)) != PARM_DECL)) ++ { ++ origin_loop.limit = rhs; ++ } ++ else ++ return false; ++ ++ if (origin_loop.limit != NULL) ++ return true; ++ ++ return false; ++} ++ ++/* Returns true only when the expression on the rhs code of stmt is PLUS_EXPR, ++ rhs1 is SSA_NAME with the same var as origin_loop base, and rhs2 is ++ INTEGER_CST. */ ++ ++static bool ++check_update_stmt (gimple *stmt) ++{ ++ if (!stmt) ++ return false; ++ ++ if (gimple_assign_rhs_code (stmt) == PLUS_EXPR) ++ { ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ if (TREE_CODE (rhs1) == SSA_NAME && TREE_CODE (rhs2) == INTEGER_CST ++ && same_ssa_name_var_p (rhs1, origin_loop.base)) ++ { ++ origin_loop.step = tree_to_uhwi (rhs2); ++ if (origin_loop.step == 1) ++ return true; ++ } ++ } ++ return false; ++} ++ ++/* Get origin loop induction variable initial value. 
*/
++
++static bool
++get_iv_base (gimple *stmt)
++{
++  tree lhs = gimple_cond_lhs (stmt);
++  if (origin_loop.base != NULL || origin_loop.update_stmt != NULL)
++    return false;
++
++  basic_block header = gimple_bb (stmt);
++
++  gphi_iterator gsi;
++  edge e;
++  edge_iterator ei;
++  tree iv_after;
++
++  for (gsi = gsi_start_phis (header); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      gphi *phi = gsi.phi ();
++      tree res = gimple_phi_result (phi);
++      if (!same_ssa_name_var_p (res, lhs))
++        continue;
++      tree base = PHI_ARG_DEF_FROM_EDGE (phi, origin_loop.entry_edge);
++      if (!same_ssa_name_var_p (base, lhs))
++        return false;
++      origin_loop.base = base;
++      FOR_EACH_EDGE (e, ei, header->preds)
++        {
++          if (e != origin_loop.entry_edge)
++            {
++              iv_after = PHI_ARG_DEF_FROM_EDGE (phi, e);
++              gimple *update = SSA_NAME_DEF_STMT (iv_after);
++              if (!check_update_stmt (update))
++                return false;
++              origin_loop.update_stmt = update;
++              if (gimple_bb (update) == header && iv_after == lhs)
++                origin_loop.exist_prolog_assgin = true;
++            }
++        }
++    }
++
++  if (origin_loop.base != NULL && origin_loop.update_stmt != NULL)
++    return true;
++
++  return false;
++}
++
++/* Record the upper bound and initial value of the induction variable in the
++   original loop; when prolog_assign is present, make sure the loop header is
++   in simple form.  The interpretation of prolog_assign is as follows,
++   eg: while (++len != limit)
++         ......
++   For such a loop, ++len is processed before entering header_bb, and that
++   assign is regarded as the prolog_assign of the loop. */
++
++static bool
++record_origin_loop_header (class loop *loop)
++{
++  basic_block header = loop->header;
++
++  if (origin_loop.entry_edge != NULL || origin_loop.base != NULL
++      || origin_loop.update_stmt != NULL || origin_loop.limit != NULL)
++    return false;
++  origin_loop.entry_edge = get_loop_preheader_edge (loop);
++
++  gimple_stmt_iterator gsi;
++  gimple *stmt;
++
++  for (gsi = gsi_last_bb (header); !gsi_end_p (gsi); gsi_prev (&gsi))
++    {
++      stmt = gsi_stmt (gsi);
++      if (stmt && is_gimple_debug (stmt))
++        continue;
++      if (stmt && gimple_code (stmt) == GIMPLE_COND)
++        {
++          if (!get_iv_upper_bound (stmt))
++            return false;
++          if (!get_iv_base (stmt))
++            return false;
++        }
++      else if (stmt && gimple_code (stmt) == GIMPLE_ASSIGN)
++        {
++          if (stmt != origin_loop.update_stmt || !origin_loop.exist_prolog_assgin)
++            return false;
++        }
++      else
++        return false;
++    }
++
++  if (origin_loop.entry_edge != NULL && origin_loop.base != NULL
++      && origin_loop.update_stmt != NULL && origin_loop.limit != NULL)
++    return true;
++
++  return false;
++}
++
++/* When prolog_assign does not exist, make sure that update_stmt exists in
++   the loop latch and has the specific form, eg:
++     len_2 = len_1 + 1. */
++
++static bool
++record_origin_loop_latch (class loop *loop)
++{
++  basic_block latch = loop->latch;
++  gimple_stmt_iterator gsi;
++  gimple *stmt;
++
++  gsi = gsi_start_bb (latch);
++
++  if (origin_loop.exist_prolog_assgin)
++    {
++      if (gsi_end_p (gsi))
++        return true;
++    }
++  else
++    {
++      if (gsi_one_before_end_p (gsi))
++        {
++          stmt = gsi_stmt (gsi);
++          if (stmt == origin_loop.update_stmt)
++            return true;
++        }
++    }
++  return false;
++}
++
++/* Returns true when the DEF_STMT corresponding to arg0 of the mem_ref tree
++   is a POINTER_PLUS_EXPR. 
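++   For illustration only (hypothetical SSA names), in
++     _4 = pb_13 (D) + _3;
++     _5 = MEM[(const unsigned char *)_4];
++   arg0 of the MEM_REF is _4 with a zero offset, and its DEF_STMT is the
++   POINTER_PLUS_EXPR, so the check succeeds. 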
*/
++
++static bool
++check_body_mem_ref (tree mem_ref)
++{
++  tree arg0 = TREE_OPERAND (mem_ref, 0);
++  tree arg1 = TREE_OPERAND (mem_ref, 1);
++
++  if (TREE_CODE (TREE_TYPE (arg0)) == POINTER_TYPE
++      && TREE_CODE (arg1) == INTEGER_CST
++      && tree_to_uhwi (arg1) == 0)
++    {
++      gimple *tmp_g = SSA_NAME_DEF_STMT (arg0);
++      if (tmp_g && gimple_assign_rhs_code (tmp_g) == POINTER_PLUS_EXPR)
++        return true;
++    }
++  return false;
++}
++
++/* Returns true if the rhs2 of the current stmt comes from the base in the
++   original loop. */
++
++static bool
++check_body_pointer_plus (gimple *stmt, tree &tmp_index)
++{
++  tree rhs1 = gimple_assign_rhs1 (stmt);
++  tree rhs2 = gimple_assign_rhs2 (stmt);
++  if (TREE_CODE (TREE_TYPE (rhs1)) == POINTER_TYPE)
++    {
++      gimple *g = SSA_NAME_DEF_STMT (rhs2);
++      if (g && gimple_assign_rhs_code (g) == NOP_EXPR)
++        {
++          tree nop_rhs = gimple_assign_rhs1 (g);
++          if (same_ssa_name_var_p (nop_rhs, origin_loop.base))
++            {
++              if (!origin_loop.arr1)
++                {
++                  origin_loop.arr1 = rhs1;
++                  tmp_index = rhs2;
++                }
++              else if (!origin_loop.arr2)
++                {
++                  origin_loop.arr2 = rhs1;
++                  if (tmp_index != rhs2)
++                    return false;
++                }
++              else
++                return false;
++              return true;
++            }
++        }
++    }
++  return false;
++}
++
++/* Record the array comparison information in the original loop, while
++   ensuring that there are only statements related to cond_stmt in the loop
++   body. */
++
++static bool
++record_origin_loop_body (class loop *loop)
++{
++  basic_block body = gimple_bb (origin_loop.cond_stmt2);
++
++  if (origin_loop.arr1 != NULL || origin_loop.arr2 != NULL)
++    return false;
++
++  gimple_stmt_iterator gsi;
++  for (gsi = gsi_start_bb (body); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      gimple_set_visited (gsi_stmt (gsi), false);
++    }
++
++  tree cond_lhs = gimple_cond_lhs (origin_loop.cond_stmt2);
++  tree cond_rhs = gimple_cond_rhs (origin_loop.cond_stmt2);
++  if (TREE_CODE (TREE_TYPE (cond_lhs)) != INTEGER_TYPE
++      || TREE_CODE (TREE_TYPE (cond_rhs)) != INTEGER_TYPE)
++    return false;
++
++  auto_vec<tree> stack;
++  tree tmp_index = NULL;
++  stack.safe_push (cond_lhs);
++  stack.safe_push (cond_rhs);
++  gimple_set_visited (origin_loop.cond_stmt2, true);
++
++  while (!stack.is_empty ())
++    {
++      tree op = stack.pop ();
++      gimple *g = SSA_NAME_DEF_STMT (op);
++      if (!g || gimple_bb (g) != body || !is_gimple_assign (g))
++        continue;
++      gimple_set_visited (g, true);
++      if (gimple_assign_rhs_code (g) == MEM_REF)
++        {
++          tree mem_ref = gimple_assign_rhs1 (g);
++          if (!check_body_mem_ref (mem_ref))
++            return false;
++          stack.safe_push (TREE_OPERAND (mem_ref, 0));
++        }
++      else if (gimple_assign_rhs_code (g) == POINTER_PLUS_EXPR)
++        {
++          tree rhs2 = gimple_assign_rhs2 (g);
++          if (!check_body_pointer_plus (g, tmp_index))
++            return false;
++          stack.safe_push (rhs2);
++        }
++      else if (gimple_assign_rhs_code (g) == NOP_EXPR)
++        {
++          tree rhs = gimple_assign_rhs1 (g);
++          if (!same_ssa_name_var_p (rhs, origin_loop.base))
++            return false;
++          stack.safe_push (rhs);
++        }
++      else
++        return false;
++    }
++  bool allvisited = true;
++  for (gsi = gsi_start_bb (body); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      if (!gimple_visited_p (gsi_stmt (gsi))
++          && !is_gimple_debug (gsi_stmt (gsi)))
++        allvisited = false;
++    }
++  if (allvisited)
++    {
++      if (origin_loop.arr1 != NULL && origin_loop.arr2 != NULL)
++        return true;
++    }
++  return false;
++}
++
++/* Dump the original loop information to see if the origin loop
++   form matches. 
*/ ++ ++static void ++dump_origin_loop_info () ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nThe origin loop info:\n"); ++ fprintf (dump_file, "\n the origin_loop.limit is:\n"); ++ print_node (dump_file, "", origin_loop.limit, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.base is:\n"); ++ print_node (dump_file, "", origin_loop.base, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.arr1 is:\n"); ++ print_node (dump_file, "", origin_loop.arr1, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.arr2 is:\n"); ++ print_node (dump_file, "", origin_loop.arr2, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.cond_stmt1 is:\n"); ++ print_gimple_stmt (dump_file, origin_loop.cond_stmt1, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.cond_stmt2 is:\n"); ++ print_gimple_stmt (dump_file, origin_loop.cond_stmt2, 0); ++ fprintf (dump_file, "\n"); ++ fprintf (dump_file, "\n the origin_loop.update_stmt is:\n"); ++ print_gimple_stmt (dump_file, origin_loop.update_stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++} ++ ++/* Returns true only if the exit bb of the original loop is unique and its phi ++ node parameter comes from the same variable. */ ++ ++static bool ++check_exit_bb (class loop *loop) ++{ ++ if (origin_loop.exit_bb1 != origin_loop.exit_bb2 ++ || flow_bb_inside_loop_p (loop, origin_loop.exit_bb1)) ++ return false; ++ ++ gphi_iterator gsi; ++ for (gsi = gsi_start_phis (origin_loop.exit_bb1); !gsi_end_p (gsi); ++ gsi_next (&gsi)) ++ { ++ gphi *phi = gsi.phi (); ++ tree res = gimple_phi_result (phi); ++ if (!same_ssa_name_var_p (res, origin_loop.base)) ++ continue; ++ if (gimple_phi_num_args (phi) == 2) ++ { ++ tree arg0 = gimple_phi_arg_def (phi, 0); ++ tree arg1 = gimple_phi_arg_def (phi, 1); ++ if (arg0 == arg1) ++ return true; ++ } ++ } ++ return false; ++} ++ ++/* Make sure that the recorded origin_loop information meets the ++ relative requirements. */ ++ ++static bool ++check_origin_loop_info (class loop *loop) ++{ ++ dump_origin_loop_info (); ++ tree arr1_elem_size, arr2_elem_size; ++ ++ if (!check_exit_bb (loop)) ++ return false; ++ ++ if (TREE_CODE (origin_loop.base) != SSA_NAME) ++ return false; ++ ++ if (!TYPE_READONLY (TREE_TYPE (origin_loop.limit))) ++ return false; ++ ++ if (!TYPE_READONLY (TREE_TYPE (TREE_TYPE (origin_loop.arr1)))) ++ return false; ++ ++ if (!TYPE_READONLY (TREE_TYPE (TREE_TYPE (origin_loop.arr2)))) ++ return false; ++ ++ if (TREE_CODE (TREE_TYPE (origin_loop.arr1)) != POINTER_TYPE ++ || TREE_CODE (TREE_TYPE (origin_loop.arr2)) != POINTER_TYPE ++ || TREE_CODE (TREE_TYPE (TREE_TYPE (origin_loop.arr1))) != INTEGER_TYPE ++ || TREE_CODE (TREE_TYPE (TREE_TYPE (origin_loop.arr2))) != INTEGER_TYPE) ++ return false; ++ ++ arr1_elem_size = TYPE_SIZE (TREE_TYPE (TREE_TYPE (origin_loop.arr1))); ++ arr2_elem_size = TYPE_SIZE (TREE_TYPE (TREE_TYPE (origin_loop.arr2))); ++ ++ if (tree_to_uhwi (arr1_elem_size) != 8 || tree_to_uhwi (arr2_elem_size) != 8) ++ return false; ++ ++ return true; ++} ++ ++/* Record the useful information of the original loop and judge whether the ++ information meets the specified conditions. 
*/
++
++static bool
++check_record_loop_form (class loop *loop)
++{
++  if (!record_origin_loop_exit_info (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nFailed to record loop exit information.\n");
++        }
++      return false;
++    }
++
++  if (!record_origin_loop_header (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nFailed to record loop header information.\n");
++        }
++      return false;
++    }
++
++  if (!record_origin_loop_latch (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nFailed to record loop latch information.\n");
++        }
++      return false;
++    }
++
++  if (!record_origin_loop_body (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nFailed to record loop body information.\n");
++        }
++      return false;
++    }
++
++  if (!check_origin_loop_info (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nFailed to check origin loop information.\n");
++        }
++      return false;
++    }
++
++  return true;
++}
++
++/* The main entry for judging whether the loop meets the required
++   conditions. */
++
++static bool
++determine_loop_form (class loop *loop)
++{
++  /* Currently only standard loops are processed, that is, loops containing
++     exactly 3 bbs: loop_header, loop_body and loop_latch. */
++  if (loop->inner || loop->num_nodes != 3)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nWrong loop form, there is an inner loop or"
++                   " a redundant bb.\n");
++        }
++      return false;
++    }
++
++  if (single_exit (loop) || !loop->latch)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nWrong loop form, only one exit or loop_latch"
++                   " does not exist.\n");
++        }
++      return false;
++    }
++
++  /* Support loop with only one backedge. */
++  if (!loop_single_backedge_p (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nWrong loop form, the loop backedge is not"
++                   " unique.\n");
++        }
++      return false;
++    }
++
++  /* Support loop with only one preheader BB. */
++  if (!loop_single_preheader_bb (loop))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "\nWrong loop form, the loop preheader bb is not"
++                   " unique.\n");
++        }
++      return false;
++    }
++
++  init_origin_loop_structure ();
++  if (!check_record_loop_form (loop))
++    return false;
++
++  return true;
++}
++
++/* Create prolog bb for the newly constructed loop; when prolog_assign exists
++   in the original loop, the corresponding assign needs to be added to
++   prolog_bb, eg:
++     len_16 = len_10 + 1
++   Create a simple copy statement when prolog_assign does not exist, eg:
++     len_16 = len_10
++
++   The IR of bb is as above. 
*/ ++ ++static void ++create_prolog_bb (basic_block &prolog_bb, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer, edge entry_edge) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ tree lhs1; ++ ++ prolog_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (prolog_bb, outer); ++ redirect_edge_and_branch (entry_edge, prolog_bb); ++ set_immediate_dominator (CDI_DOMINATORS, prolog_bb, dominator_bb); ++ gsi = gsi_last_bb (prolog_bb); ++ lhs1 = copy_ssa_name (origin_loop.base); ++ ++ if (origin_loop.exist_prolog_assgin) ++ g = gimple_build_assign (lhs1, PLUS_EXPR, origin_loop.base, ++ build_int_cst (TREE_TYPE (origin_loop.base), origin_loop.step)); ++ else ++ g = gimple_build_assign (lhs1, NOP_EXPR, origin_loop.base); ++ gimple_seq_add_stmt (&stmts, g); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ set_current_def (origin_loop.base, lhs1); ++ defs_map.put (prolog_bb, lhs1); ++} ++ ++/* Create preheader bb for new loop; In order to ensure the standard form of ++ the loop, add a preheader_bb before loop_header. */ ++ ++static void ++create_loop_pred_bb (basic_block &loop_pred_bb, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer) ++{ ++ loop_pred_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (loop_pred_bb, outer); ++ set_immediate_dominator (CDI_DOMINATORS, loop_pred_bb, dominator_bb); ++ defs_map.put (loop_pred_bb, get_current_def (origin_loop.base)); ++} ++ ++/* Add phi_arg for bb with phi node. */ ++ ++static void ++rewrite_add_phi_arg (basic_block bb) ++{ ++ edge e; ++ edge_iterator ei; ++ gphi *phi; ++ gphi_iterator gsi; ++ tree res; ++ location_t loc; ++ ++ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ phi = gsi.phi (); ++ res = gimple_phi_result (phi); ++ ++ FOR_EACH_EDGE (e, ei, bb->preds) ++ { ++ if (PHI_ARG_DEF_FROM_EDGE (phi, e)) ++ continue; ++ tree var = *(defs_map.get (e->src)); ++ if (!same_ssa_name_var_p (var, res)) ++ continue; ++ if (virtual_operand_p (var)) ++ loc = UNKNOWN_LOCATION; ++ else ++ loc = gimple_location (SSA_NAME_DEF_STMT (var)); ++ add_phi_arg (phi, var, e, loc); ++ } ++ } ++} ++ ++/* Create loop_header BB for align_loop. ++ eg: ++ _18 = (long unsigned int) len_17; ++ _19 = _18 + 8; ++ _20 = (long unsigned int) len_limit_12 (D); ++ if (_19 <= _20) ++ ++ The IR of bb is as above. 
*/ ++ ++static void ++create_align_loop_header (basic_block &align_loop_header, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gcond *cond_stmt; ++ gphi *phi; ++ tree res; ++ ++ tree entry_node = get_current_def (origin_loop.base); ++ align_loop_header = create_empty_bb (after_bb); ++ add_bb_to_loop (align_loop_header, outer); ++ make_single_succ_edge (after_bb, align_loop_header, EDGE_FALLTHRU); ++ set_immediate_dominator (CDI_DOMINATORS, align_loop_header, dominator_bb); ++ gsi = gsi_last_bb (align_loop_header); ++ phi = create_phi_node (NULL_TREE, align_loop_header); ++ create_new_def_for (entry_node, phi, gimple_phi_result_ptr (phi)); ++ res = gimple_phi_result (phi); ++ ++ tree lhs1 = gimple_build (&stmts, NOP_EXPR, long_unsigned_type_node, res); ++ tree lhs2 = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (lhs1), lhs1, ++ build_int_cst (TREE_TYPE (lhs1), 8)); ++ tree lhs3 = gimple_build (&stmts, NOP_EXPR, long_unsigned_type_node, ++ origin_loop.limit); ++ cond_stmt = gimple_build_cond (LE_EXPR, lhs2, lhs3, NULL_TREE, NULL_TREE); ++ gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ ++ set_current_def (origin_loop.base, res); ++ defs_map.put (align_loop_header, res); ++} ++ ++/* Create loop body BB for align_loop. ++ eg: ++ _21 = (sizetype) len_17; ++ _22 = cur_15 (D) + _21; ++ _23 = MEM[(long unsigned int *)_22]; ++ _24 = pb_13 (D) + _21; ++ _25 = MEM[(long unsigned int *)_24]; ++ if (_23 != _25) ++ ++ The IR of bb is as above. */ ++ ++static void ++create_align_loop_body_bb (basic_block &align_loop_body_bb, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ gcond *cond_stmt; ++ tree lhs1, lhs2; ++ ++ align_loop_body_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (align_loop_body_bb, outer); ++ make_edge (after_bb, align_loop_body_bb, EDGE_TRUE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, align_loop_body_bb, dominator_bb); ++ gsi = gsi_last_bb (align_loop_body_bb); ++ ++ tree var = gimple_build (&stmts, NOP_EXPR, sizetype, ++ get_current_def (origin_loop.base)); ++ lhs1 = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (origin_loop.arr2), ++ origin_loop.arr2, var); ++ g = gimple_build_assign (make_ssa_name (long_unsigned_type_node), ++ fold_build2 (MEM_REF, long_unsigned_type_node, lhs1, ++ build_int_cst (build_pointer_type (long_unsigned_type_node), 0))); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs1 = gimple_assign_lhs (g); ++ lhs2 = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (origin_loop.arr1), ++ origin_loop.arr1, var); ++ g = gimple_build_assign (make_ssa_name (long_unsigned_type_node), ++ fold_build2 (MEM_REF, long_unsigned_type_node, lhs2, ++ build_int_cst (build_pointer_type (long_unsigned_type_node), 0))); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs2 = gimple_assign_lhs (g); ++ cond_stmt = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt2), ++ lhs1, lhs2, NULL_TREE, NULL_TREE); ++ gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++} ++ ++/* Create loop_latch BB for align_loop. ++ eg: ++ len_26 = len_17 + 8; ++ ++ The IR of bb is as above. 
*/
++
++static void
++create_align_loop_latch (basic_block &align_loop_latch, basic_block after_bb,
++                         basic_block dominator_bb, class loop *outer)
++{
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gimple *g;
++  tree res;
++
++  tree entry_node = get_current_def (origin_loop.base);
++  align_loop_latch = create_empty_bb (after_bb);
++  add_bb_to_loop (align_loop_latch, outer);
++  make_edge (after_bb, align_loop_latch, EDGE_FALSE_VALUE);
++  set_immediate_dominator (CDI_DOMINATORS, align_loop_latch, dominator_bb);
++  gsi = gsi_last_bb (align_loop_latch);
++  res = copy_ssa_name (entry_node);
++  g = gimple_build_assign (res, PLUS_EXPR, entry_node,
++                           build_int_cst (TREE_TYPE (entry_node), 8));
++  gimple_seq_add_stmt (&stmts, g);
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++  defs_map.put (align_loop_latch, res);
++}
++
++/* Create a new loop and add it to outer_loop and return. */
++
++static class loop *
++init_new_loop (class loop *outer_loop, basic_block header, basic_block latch)
++{
++  class loop *new_loop;
++  new_loop = alloc_loop ();
++  new_loop->header = header;
++  new_loop->latch = latch;
++  add_loop (new_loop, outer_loop);
++
++  return new_loop;
++}
++
++/* Create necessary exit BB for align_loop.
++   eg:
++     _27 = _23 ^ _25;
++     _28 = __builtin_ctzll (_27);
++     _29 = _28 >> 3;
++     len_30 = _29 + len_17;
++
++   The IR of bb is as above. */
++
++static void
++create_align_loop_exit_bb (basic_block &align_loop_exit_bb,
++                           basic_block after_bb, basic_block dominator_bb,
++                           class loop *outer)
++{
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gimple *g;
++  gimple *cond_stmt;
++  tree lhs1, lhs2;
++  tree cond_lhs, cond_rhs;
++  gcall *build_ctzll;
++
++  tree entry_node = get_current_def (origin_loop.base);
++  align_loop_exit_bb = create_empty_bb (after_bb);
++  add_bb_to_loop (align_loop_exit_bb, outer);
++  make_edge (after_bb, align_loop_exit_bb, EDGE_TRUE_VALUE);
++  set_immediate_dominator (CDI_DOMINATORS, align_loop_exit_bb, dominator_bb);
++  gsi = gsi_last_bb (align_loop_exit_bb);
++
++  cond_stmt = gsi_stmt (gsi_last_bb (after_bb));
++  cond_lhs = gimple_cond_lhs (cond_stmt);
++  cond_rhs = gimple_cond_rhs (cond_stmt);
++
++  lhs1 = gimple_build (&stmts, BIT_XOR_EXPR, TREE_TYPE (cond_lhs), cond_lhs,
++                       cond_rhs);
++  build_ctzll = gimple_build_call (builtin_decl_explicit (BUILT_IN_CTZLL), 1,
++                                   lhs1);
++  lhs1 = make_ssa_name (integer_type_node);
++  gimple_call_set_lhs (build_ctzll, lhs1);
++  gimple_seq_add_stmt (&stmts, build_ctzll);
++  lhs2 = copy_ssa_name (lhs1);
++  g = gimple_build_assign (lhs2, RSHIFT_EXPR, lhs1,
++                           build_int_cst (TREE_TYPE (lhs1), 3));
++  gimple_seq_add_stmt (&stmts, g);
++  lhs1 = gimple_build (&stmts, NOP_EXPR, TREE_TYPE (entry_node), lhs2);
++  lhs2 = copy_ssa_name (entry_node);
++  g = gimple_build_assign (lhs2, PLUS_EXPR, lhs1, entry_node);
++  gimple_seq_add_stmt (&stmts, g);
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++  defs_map.put (align_loop_exit_bb, lhs2);
++}
++
++/* Create loop_header BB for epilogue_loop.
++   eg:
++     # len_31 = PHI <...>
++     if (len_31 != len_limit_12 (D))
++
++   The IR of bb is as above. 
*/ ++ ++static void ++create_epilogue_loop_header (basic_block &epilogue_loop_header, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gcond *cond_stmt; ++ tree res; ++ gphi *phi; ++ ++ tree entry_node = get_current_def (origin_loop.base); ++ epilogue_loop_header = create_empty_bb (after_bb); ++ add_bb_to_loop (epilogue_loop_header, outer); ++ make_single_succ_edge (after_bb, epilogue_loop_header, EDGE_FALLTHRU); ++ set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_header, dominator_bb); ++ gsi = gsi_last_bb (epilogue_loop_header); ++ phi = create_phi_node (NULL_TREE, epilogue_loop_header); ++ create_new_def_for (entry_node, phi, gimple_phi_result_ptr (phi)); ++ res = gimple_phi_result (phi); ++ cond_stmt = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt1), res, ++ origin_loop.limit, NULL_TREE, NULL_TREE); ++ gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ ++ set_current_def (origin_loop.base, res); ++ defs_map.put (epilogue_loop_header, res); ++} ++ ++/* Create loop body BB for epilogue_loop. ++ eg: ++ _32 = (sizetype) len_31; ++ _33 = pb_13 (D) + _32; ++ _34 = *_33; ++ _35 = cur_15 (D) + _32; ++ _36 = *_35; ++ if (_34 != _36) ++ ++ The IR of bb is as above. */ ++ ++static void ++create_epilogue_loop_body_bb (basic_block &epilogue_loop_body_bb, ++ basic_block after_bb, basic_block dominator_bb, ++ class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple *g; ++ gcond *cond_stmt; ++ tree lhs1, lhs2, lhs3; ++ ++ tree entry_node = get_current_def (origin_loop.base); ++ epilogue_loop_body_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (epilogue_loop_body_bb, outer); ++ make_edge (after_bb, epilogue_loop_body_bb, EDGE_TRUE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_body_bb, dominator_bb); ++ gsi = gsi_last_bb (epilogue_loop_body_bb); ++ lhs1 = gimple_build (&stmts, NOP_EXPR, sizetype, entry_node); ++ lhs2 = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (origin_loop.arr1), ++ origin_loop.arr1, lhs1); ++ g = gimple_build_assign (make_ssa_name (unsigned_char_type_node), ++ fold_build2 (MEM_REF, unsigned_char_type_node, lhs2, ++ build_int_cst (TREE_TYPE (lhs2), 0))); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs2 = gimple_assign_lhs (g); ++ lhs3 = gimple_build (&stmts, POINTER_PLUS_EXPR, TREE_TYPE (origin_loop.arr2), ++ origin_loop.arr2, lhs1); ++ g = gimple_build_assign (make_ssa_name (unsigned_char_type_node), ++ fold_build2 (MEM_REF, unsigned_char_type_node, lhs3, ++ build_int_cst (TREE_TYPE (lhs3), 0))); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs3 = gimple_assign_lhs (g); ++ cond_stmt = gimple_build_cond (gimple_cond_code (origin_loop.cond_stmt2), lhs2, ++ lhs3, NULL_TREE, NULL_TREE); ++ gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ defs_map.put (epilogue_loop_body_bb, get_current_def (origin_loop.base)); ++} ++ ++/* Create loop_latch BB for epilogue_loop. ++ eg: ++ len_37 = len_31 + 1; ++ ++ The IR of bb is as above. 
*/
++
++static void
++create_epilogue_loop_latch (basic_block &epilogue_loop_latch,
++                            basic_block after_bb, basic_block dominator_bb,
++                            class loop *outer)
++{
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gimple *g;
++  tree res;
++
++  tree entry_node = get_current_def (origin_loop.base);
++  epilogue_loop_latch = create_empty_bb (after_bb);
++  add_bb_to_loop (epilogue_loop_latch, outer);
++  make_edge (after_bb, epilogue_loop_latch, EDGE_FALSE_VALUE);
++  set_immediate_dominator (CDI_DOMINATORS, epilogue_loop_latch, dominator_bb);
++  gsi = gsi_last_bb (epilogue_loop_latch);
++  res = copy_ssa_name (entry_node);
++  g = gimple_build_assign (res, PLUS_EXPR, entry_node,
++                           build_int_cst (TREE_TYPE (entry_node),
++                                          origin_loop.step));
++  gimple_seq_add_stmt (&stmts, g);
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++  defs_map.put (epilogue_loop_latch, res);
++}
++
++/* convert_to_new_loop
++   | |
++   | |
++   | | entry_edge
++   | ______ |
++   | / V V
++   | | -----origin_loop_header---
++   | | | |
++   | | -------------------------\
++   | | | \
++   | | V \___ ___ ___ ___ ___ ___ ___
++   | | -----origin_loop_body----- |
++   | | | | |
++   | | -------------------------\ |
++   | | | \___ ___ ___ ___ |
++   | | V V V
++   | | -----origin_loop_latch---- -----exit_bb------
++   | | | | | |
++   | | /-------------------------- ------------------
++   | \ __ /
++   |
++   | |
++   | ====> |entry_edge
++   | V
++   | -------prolog_bb-----
++   | | |
++   | ---------------------
++   | |
++   | V
++   | -----align_loop_header----
++   | /-----------------> | |
++   |/ --------------------------
++   || / \
++   || V V
++   || ---align_loop_body--- ---epilogue_loop_header--
++   || | | -------| |<---|
++   || --------------------\ / ------------------------- |
++   || | \____ | | |
++   || V | | V |
++   || ---align_loop_latch--- | | ---epilogue_loop_body---- |
++   || | | | | ----| | |
++   || ---------------------- | | / ------------------------- |
++   || / __________/ | | | |
++   || / | | | V |
++   | \ __________/ | | | ---epilogue_loop_latch--- |
++   | | | | | | |
++   | | | | ------------------------- /
++   | V | | | /
++   | -align_loop_exit_bb- | | \______________/
++   | | | | |
++   | -------------------- | |
++   | | | |
++   | | V V
++   | | -----exit_bb------
++   | |---->| |
++   | ------------------
++
++   The origin_loop conversion process starts from entry_edge and ends at
++   exit_bb; the execution logic of origin_loop is completely replaced by
++   align_loop + epilogue_loop:
++   1) align_loop mainly implements the idea of using wide-type dereference
++      and comparison on array elements, so as to achieve the effect of
++      acceleration; for the corresponding source code understanding, please
++      refer to the description of the pass at the beginning;
++   2) epilogue_loop compares the remaining array elements that align_loop
++      has not processed. 
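++
++   As a rough, purely illustrative C sketch (not part of the patch; the
++   types and casts are simplified and alignment concerns are ignored), the
++   conversion rewrites
++     while (++len != len_limit)
++       if (pb[len] != cur[len])
++         break;
++   into approximately
++     for (++len; len + 8 <= len_limit; len += 8)
++       {
++         unsigned long long a = *(unsigned long long *) (pb + len);
++         unsigned long long b = *(unsigned long long *) (cur + len);
++         if (a != b)
++           {
++             len += __builtin_ctzll (a ^ b) >> 3;
++             goto done;
++           }
++       }
++     while (len != len_limit && pb[len] == cur[len])
++       ++len;
++     done:;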
*/ ++ ++static void ++create_new_loops (edge entry_edge) ++{ ++ basic_block prolog_bb; ++ basic_block align_loop_header, align_loop_latch, align_loop_body_bb; ++ basic_block align_pred_bb, align_loop_exit_bb; ++ basic_block epilogue_loop_header, epilogue_loop_latch, epilogue_loop_body_bb; ++ basic_block epilogue_loop_pred_bb; ++ class loop *align_loop; ++ class loop *epilogue_loop; ++ ++ class loop *outer = entry_edge->src->loop_father; ++ ++ create_prolog_bb (prolog_bb, entry_edge->src, entry_edge->src, outer, ++ entry_edge); ++ ++ create_loop_pred_bb (align_pred_bb, prolog_bb, prolog_bb, outer); ++ make_single_succ_edge (prolog_bb, align_pred_bb, EDGE_FALLTHRU); ++ ++ create_align_loop_header (align_loop_header, align_pred_bb, ++ align_pred_bb, outer); ++ ++ create_align_loop_body_bb (align_loop_body_bb, align_loop_header, ++ align_loop_header, outer); ++ ++ create_align_loop_latch (align_loop_latch, align_loop_body_bb, ++ align_loop_body_bb, outer); ++ make_edge (align_loop_latch, align_loop_header, EDGE_FALLTHRU); ++ rewrite_add_phi_arg (align_loop_header); ++ ++ align_loop = init_new_loop (outer, align_loop_header, align_loop_latch); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nPrint byte align loop %d:\n", align_loop->num); ++ flow_loop_dump (align_loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ ++ create_align_loop_exit_bb (align_loop_exit_bb, align_loop_body_bb, ++ align_loop_body_bb, outer); ++ ++ create_loop_pred_bb (epilogue_loop_pred_bb, align_loop_header, ++ align_loop_header, outer); ++ make_edge (align_loop_header, epilogue_loop_pred_bb, EDGE_FALSE_VALUE); ++ ++ create_epilogue_loop_header (epilogue_loop_header, epilogue_loop_pred_bb, ++ epilogue_loop_pred_bb, outer); ++ ++ create_epilogue_loop_body_bb (epilogue_loop_body_bb, epilogue_loop_header, ++ epilogue_loop_header, outer); ++ ++ create_epilogue_loop_latch (epilogue_loop_latch, epilogue_loop_body_bb, ++ epilogue_loop_body_bb, outer); ++ make_single_succ_edge (epilogue_loop_latch, epilogue_loop_header, ++ EDGE_FALLTHRU); ++ rewrite_add_phi_arg (epilogue_loop_header); ++ ++ epilogue_loop = init_new_loop (outer, epilogue_loop_header, ++ epilogue_loop_latch); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nPrint epilogue loop %d:\n", epilogue_loop->num); ++ flow_loop_dump (epilogue_loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ make_single_succ_edge (align_loop_exit_bb, origin_loop.exit_bb1, ++ EDGE_FALLTHRU); ++ set_immediate_dominator (CDI_DOMINATORS, origin_loop.exit_bb1, ++ entry_edge->src); ++ make_edge (epilogue_loop_body_bb, origin_loop.exit_bb1, EDGE_TRUE_VALUE); ++ ++ make_edge (epilogue_loop_header, origin_loop.exit_bb2, EDGE_FALSE_VALUE); ++ set_immediate_dominator (CDI_DOMINATORS, origin_loop.exit_bb2, ++ entry_edge->src); ++ ++ rewrite_add_phi_arg (origin_loop.exit_bb1); ++ rewrite_add_phi_arg (origin_loop.exit_bb2); ++ ++ remove_edge (origin_loop.exit_e1); ++ remove_edge (origin_loop.exit_e2); ++} ++ ++/* Make sure that the dominance relationship of the newly inserted cfg ++ is not missing. 
*/
++
++static void
++update_loop_dominator (cdi_direction dir)
++{
++  gcc_assert (dom_info_available_p (dir));
++
++  basic_block bb;
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      basic_block imm_bb = get_immediate_dominator (dir, bb);
++      if (!imm_bb || bb == origin_loop.exit_bb1)
++        {
++          set_immediate_dominator (CDI_DOMINATORS, bb,
++                                   recompute_dominator (CDI_DOMINATORS, bb));
++          continue;
++        }
++    }
++}
++
++/* Clear information about the original loop. */
++
++static void
++remove_origin_loop (class loop *loop)
++{
++  basic_block *body;
++
++  body = get_loop_body_in_dom_order (loop);
++  unsigned n = loop->num_nodes;
++  for (unsigned i = 0; i < n; i++)
++    {
++      delete_basic_block (body[i]);
++    }
++  free (body);
++  delete_loop (loop);
++}
++
++/* Perform the conversion of origin_loop to new_loop. */
++
++static void
++convert_to_new_loop (class loop *loop)
++{
++  create_new_loops (origin_loop.entry_edge);
++  remove_origin_loop (loop);
++  update_loop_dominator (CDI_DOMINATORS);
++  update_ssa (TODO_update_ssa);
++}
++
++/* The main entry of the array-widen-compare optimization. */
++
++static unsigned int
++tree_ssa_array_widen_compare ()
++{
++  unsigned int todo = 0;
++  class loop *loop;
++
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      flow_loops_dump (dump_file, NULL, 1);
++      fprintf (dump_file, "\nConfirm which loop can be optimized using"
++               " array-widen-compare\n");
++    }
++
++  enum li_flags LI = LI_FROM_INNERMOST;
++  FOR_EACH_LOOP (loop, LI)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        {
++          fprintf (dump_file, "======================================\n");
++          fprintf (dump_file, "Processing loop %d:\n", loop->num);
++          fprintf (dump_file, "======================================\n");
++          flow_loop_dump (loop, dump_file, NULL, 1);
++          fprintf (dump_file, "\n\n");
++        }
++
++      if (determine_loop_form (loop))
++        {
++          if (dump_file && (dump_flags & TDF_DETAILS))
++            {
++              fprintf (dump_file, "The %dth loop form is successfully matched,"
++                       " and the loop can be optimized.\n",
++                       loop->num);
++              dump_loop_bb (loop);
++            }
++
++          convert_to_new_loop (loop);
++        }
++    }
++
++  todo |= (TODO_update_ssa);
++  return todo;
++}
++
++/* Array widen compare. */
++
++namespace {
++
++const pass_data pass_data_tree_array_widen_compare =
++{
++  GIMPLE_PASS,
++  "awiden_compare",
++  OPTGROUP_LOOP,
++  TV_TREE_ARRAY_WIDEN_COMPARE,
++  (PROP_cfg | PROP_ssa),
++  0,
++  0,
++  0,
++  (TODO_update_ssa | TODO_verify_all)
++};
++
++class pass_array_widen_compare : public gimple_opt_pass
++{
++public:
++  pass_array_widen_compare (gcc::context *ctxt)
++    : gimple_opt_pass (pass_data_tree_array_widen_compare, ctxt)
++  {}
++
++  /* opt_pass methods: */
++  virtual bool gate (function *);
++  virtual unsigned int execute (function *);
++
++}; // class pass_array_widen_compare
++
++bool
++pass_array_widen_compare::gate (function *)
++{
++  return (flag_array_widen_compare > 0 && optimize >= 3);
++}
++
++unsigned int
++pass_array_widen_compare::execute (function *fun)
++{
++  if (number_of_loops (fun) <= 1)
++    return 0;
++
++  /* Only supports LP64 data mode. 
*/
++  if (TYPE_PRECISION (long_integer_type_node) != 64
++      || POINTER_SIZE != 64 || TYPE_PRECISION (integer_type_node) != 32)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "The current data mode is not supported,"
++                 " only the LP64 data mode is supported.\n");
++      return 0;
++    }
++
++  return tree_ssa_array_widen_compare ();
++}
++
++} // anon namespace
++
++gimple_opt_pass *
++make_pass_array_widen_compare (gcc::context *ctxt)
++{
++  return new pass_array_widen_compare (ctxt);
++}
+\ No newline at end of file
+--
+2.27.0.windows.1
+
diff --git a/gcc.spec b/gcc.spec
index acc5506cf28222ffa28e9b5fc7442e9461cf4554..d5a0273e20b374eefb49e219dd6f9c26dc6e262f 100644
--- a/gcc.spec
+++ b/gcc.spec
@@ -61,7 +61,7 @@ Summary: Various compilers (C, C++, Objective-C, ...)
 Name: gcc
 Version: %{gcc_version}
-Release: 12
+Release: 13
 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
 URL: https://gcc.gnu.org
@@ -150,6 +150,18 @@ Patch31: 0031-AutoBOLT-Enable-BOLT-linker-plugin-on-aarch64-3-3.patch
 Patch32: 0032-Autoprefetch-Prune-invaild-loops-containing-edges-wh.patch
 Patch33: 0033-AutoFdo-Fix-memory-leaks-in-autofdo-and-autoprefetch.patch
 Patch34: 0034-Backport-sanitizer-Fix-asan-against-glibc-2.34-PR100.patch
+Patch35: 0035-ccmp-Add-another-optimization-opportunity-for-ccmp-i.patch
+Patch36: 0036-StructReorg-Refactoring-reorder-fields-to-struct-lay.patch
+Patch37: 0037-Backport-loop-invariant-Don-t-move-cold-bb-instructi.patch
+Patch38: 0038-DFE-Add-Dead-Field-Elimination-in-Struct-Reorg.patch
+Patch39: 0039-Backport-ipa-sra-Fix-thinko-when-overriding-safe_to_.patch
+Patch40: 0040-Backport-ifcvt-Allow-constants-for-noce_convert_mult.patch
+Patch41: 0041-Backport-Register-sysroot-in-the-driver-switches-tab.patch
+Patch42: 0042-DFE-Fix-bugs.patch
+Patch43: 0043-Backport-Extend-special_memory_constraint.patch
+Patch44: 0044-Backport-ira-Fix-unnecessary-register-spill.patch
+Patch45: 0045-Transposed-SLP-Enable-Transposed-SLP.patch
+Patch46: 0046-ArrayWidenCompare-Add-a-new-optimization-for-array-c.patch
 
 %global gcc_target_platform %{_arch}-linux-gnu
 
@@ -640,6 +652,18 @@ not stable, so plugins must be rebuilt any time GCC is updated.
 %patch32 -p1
 %patch33 -p1
 %patch34 -p1
+%patch35 -p1
+%patch36 -p1
+%patch37 -p1
+%patch38 -p1
+%patch39 -p1
+%patch40 -p1
+%patch41 -p1
+%patch42 -p1
+%patch43 -p1
+%patch44 -p1
+%patch45 -p1
+%patch46 -p1
 
 %build
 
@@ -2660,6 +2684,12 @@ end
 %doc rpm.doc/changelogs/libcc1/ChangeLog*
 
 %changelog
+* Mon Aug 8 2022 benniaobufeijiushiji - 10.3.1-13
+- Type:Sync
+- ID:NA
+- SUG:NA
+- DESC:Sync patch from openeuler/gcc
+
 * Fri Jul 08 2022 zhaomengmeng - 10.3.1-12
 - Type:SPEC
 - ID:NA