diff --git a/accelerate-libs.tar.gz b/accelerate-libs.tar.gz
new file mode 100644
index 0000000000000000000000000000000000000000..3dae67a706f2a1b6c06ade5295a0bbf7cdb0b241
Binary files /dev/null and b/accelerate-libs.tar.gz differ
diff --git a/ccmp-instruction-matching.patch b/ccmp-instruction-matching.patch
new file mode 100644
index 0000000000000000000000000000000000000000..845294922faff088258834e13482096ca5218528
--- /dev/null
+++ b/ccmp-instruction-matching.patch
@@ -0,0 +1,415 @@
+diff --git a/gcc/ccmp.c b/gcc/ccmp.c
+index ee8e478..86916a9 100644
+--- a/gcc/ccmp.c
++++ b/gcc/ccmp.c
+@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3.  If not see
+ #include "cfgexpand.h"
+ #include "ccmp.h"
+ #include "predict.h"
++#include "gimple-iterator.h"
+
+ /* Check whether T is a simple boolean variable or a SSA name
+    set by a comparison operator in the same basic block.  */
+@@ -129,6 +130,37 @@ ccmp_candidate_p (gimple *g)
+   return false;
+ }
+
++/* Check whether BB is a potential conditional compare candidate.  */
++bool check_ccmp_candidate (basic_block bb)
++{
++  gimple_stmt_iterator bsi;
++  gimple *bb_last_stmt, *stmt;
++  tree op0, op1;
++
++  bsi = gsi_last_bb (bb);
++  bb_last_stmt = gsi_stmt (bsi);
++
++  if (bb_last_stmt && gimple_code (bb_last_stmt) == GIMPLE_COND)
++    {
++      op0 = gimple_cond_lhs (bb_last_stmt);
++      op1 = gimple_cond_rhs (bb_last_stmt);
++
++      if (TREE_CODE (op0) == SSA_NAME
++          && TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE
++          && TREE_CODE (op1) == INTEGER_CST
++          && ((gimple_cond_code (bb_last_stmt) == NE_EXPR)
++              || (gimple_cond_code (bb_last_stmt) == EQ_EXPR)))
++        {
++          stmt = SSA_NAME_DEF_STMT (op0);
++          if (stmt && gimple_code (stmt) == GIMPLE_ASSIGN)
++            {
++              return ccmp_candidate_p (stmt);
++            }
++        }
++    }
++  return false;
++}
++
+ /* Extract the comparison we want to do from the tree.  */
+ void
+ get_compare_parts (tree t, int *up, rtx_code *rcode,
+diff --git a/gcc/ccmp.h b/gcc/ccmp.h
+index cd38db6..f364eb7 100644
+--- a/gcc/ccmp.h
++++ b/gcc/ccmp.h
+@@ -21,5 +21,6 @@ along with GCC; see the file COPYING3.  If not see
+ #define GCC_CCMP_H
+
+ extern rtx expand_ccmp_expr (gimple *, machine_mode);
++extern bool check_ccmp_candidate (basic_block bb);
+
+ #endif /* GCC_CCMP_H */
+diff --git a/gcc/common.opt b/gcc/common.opt
+index d342c4f..c02cba7 100644
+--- a/gcc/common.opt
++++ b/gcc/common.opt
+@@ -1820,6 +1820,10 @@ fira-verbose=
+ Common RejectNegative Joined UInteger Var(flag_ira_verbose) Init(5)
+ -fira-verbose=	Control IRA's level of diagnostic messages.
+
++fenhance-ccmp-candidate
++Common Report Var(flag_enhance_ccmp_candidate) Init(0) Optimization
++Enhance ccmp candidate matching in the expand pass.
++
+ fivopts
+ Common Report Var(flag_ivopts) Init(1) Optimization
+ Optimize induction variables on trees.
+diff --git a/gcc/testsuite/gcc.target/aarch64/ccmp_3.c b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
+new file mode 100644
+index 0000000..6219ccd
+--- /dev/null
++++ b/gcc/testsuite/gcc.target/aarch64/ccmp_3.c
+@@ -0,0 +1,15 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-options "-O -fdump-rtl-expand-details -fenhance-ccmp-candidate" } */
++
++int func (int a, int b, int c)
++{
++  while (1)
++    {
++      if (a-- == 0 || b >= c)
++        {
++          return 1;
++        }
++    }
++}
++
++/* { dg-final { scan-assembler-times "\tccmp\t" 1 } } */
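check_ccmp_candidate above extends ccmp matching from a bare GIMPLE_COND to a
boolean SSA name defined by a comparison chain in the same block, the shape
produced by short-circuit conditions such as the one in ccmp_3.c.  As a rough
illustration (a hypothetical sketch, not part of the patch; the exact assembly
depends on GCC version and options), a same-shape function is:

  /* Built with: gcc -O -fenhance-ccmp-candidate on aarch64.  */
  int drain (int n, int lo, int hi)
  {
    while (1)
      {
        /* The || of two register compares becomes one boolean SSA name
           tested by the loop's GIMPLE_COND; with the new matching this can
           expand to cmp + ccmp + one branch instead of two branches.  */
        if (n-- == 0 || lo >= hi)
          return 1;
        lo++;
      }
  }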
+diff --git a/gcc/tree-ssa-coalesce.c b/gcc/tree-ssa-coalesce.c
+index 2ea0a66..b116627 100644
+--- a/gcc/tree-ssa-coalesce.c
++++ b/gcc/tree-ssa-coalesce.c
+@@ -38,6 +38,9 @@ along with GCC; see the file COPYING3.  If not see
+ #include "explow.h"
+ #include "tree-dfa.h"
+ #include "stor-layout.h"
++#include "ccmp.h"
++#include "target.h"
++#include "tree-outof-ssa.h"
+
+ /* This set of routines implements a coalesce_list.  This is an object which
+    is used to track pairs of ssa_names which are desirable to coalesce
+@@ -853,6 +856,257 @@ live_track_clear_base_vars (live_track *ptr)
+   bitmap_clear (&ptr->live_base_var);
+ }
+
++/* Return true if gimple is a copy assignment.  */
++
++static inline bool
++gimple_is_assign_copy_p (gimple *gs)
++{
++  return (is_gimple_assign (gs) && gimple_assign_copy_p (gs)
++          && TREE_CODE (gimple_assign_lhs (gs)) == SSA_NAME
++          && TREE_CODE (gimple_assign_rhs1 (gs)) == SSA_NAME);
++}
++
++/* Compute the live set for a ccmp candidate.  */
++
++static void
++get_ccmp_candidate_live (live_track *def_live, live_track *live,
++                         basic_block bb, var_map map, gimple_stmt_iterator gsi)
++{
++  tree def;
++  ssa_op_iter iter;
++  for (gimple_stmt_iterator bsi = gsi_last_bb (bb);
++       (gsi_stmt (bsi) != gsi_stmt (gsi)); gsi_prev (&bsi))
++    {
++      gimple *stmt = gsi_stmt (bsi);
++      FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF)
++        {
++          int p = var_to_partition (map, def);
++          if (p == NO_PARTITION)
++            {
++              break;
++            }
++          int root = basevar_index (map, p);
++          if (bitmap_bit_p (&def_live->live_base_var, root)
++              && bitmap_bit_p (&def_live->live_base_partitions[root], p))
++            {
++              bitmap_set_bit (&live->live_base_var, root);
++              bitmap_set_bit (&live->live_base_partitions[root], p);
++            }
++        }
++    }
++}
++
++/* This routine will process a DEF for a ccmp candidate.  */
++
++static void
++live_track_for_ccmp_candidate (live_track *def_live, var_map map,
++                               basic_block bb, bitmap use_partition, live_track *live)
++{
++  gimple_stmt_iterator gsi = gsi_last_bb (bb);
++  gimple *last_stmt = gsi_stmt (gsi);
++
++  tree use;
++  ssa_op_iter iter;
++
++  auto_vec<tree> stack;
++  stack.safe_push (gimple_cond_lhs (last_stmt));
++
++  while (!stack.is_empty ())
++    {
++      tree first = stack.pop ();
++      gimple *first_stmt = SSA_NAME_DEF_STMT (first);
++
++      if (gimple_bb (first_stmt) != bb || !is_gimple_assign (first_stmt)
++          || !ssa_is_replaceable_p (first_stmt))
++        {
++          continue;
++        }
++
++      gsi = gsi_for_stmt (first_stmt);
++
++      FOR_EACH_SSA_TREE_OPERAND (use, first_stmt, iter, SSA_OP_USE)
++        {
++          if (TREE_CODE (use) == SSA_NAME)
++            {
++              stack.safe_push (use);
++              int p = var_to_partition (map, use);
++              if (p == NO_PARTITION)
++                {
++                  continue;
++                }
++              int root = basevar_index (map, p);
++              bitmap_set_bit (use_partition, root);
++            }
++        }
++    }
++  gsi_prev (&gsi);
++  get_ccmp_candidate_live (def_live, live, bb, map, gsi);
++}
++
++/* Process defs from the same base partition.
*/ ++ ++static bool ++exist_same_base_partition_def (live_track *def_live, var_map map, ++ basic_block bb) ++{ ++ live_track *live; ++ live = new_live_track (map); ++ bitmap same_base_partition_def = BITMAP_ALLOC (NULL); ++ bitmap ccmp_use_partition = BITMAP_ALLOC (NULL); ++ live_track_for_ccmp_candidate (def_live, map, bb, ccmp_use_partition, live); ++ bitmap base_var; ++ bitmap_iterator bvi; ++ unsigned x; ++ base_var = &live->live_base_var; ++ EXECUTE_IF_SET_IN_BITMAP (base_var, 0, x, bvi) ++ { ++ bitmap var_partition; ++ bitmap_iterator bpi; ++ unsigned y = 0; ++ var_partition = &live->live_base_partitions[x]; ++ int num = bitmap_count_bits (var_partition); ++ if (num == 1) ++ { ++ continue; ++ } ++ EXECUTE_IF_SET_IN_BITMAP (var_partition, 0, y, bpi) ++ { ++ gimple *stmt = SSA_NAME_DEF_STMT (partition_to_var (map, y)); ++ if (!is_gimple_assign (stmt)) ++ { ++ continue; ++ } ++ if (gimple_is_assign_copy_p (stmt)) ++ { ++ bitmap_set_bit (same_base_partition_def, x); ++ break; ++ } ++ } ++ } ++ delete_live_track (live); ++ bitmap_iterator dvi; ++ unsigned d; ++ EXECUTE_IF_SET_IN_BITMAP (same_base_partition_def, 0, d, dvi) ++ { ++ if (bitmap_bit_p (ccmp_use_partition, d)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\ndiff dump useanddef partition for" ++ " debug : bb[%d]: %d\n", bb->index, d); ++ } ++ return true; ++ } ++ } ++ return false; ++} ++ ++#define MAX_CCMP_CONFLICT_NUM 5 ++ ++/* Clear high-cost conflict graphs. */ ++ ++static inline void ++remove_high_cost_graph_for_ccmp (ssa_conflicts * conflict_graph) ++{ ++ unsigned x = 0; ++ int add_conflict_num = 0; ++ bitmap b; ++ FOR_EACH_VEC_ELT (conflict_graph->conflicts, x, b) ++ { ++ if (b) ++ { ++ add_conflict_num++; ++ } ++ } ++ if (add_conflict_num>=MAX_CCMP_CONFLICT_NUM) ++ { ++ conflict_graph->conflicts.release (); ++ } ++} ++ ++/* Adding a new conflict graph to the original graph. */ ++ ++static void ++process_add_graph (gimple_stmt_iterator bsi, live_track *live, ++ basic_block bb, ssa_conflicts * conflict_graph) ++{ ++ tree use, def; ++ ssa_op_iter iter; ++ for (gimple_stmt_iterator gsi = gsi_last_bb (bb); ++ (gsi_stmt (gsi) != gsi_stmt (bsi)); gsi_prev (&gsi)) ++ { ++ gimple *stmt = gsi_stmt (gsi); ++ if (gimple_visited_p (gsi_stmt (gsi)) && is_gimple_debug (stmt)) ++ { ++ continue; ++ } ++ if (gimple_is_assign_copy_p (stmt)) ++ { ++ live_track_clear_var (live, gimple_assign_rhs1 (stmt)); ++ } ++ FOR_EACH_SSA_TREE_OPERAND (def, stmt, iter, SSA_OP_DEF) ++ { ++ live_track_process_def (live, def, conflict_graph); ++ } ++ FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) ++ { ++ live_track_process_use (live, use); ++ } ++ } ++} ++ ++/* Build a conflict graph based on ccmp candidate. 
 */
++
++static void
++add_ccmp_conflict_graph (ssa_conflicts *conflict_graph,
++                         tree_live_info_p liveinfo, var_map map, basic_block bb)
++{
++  live_track *live;
++  tree use, def;
++  ssa_op_iter iter;
++  live = new_live_track (map);
++  live_track_init (live, live_on_exit (liveinfo, bb));
++  if (exist_same_base_partition_def (live, map, bb))
++    {
++      return;
++    }
++  gimple_stmt_iterator visited_gsi = gsi_last_bb (bb);
++  gimple *last_stmt = gsi_stmt (visited_gsi);
++  auto_vec<tree> stack;
++  stack.safe_push (gimple_cond_lhs (last_stmt));
++  while (!stack.is_empty ())
++    {
++      tree first = stack.pop ();
++      gimple *first_stmt = SSA_NAME_DEF_STMT (first);
++      if (gimple_bb (first_stmt) != bb || !is_gimple_assign (first_stmt)
++          || !ssa_is_replaceable_p (first_stmt))
++        {
++          continue;
++        }
++      if (gimple_is_assign_copy_p (first_stmt))
++        {
++          live_track_clear_var (live, gimple_assign_rhs1 (first_stmt));
++        }
++      gimple_set_visited (first_stmt, true);
++      visited_gsi = gsi_for_stmt (first_stmt);
++      FOR_EACH_SSA_TREE_OPERAND (def, first_stmt, iter, SSA_OP_DEF)
++        {
++          live_track_process_def (live, def, conflict_graph);
++        }
++      FOR_EACH_SSA_TREE_OPERAND (use, first_stmt, iter, SSA_OP_USE)
++        {
++          if (TREE_CODE (use) == SSA_NAME)
++            {
++              stack.safe_push (use);
++              live_track_process_use (live, use);
++            }
++        }
++    }
++  gsi_prev (&visited_gsi);
++  process_add_graph (visited_gsi, live, bb, conflict_graph);
++  delete_live_track (live);
++  remove_high_cost_graph_for_ccmp (conflict_graph);
++}
+
+ /* Build a conflict graph based on LIVEINFO.  Any partitions which are in the
+    partition view of the var_map liveinfo is based on get entries in the
+@@ -937,6 +1191,45 @@ build_ssa_conflict_graph (tree_live_info_p liveinfo)
+ 	  live_track_process_use (live, var);
+ 	}
+
++      if (flag_enhance_ccmp_candidate
++          && targetm.gen_ccmp_first
++          && check_ccmp_candidate (bb))
++        {
++          for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
++               gsi_next (&bsi))
++            {
++              gimple_set_visited (gsi_stmt (bsi), false);
++            }
++          ssa_conflicts *ccmp_conflict_graph;
++          ccmp_conflict_graph = ssa_conflicts_new (num_var_partitions (map));
++          add_ccmp_conflict_graph (ccmp_conflict_graph, liveinfo, map, bb);
++          unsigned x;
++          bitmap b;
++          if (ccmp_conflict_graph)
++            {
++              FOR_EACH_VEC_ELT (ccmp_conflict_graph->conflicts, x, b)
++                {
++                  if (b)
++                    {
++                      unsigned y = bitmap_first_set_bit (b);
++                      if ((!graph->conflicts[x]
++                           || !bitmap_bit_p (graph->conflicts[x], y))
++                          && bitmap_single_bit_set_p (ccmp_conflict_graph->conflicts[x])
++                          && bitmap_single_bit_set_p (ccmp_conflict_graph->conflicts[y]))
++                        {
++                          ssa_conflicts_add_one (graph, x, y);
++                          if (dump_file && (dump_flags & TDF_DETAILS))
++                            {
++                              fprintf (dump_file, "\nchange code for add conflict-ssa : "
++                                       "bb[%d] %d:%d\n", bb->index, x, y);
++                            }
++                        }
++                    }
++                }
++            }
++          ssa_conflicts_delete (ccmp_conflict_graph);
++        }
++
+      /* If result of a PHI is unused, looping over the statements will not
+         record any conflicts since the def was never live.  Since the PHI node
+         is going to be translated out of SSA form, it will insert a copy.
diff --git a/dont-move-cold-bb-instructions-to-preheader-in-RTL.patch b/dont-move-cold-bb-instructions-to-preheader-in-RTL.patch
new file mode 100644
index 0000000000000000000000000000000000000000..fd8803ff115fdade9370ccd25d201faba24d691c
--- /dev/null
+++ b/dont-move-cold-bb-instructions-to-preheader-in-RTL.patch
@@ -0,0 +1,54 @@
+This backport contains 1 patch from the gcc-patches mailing list.
+The URLs of the patches are listed below in chronological order.
+ +0001-backport-dont-move-cold-bb-instructions-to-preheader-in-RTL.patch +https://gcc.gnu.org/pipermail/gcc-patches/2021-December/586372.html + +diff --git a/gcc/common.opt b/gcc/common.opt +index d4da697..3e46d37 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2724,6 +2724,10 @@ ftree-loop-im + Common Report Var(flag_tree_loop_im) Init(1) Optimization + Enable loop invariant motion on trees. + ++flim-count-check ++Common Report Var(flag_lim_count_check) Init(0) Optimization ++Check bb count for not hoisting cold code to hotter bbs. ++ + ftree-loop-linear + Common Alias(floop-nest-optimize) + Enable loop nest transforms. Same as -floop-nest-optimize. +diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c +index 1af8887..4ad21cd 100644 +--- a/gcc/loop-invariant.c ++++ b/gcc/loop-invariant.c +@@ -1184,10 +1184,18 @@ find_invariants_insn (rtx_insn *insn, bool always_reached, bool always_executed) + call. */ + + static void +-find_invariants_bb (basic_block bb, bool always_reached, bool always_executed) ++find_invariants_bb (struct loop *loop, basic_block bb, bool always_reached, ++ bool always_executed) + { + rtx_insn *insn; + ++ basic_block preheader = loop_preheader_edge (loop)->src; ++ ++ if (flag_lim_count_check && preheader->count > bb->count) ++ { ++ return; ++ } ++ + FOR_BB_INSNS (bb, insn) + { + if (!NONDEBUG_INSN_P (insn)) +@@ -1215,7 +1223,7 @@ find_invariants_body (struct loop *loop, basic_block *body, + unsigned i; + + for (i = 0; i < loop->num_nodes; i++) +- find_invariants_bb (body[i], ++ find_invariants_bb (loop, body[i], + bitmap_bit_p (always_reached, i), + bitmap_bit_p (always_executed, i)); + } diff --git a/gcc.spec b/gcc.spec index 368e0e8f450d91ce21aa1d0f9a41bee1d9d7c187..a6dbf9cb831550da4736600cd3f5e8cb0f2998c4 100644 --- a/gcc.spec +++ b/gcc.spec @@ -1,4 +1,4 @@ -%global DATE 20210428 +%global DATE 20211230 %global gcc_version 9.3.1 %global gcc_major 9.3.1 @@ -59,11 +59,12 @@ Summary: Various compilers (C, C++, Objective-C, ...) Name: gcc Version: %{gcc_version} -Release: %{DATE}.19 +Release: %{DATE}.20 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD URL: https://gcc.gnu.org Source0: https://ftp.gnu.org/gnu/gcc/gcc-9.3.0/gcc-9.3.0.tar.xz +Source1: accelerate-libs.tar.gz %global isl_version 0.16.1 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) @@ -236,7 +237,11 @@ Patch120: revert-moutline-atomics.patch Patch121: fix-ICE-in-eliminate-stmt.patch Patch122: revise-type-before-build-MULT.patch Patch123: Simplify-X-C1-C2.patch - +Patch124: struct-reorder-fields.patch +Patch125: loop-distribution-and-slp-vectorization.patch +Patch126: ccmp-instruction-matching.patch +Patch127: tree-optimization-92328-fix-value-number-with-bogus-type.patch +Patch128: dont-move-cold-bb-instructions-to-preheader-in-RTL.patch %global gcc_target_platform %{_arch}-linux-gnu %if %{build_go} @@ -673,8 +678,14 @@ This package contains header files and other support files for compiling GCC plugins. The GCC plugin ABI is currently not stable, so plugins must be rebuilt any time GCC is updated. +%package -n accelerate-libs +Summary: GCC acceleration runtime libs + +%description -n accelerate-libs +This package includes jemalloc, mathlib and stringlib + %prep -%setup -q -n gcc-9.3.0 +%setup -q -n gcc-9.3.0 -a 1 /bin/pwd %patch0 -p1 @@ -800,7 +811,11 @@ not stable, so plugins must be rebuilt any time GCC is updated. 
%patch121 -p1 %patch122 -p1 %patch123 -p1 - +%patch124 -p1 +%patch125 -p1 +%patch126 -p1 +%patch127 -p1 +%patch128 -p1 %build @@ -968,6 +983,8 @@ make prefix=%{buildroot}%{_prefix} mandir=%{buildroot}%{_mandir} \ chmod 644 %{buildroot}%{_infodir}/gnat* %endif +cp -rpd %{_builddir}/gcc-9.3.0/accelerate-libs/* $RPM_BUILD_ROOT%{_prefix}/ + FULLPATH=%{buildroot}%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major} FULLEPATH=%{buildroot}%{_prefix}/libexec/gcc/%{gcc_target_platform}/%{gcc_major} @@ -2731,7 +2748,35 @@ end %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/libcp1plugin.so* %doc rpm.doc/changelogs/libcc1/ChangeLog* +%ifarch aarch64 +%files -n accelerate-libs +%dir %{_prefix}/include/jemalloc +%{_prefix}/include/jemalloc/jemalloc.h +%{_prefix}/lib64/libjemalloc* +%dir %{_prefix}/lib64/pkgconfig +%{_prefix}/lib64/pkgconfig/jemalloc.pc +%dir %{_prefix}/lib64/libhpc +%{_prefix}/lib64/libhpc/libjemalloc* +%dir %{_prefix}/lib64/libhpc/pkgconfig +%{_prefix}/lib64/libhpc/pkgconfig/jemalloc.pc +%{_prefix}/include/mathlib.h +%{_prefix}/lib64/libmathlib* +%{_prefix}/lib64/libhpc/libmathlib* +%{_prefix}/include/stringlib.h +%{_prefix}/lib64/libstringlib* +%{_prefix}/lib64/libhpc/libstringlib* +%endif + %changelog +*Thu Dec 30 2021 benniaobufeijiushiji - 9.3.1-20211230.20 +- struct-reorder-fields.patch: New file +- loop-distribution-and-slp-vectorization.patch: New file +- ccmp-instruction-matching.patch: New file +- tree-optimization-92328-fix-value-number-with-bogus-type.patch: New file +- dont-move-cold-bb-instructions-to-preheader-in-RTL.patch: New file +- accelerate-libs.tar.gz: New file +- gcc.spec: Add new patches and runtime libs + * Wed Apr 28 2021 eastb233 - 9.3.1-20210428.19 - add-fp-model-options.patch: New file - enable-simd-math.patch: Enable simd math library in C and Fortran diff --git a/loop-distribution-and-slp-vectorization.patch b/loop-distribution-and-slp-vectorization.patch new file mode 100644 index 0000000000000000000000000000000000000000..b18ccdb956b6344c1fbc8aa50ca64f993934d618 --- /dev/null +++ b/loop-distribution-and-slp-vectorization.patch @@ -0,0 +1,5172 @@ +diff -Nurp a/gcc/cfgloop.h b/gcc/cfgloop.h +--- a/gcc/cfgloop.h 2021-12-02 14:17:01.809460600 +0800 ++++ b/gcc/cfgloop.h 2021-12-02 15:00:44.233274100 +0800 +@@ -228,6 +228,9 @@ struct GTY ((chain_next ("%h.next"))) lo + flag_finite_loops or similar pragmas state. */ + unsigned finite_p : 1; + ++ /* True if we are processing this loop in pass ldist. */ ++ unsigned processing_ldist : 1; ++ + /* The number of times to unroll the loop. 0 means no information given, + just do what we always do. A value of 1 means do not unroll the loop. + A value of USHRT_MAX means unroll with no specific unrolling factor. +diff -Nurp a/gcc/common.opt b/gcc/common.opt +--- a/gcc/common.opt 2021-12-02 14:17:26.165538800 +0800 ++++ b/gcc/common.opt 2021-12-02 15:02:27.964014800 +0800 +@@ -2916,6 +2916,10 @@ ftree-vect-analyze-slp-group + Common Report Var(flag_tree_slp_group) Init(0) + Disable SLP vectorization for reduction chain on tree. + ++ftree-slp-transpose-vectorize ++Common Report Var(flag_tree_slp_transpose_vectorize) Optimization Init(0) ++Enable basic block vectorization (SLP) for transposed stores and loads on trees. ++ + fvect-cost-model= + Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization + -fvect-cost-model=[unlimited|dynamic|cheap] Specifies the cost model for vectorization. 
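The new option is off by default (Init(0)) and only has an effect when loop
distribution and SLP run, so the tests below always combine it with -O3.  A
representative invocation (the source file name is only a placeholder) is:

  gcc -O3 -ftree-slp-transpose-vectorize \
      -fdump-tree-ldist-all-details -fdump-tree-slp-details test.c

The ldist and slp dumps then carry the "Insertion done", "distributed", and
"vectorized using transposed version" messages that the dg-final directives
in the tests scan for.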
+diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c	2021-12-02 14:33:29.334794300 +0800
+@@ -0,0 +1,69 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-do run } */
++/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details -save-temps" } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++static unsigned inline abs2 (unsigned a)
++{
++  unsigned s = ((a>>15)&0x10001)*0xffff;
++  return (a+s)^s;
++}
++
++int foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib)
++{
++  unsigned tmp[4][4];
++  unsigned a0, a1, a2, a3;
++  int sum = 0;
++  for (int i = 0; i < 4; i++, oxa += ia, oxb += ib)
++    {
++      a0 = (oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16);
++      a1 = (oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16);
++      a2 = (oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16);
++      a3 = (oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16);
++      int t0 = a0 + a1;
++      int t1 = a0 - a1;
++      int t2 = a2 + a3;
++      int t3 = a2 - a3;
++      tmp[i][0] = t0 + t2;
++      tmp[i][2] = t0 - t2;
++      tmp[i][1] = t1 + t3;
++      tmp[i][3] = t1 - t3;
++    }
++  for (int i = 0; i < 4; i++)
++    {
++      int t0 = tmp[0][i] + tmp[1][i];
++      int t1 = tmp[0][i] - tmp[1][i];
++      int t2 = tmp[2][i] + tmp[3][i];
++      int t3 = tmp[2][i] - tmp[3][i];
++      a0 = t0 + t2;
++      a2 = t0 - t2;
++      a1 = t1 + t3;
++      a3 = t1 - t3;
++      sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3);
++    }
++  return (((unsigned short) sum) + ((unsigned) sum >> 16)) >> 1;
++}
++
++int main ()
++{
++  unsigned char oxa[128] = {0};
++  unsigned char oxb[128] = {0};
++  for (int i = 0; i < 128; i++)
++    {
++      oxa[i] += i * 3;
++      oxb[i] = i * 2;
++    }
++  int sum = foo (oxa, 16, oxb, 32);
++  if (sum != 736)
++    {
++      abort ();
++    }
++  return 0;
++}
++
++/* { dg-final { scan-tree-dump-times "Insertion done: 4 temp arrays inserted" 1 "ldist" } } */
++/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */
++/* { dg-final { scan-assembler-times {\tzip1\t} 8 } } */
++/* { dg-final { scan-assembler-times {\tzip2\t} 8 } } */
+diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c
+--- a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c	2021-12-02 14:33:29.347850600 +0800
+@@ -0,0 +1,17 @@
++/* { dg-do compile { target { aarch64*-*-linux* } } } */
++/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details" } */
++
++unsigned a0[4], a1[4], a2[4], a3[4];
++
++void foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib)
++{
++  for (int i = 0; i < 4; i++, oxa += ia, oxb += ib)
++    {
++      a0[i] = (oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16);
++      a1[i] = (oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16);
++      a2[i] = (oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16);
++      a3[i] = (oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16);
++    }
++}
++
++/* { dg-final { scan-tree-dump-times "Loop 1 not distributed."
1 "ldist" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c 2021-12-02 14:33:29.347850600 +0800 +@@ -0,0 +1,19 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details" } */ ++ ++unsigned a0[4], a1[4], a2[4], a3[4]; ++ ++void foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib) ++{ ++ for (int i = 0; i < 4; i++, oxa += ia, oxb += ib) ++ { ++ a0[i] = ((oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16)) + 1; ++ a1[i] = ((oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16)) - 2; ++ a2[i] = ((oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16)) * 3; ++ a3[i] = ((oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16)) / 4; ++ } ++} ++ ++/* { dg-final { scan-tree-dump-times "Insertion done: 4 temp arrays inserted" 1 "ldist" } } */ ++/* { dg-final { scan-tree-dump-times "Insertion removed" 1 "ldist" } } */ ++/* { dg-final { scan-tree-dump-times "Loop 1 not distributed." 1 "ldist" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/vect/transpose-1.c b/gcc/testsuite/gcc.dg/vect/transpose-1.c +--- a/gcc/testsuite/gcc.dg/vect/transpose-1.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/transpose-1.c 2021-12-02 15:04:48.597349000 +0800 +@@ -0,0 +1,53 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-require-effective-target vect_int } */ ++#include ++#include ++#include "tree-vect.h" ++ ++#define N 4 ++#define M 256 ++ ++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N]; ++ for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0[i] = pix1[0] - pix2[0]; ++ c1[i] = pix1[1] - pix2[1]; ++ c2[i] = pix1[2] - pix2[2]; ++ c3[i] = pix1[3] - pix2[3]; ++ c4[i] = pix1[4] - pix2[4]; ++ c5[i] = pix1[5] - pix2[5]; ++ c6[i] = pix1[6] - pix2[6]; ++ c7[i] = pix1[7] - pix2[7]; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0[i] + c1[i] + c2[i] + c3[i] + c4[i] + c5[i] + c6[i] + c7[i]; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv[]) ++{ ++ unsigned char input1[M]; ++ unsigned char input2[M]; ++ int i1 = 16; ++ int i2 = 8; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1[i] = i * 2; ++ input2[i] = i; ++ } ++ int sum = foo (input1, i1, input2, i2); ++ if (sum != 1264) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/vect/transpose-2.c b/gcc/testsuite/gcc.dg/vect/transpose-2.c +--- a/gcc/testsuite/gcc.dg/vect/transpose-2.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/transpose-2.c 2021-12-02 15:04:51.505138500 +0800 +@@ -0,0 +1,50 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-additional-options "-fno-tree-loop-vectorize" } */ ++/* { dg-require-effective-target vect_int } */ ++#include ++#include ++#include "tree-vect.h" ++ ++#define N 8 ++#define M 256 ++ ++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned short c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N]; ++ for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0[i] = pix1[0] - pix2[0]; ++ c1[i] = pix1[1] - pix2[1]; ++ c2[i] = pix1[2] - pix2[2]; ++ 
c3[i] = pix1[3] - pix2[3]; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0[i] + c1[i] + c2[i] + c3[i]; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv[]) ++{ ++ unsigned char input1[M]; ++ unsigned char input2[M]; ++ int i1 = 5; ++ int i2 = 4; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1[i] = i * 4; ++ input2[i] = i * 2; ++ } ++ int sum = foo (input1, i1, input2, i2); ++ if (sum != 1440) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/vect/transpose-3.c b/gcc/testsuite/gcc.dg/vect/transpose-3.c +--- a/gcc/testsuite/gcc.dg/vect/transpose-3.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/transpose-3.c 2021-12-02 15:04:56.148147200 +0800 +@@ -0,0 +1,54 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-additional-options "-fno-tree-loop-vectorize" } */ ++/* { dg-require-effective-target vect_int } */ ++#include ++#include ++#include "tree-vect.h" ++ ++#define N 4 ++#define M 256 ++ ++int foo (unsigned short *pix1, int i_pix1, unsigned short *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N]; ++ for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0[i] = pix1[0] - pix2[0]; ++ c1[i] = pix1[1] - pix2[1]; ++ c2[i] = pix1[2] - pix2[2]; ++ c3[i] = pix1[3] - pix2[3]; ++ c4[i] = pix1[4] - pix2[4]; ++ c5[i] = pix1[5] - pix2[5]; ++ c6[i] = pix1[6] - pix2[6]; ++ c7[i] = pix1[7] - pix2[7]; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0[i] + c1[i] + c2[i] + c3[i] + c4[i] + c5[i] + c6[i] + c7[i]; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv[]) ++{ ++ unsigned short input1[M]; ++ unsigned short input2[M]; ++ int i1 = 8; ++ int i2 = 4; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1[i] = i * 4; ++ input2[i] = i; ++ } ++ int sum = foo (input1, i1, input2, i2); ++ if (sum != 1680) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/vect/transpose-4.c b/gcc/testsuite/gcc.dg/vect/transpose-4.c +--- a/gcc/testsuite/gcc.dg/vect/transpose-4.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/transpose-4.c 2021-12-02 15:04:59.972811700 +0800 +@@ -0,0 +1,53 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-require-effective-target vect_int } */ ++#include ++#include ++#include "tree-vect.h" ++ ++#define N 4 ++#define M 256 ++ ++int foo (unsigned *pix1, int i_pix1, unsigned *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned c0[N], c1[N], c2[N], c3[N], c4[N], c5[N], c6[N], c7[N]; ++ for (i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0[i] = pix1[0] - pix2[0]; ++ c1[i] = pix1[1] - pix2[1]; ++ c2[i] = pix1[2] - pix2[2]; ++ c3[i] = pix1[3] - pix2[3]; ++ c4[i] = pix1[4] - pix2[4]; ++ c5[i] = pix1[5] - pix2[5]; ++ c6[i] = pix1[6] - pix2[6]; ++ c7[i] = pix1[7] - pix2[7]; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0[i] + c1[i] + c2[i] + c3[i] + c4[i] + c5[i] + c6[i] + c7[i]; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv[]) ++{ ++ unsigned input1[M]; ++ unsigned input2[M]; ++ int i1 = 12; ++ int i2 = 6; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1[i] = i * 7; ++ input2[i] = i * 3; ++ } ++ int sum = foo (input1, i1, input2, i2); ++ if (sum != 3616) ++ { ++ abort (); ++ } ++ return 0; 
++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/vect/transpose-5.c b/gcc/testsuite/gcc.dg/vect/transpose-5.c +--- a/gcc/testsuite/gcc.dg/vect/transpose-5.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/transpose-5.c 2021-12-02 15:05:02.424844300 +0800 +@@ -0,0 +1,73 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-require-effective-target vect_int } */ ++#include ++#include ++#include ++#include "tree-vect.h" ++ ++#define N 4 ++#define M 256 ++#define eps 1e-8 ++ ++double foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2) ++{ ++ unsigned a0[N]; ++ unsigned a1[N]; ++ unsigned a2[N]; ++ unsigned a3[N]; ++ ++ int b0[N]; ++ int b1[N]; ++ int b2[N]; ++ int b3[N]; ++ ++ for (int i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ a0[i] = (pix1[0] - pix2[0]) + ((pix1[4] + pix2[4]) << 16); ++ a1[i] = (pix1[1] - pix2[1]) + ((pix1[5] + pix2[5]) << 16); ++ a2[i] = (pix1[2] - pix2[2]) + ((pix1[6] + pix2[6]) << 16); ++ a3[i] = (pix1[3] - pix2[3]) + ((pix1[7] + pix2[7]) << 16); ++ } ++ ++ for (int i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ b0[i] = (pix1[0] - pix2[0]) + (pix1[4] + pix2[4]); ++ b1[i] = (pix1[1] - pix2[1]) + (pix1[5] + pix2[5]); ++ b2[i] = (pix1[2] - pix2[2]) + (pix1[6] + pix2[6]); ++ b3[i] = (pix1[3] - pix2[3]) + (pix1[7] + pix2[7]); ++ } ++ ++ double sum = 0; ++ for (int i = 0; i < N; i++) ++ { ++ sum += a0[i] + a1[i] + a2[i] + a3[i] + b0[i] + b1[i] + b2[i] + b3[i]; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv[]) ++{ ++ unsigned char input1[M]; ++ unsigned char input2[M]; ++ int i1 = 8; ++ int i2 = 3; ++ unsigned char m = 2; ++ unsigned short n = 12; ++ float t = 3.0; ++ double k = 4.2; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1[i] = i * 6; ++ input2[i] = i * 3; ++ } ++ double sum = foo (input1, i1, input2, i2); ++ if (fabs (sum - 78648144) > eps) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ ++/* { dg-final { scan-tree-dump-times "vectorizable_store for slp transpose" 2 "slp1" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/vect/transpose-6.c b/gcc/testsuite/gcc.dg/vect/transpose-6.c +--- a/gcc/testsuite/gcc.dg/vect/transpose-6.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/transpose-6.c 2021-12-02 15:05:04.276533100 +0800 +@@ -0,0 +1,67 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-require-effective-target vect_float } */ ++#include ++#include ++#include ++#include "tree-vect.h" ++ ++#define N 4 ++#define M 256 ++#define eps 1e-8 ++ ++float foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2) ++{ ++ unsigned a0[N]; ++ unsigned a1[N]; ++ unsigned a2[N]; ++ unsigned a3[N]; ++ ++ float c0[N]; ++ float c1[N]; ++ float c2[N]; ++ float c3[N]; ++ ++ for (int i = 0; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ a0[i] = (pix1[0] - pix2[0]) + ((pix1[4] - pix2[4]) << 16); ++ a1[i] = (pix1[1] - pix2[1]) + ((pix1[5] - pix2[5]) << 16); ++ a2[i] = (pix1[2] - pix2[2]) + ((pix1[6] - pix2[6]) << 16); ++ a3[i] = (pix1[3] - pix2[3]) + ((pix1[7] - pix2[7]) << 16); ++ ++ c0[i] = (pix1[0] * pix2[0]) + (pix1[4] * pix2[4]); ++ c1[i] = (pix1[1] * pix2[1]) + (pix1[5] * pix2[5]); ++ c2[i] = (pix1[2] * pix2[2]) + (pix1[6] * pix2[6]); ++ c3[i] = (pix1[3] * pix2[3]) + (pix1[7] * pix2[7]); ++ } ++ ++ float sum = 0; 
++ for (int i = 0; i < N; i++) ++ { ++ sum += a0[i] + a1[i] + a2[i] + a3[i] + c0[i] + c1[i] + c2[i] + c3[i]; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv[]) ++{ ++ unsigned char input1[M]; ++ unsigned char input2[M]; ++ int i1 = 18; ++ int i2 = 6; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1[i] = i * 4; ++ input2[i] = i * 2; ++ } ++ float sum = foo (input1, i1, input2, i2); ++ if (fabs (sum - 106041168) > eps) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ ++/* { dg-final { scan-tree-dump-times "vectorizable_store for slp transpose" 2 "slp1" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/vect/transpose-7.c b/gcc/testsuite/gcc.dg/vect/transpose-7.c +--- a/gcc/testsuite/gcc.dg/vect/transpose-7.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/transpose-7.c 2021-12-02 15:05:07.433047500 +0800 +@@ -0,0 +1,53 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-additional-options "-fno-tree-loop-vectorize" } */ ++/* { dg-require-effective-target vect_int } */ ++#include ++#include ++#include "tree-vect.h" ++ ++#define N 16 ++#define M 256 ++ ++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned char c0[N], c1[N]; ++ for (int i = 0; i < N/2; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0[i] = pix1[0] - pix2[0]; ++ c1[i] = pix1[1] - pix2[1]; ++ } ++ for (int i = N/2; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0[i] = pix1[0] - pix2[0]; ++ c1[i] = pix1[1] - pix2[1]; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0[i] + c1[i]; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv[]) ++{ ++ unsigned char input1[M]; ++ unsigned char input2[M]; ++ int i1 = 6; ++ int i2 = 4; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1[i] = i * 5; ++ input2[i] = i * 2; ++ } ++ int sum = foo (input1, i1, input2, i2); ++ if (sum != 3280) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized using transposed version" "slp1" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/vect/transpose-8.c b/gcc/testsuite/gcc.dg/vect/transpose-8.c +--- a/gcc/testsuite/gcc.dg/vect/transpose-8.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/transpose-8.c 2021-12-02 15:05:12.626561600 +0800 +@@ -0,0 +1,53 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-additional-options "-fno-tree-loop-vectorize" } */ ++/* { dg-require-effective-target vect_int } */ ++#include ++#include ++#include "tree-vect.h" ++ ++#define N 32 ++#define M 256 ++ ++int foo (unsigned char *pix1, int i_pix1, unsigned char *pix2, int i_pix2) ++{ ++ int i = 0; ++ int sum = 0; ++ unsigned char c0[N], c1[N]; ++ for (int i = 0; i < N/2; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0[i] = pix1[0] - pix2[0]; ++ c1[i] = pix1[1] - pix2[1]; ++ } ++ for (int i = N/2; i < N; i++, pix1 += i_pix1, pix2 += i_pix2) ++ { ++ c0[i] = pix1[0] - pix2[0]; ++ c1[i] = pix1[1] - pix2[1]; ++ } ++ for (int i = 0; i < N; i++) ++ { ++ sum += c0[i] + c1[i]; ++ } ++ return sum; ++} ++ ++int main (int argc, const char* argv[]) ++{ ++ unsigned char input1[M]; ++ unsigned char input2[M]; ++ int i1 = 6; ++ int i2 = 4; ++ check_vect (); ++ for (int i = 0; i < M; i++) ++ { ++ input1[i] = i * 5; ++ input2[i] = i * 2; ++ } ++ int sum = foo (input1, i1, input2, i2); ++ if (sum != 7584) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized 
using transposed version" "slp1" } } */
+diff -Nurp a/gcc/testsuite/gcc.dg/vect/vect.exp b/gcc/testsuite/gcc.dg/vect/vect.exp
+--- a/gcc/testsuite/gcc.dg/vect/vect.exp	2021-12-02 11:58:01.012117000 +0800
++++ b/gcc/testsuite/gcc.dg/vect/vect.exp	2021-12-02 14:33:29.423164300 +0800
+@@ -114,6 +114,13 @@ et-dg-runtest dg-runtest [lsort \
+ 	[glob -nocomplain $srcdir/$subdir/no-vfa-*.\[cS\]]] \
+ 	"" $DEFAULT_VECTCFLAGS
+
++# -ftree-slp-transpose-vectorize SLP tests
++set VECT_SLP_CFLAGS $SAVED_VECT_SLP_CFLAGS
++lappend VECT_SLP_CFLAGS "-ftree-slp-transpose-vectorize"
++et-dg-runtest dg-runtest [lsort \
++	[glob -nocomplain $srcdir/$subdir/transpose-*.\[cS\]]] \
++	"" "-ftree-slp-transpose-vectorize -fdump-tree-slp-details -O3"
++
+ # -ffast-math tests
+ set DEFAULT_VECTCFLAGS $SAVED_DEFAULT_VECTCFLAGS
+ lappend DEFAULT_VECTCFLAGS "-ffast-math"
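The core of the change is in tree-loop-distribution.c below.  In C terms the
insertion-plus-distribution step it implements behaves roughly like the
following (an illustrative sketch only; the names are invented and the real
rewrite happens on GIMPLE statements):

  /* Before: grouped loads feed isomorphic computations which SLP cannot
     handle in this loop form.  */
  for (int i = 0; i < 4; i++, p += stride)
    {
      out0[i] = BAR_0 (p[0]);
      out1[i] = BAR_1 (p[1]);
    }

  /* After temp-array insertion and distribution: the first loop keeps only
     the grouped loads and stores them into temp arrays; once unrolled it can
     be SLP-vectorized with transposed accesses, and the second loop consumes
     the arrays.  */
  unsigned t0[4], t1[4];
  for (int i = 0; i < 4; i++, p += stride)
    {
      t0[i] = p[0];
      t1[i] = p[1];
    }
  for (int i = 0; i < 4; i++)
    {
      out0[i] = BAR_0 (t0[i]);
      out1[i] = BAR_1 (t1[i]);
    }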
+diff -Nurp a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
+--- a/gcc/tree-loop-distribution.c	2021-12-02 14:17:25.858988700 +0800
++++ b/gcc/tree-loop-distribution.c	2021-12-23 14:18:00.999081000 +0800
+@@ -36,6 +36,47 @@ along with GCC; see the file COPYING3.
+    | D(I) = A(I-1)*E
+    |ENDDO
+
++   If an unvectorizable loop has grouped loads and calculation from grouped
++   loads is isomorphic, build temp arrays using stmts where isomorphic
++   calculation ends.  After distribution, the partition built from temp
++   arrays can be vectorized in pass SLP after loop unrolling.  For example,
++
++   |DO I = 1, N
++   | A = FOO (ARG_1);
++   | B = FOO (ARG_2);
++   | C = BAR_0 (A);
++   | D = BAR_1 (B);
++   |ENDDO
++
++   is transformed to
++
++   |DO I = 1, N
++   | J = FOO (ARG_1);
++   | K = FOO (ARG_2);
++   | X[I] = J;
++   | Y[I] = K;
++   | A = X[I];
++   | B = Y[I];
++   | C = BAR_0 (A);
++   | D = BAR_1 (B);
++   |ENDDO
++
++   and is then distributed to
++
++   |DO I = 1, N
++   | J = FOO (ARG_1);
++   | K = FOO (ARG_2);
++   | X[I] = J;
++   | Y[I] = K;
++   |ENDDO
++
++   |DO I = 1, N
++   | A = X[I];
++   | B = Y[I];
++   | C = BAR_0 (A);
++   | D = BAR_1 (B);
++   |ENDDO
++
+    Loop distribution is the dual of loop fusion.  It separates statements
+    of a loop (or loop nest) into multiple loops (or loop nests) with the
+    same loop header.  The major goal is to separate statements which may
+@@ -44,7 +85,9 @@ along with GCC; see the file COPYING3.
+
+    1) Seed partitions with specific type statements.  For now we support
+       two types seed statements: statement defining variable used outside
+-      of loop; statement storing to memory.
++      of loop; statement storing to memory.  Moreover, for unvectorizable
++      loops, we try to find isomorphic stmts from grouped load and build
++      temp arrays as new seed statements.
+    2) Build reduced dependence graph (RDG) for loop to be distributed.
+       The vertices (RDG:V) model all statements in the loop and the edges
+       (RDG:E) model flow and control dependencies between statements.
+@@ -103,6 +146,7 @@ along with GCC; see the file COPYING3.
+ #include "cfganal.h"
+ #include "gimple-iterator.h"
+ #include "gimplify-me.h"
++#include "gimplify.h"
+ #include "stor-layout.h"
+ #include "tree-cfg.h"
+ #include "tree-ssa-loop-manip.h"
+@@ -115,6 +159,9 @@ along with GCC; see the file COPYING3.
+ #include "params.h"
+ #include "tree-vectorizer.h"
+ #include "tree-eh.h"
++#include "optabs-tree.h"
++#include <map>
++#include <utility>
+
+
+ #define MAX_DATAREFS_NUM \
+@@ -194,6 +241,58 @@ struct rdg_vertex
+ #define RDG_MEM_WRITE_STMT(RDG, I) RDGV_HAS_MEM_WRITE (&(RDG->vertices[I]))
+ #define RDG_MEM_READS_STMT(RDG, I) RDGV_HAS_MEM_READS (&(RDG->vertices[I]))
+
++/* Results of isomorphic group analysis.  */
++#define UNINITIALIZED (0)
++#define ISOMORPHIC (1)
++#define HETEROGENEOUS (1 << 1)
++#define UNCERTAIN (1 << 2)
++
++/* Information of a stmt while analyzing isomorphic use.  */
++
++typedef struct _group_info
++{
++  gimple *stmt;
++
++  /* True if stmt can be a cut point.  */
++  bool cut_point;
++
++  /* For a use_stmt with two rhses, one of which is the lhs of stmt:
++     if the other is not yet known to be isomorphic, mark it uncertain.  */
++  bool uncertain;
++
++  /* Search of isomorphic stmts reached heterogeneous groups.  */
++  bool done;
++
++  _group_info ()
++  {
++    stmt = NULL;
++    cut_point = false;
++    uncertain = false;
++    done = false;
++  }
++} *group_info;
++
++/* Pair of cut points and corresponding profit.  */
++typedef std::pair<vec<gimple *> *, int> stmts_profit;
++
++/* Map of vector factor VF and corresponding stmts.  */
++typedef std::map<unsigned, stmts_profit> vf_stmts_profit_map;
++
++/* Pair of group_num and iteration_num.  We consider rhses from the same
++   group and iteration to be isomorphic.  */
++typedef std::pair<unsigned, unsigned> group_iteration;
++
++/* Pair of the lhs of a use_stmt and a group-iteration pair.  */
++typedef std::pair<tree, group_iteration> isomer_info;
++
++/* An isomorphic stmt is determined by the lhs of its use_stmt, the
++   group_num and the iteration_num when we insert the stmt into this map.  */
++typedef std::map<tree, group_iteration> isomer_stmt_lhs;
++
++/* Describe an uncertain stmt as <use_stmt, <rhs, <group_num, iteration_num>>>.
++   Rhs is the tree that we use to find the use_stmt.  */
++typedef std::map<gimple *, isomer_info> uncertain_stmts;
++
+ /* Data dependence type.  */
+
+ enum rdg_dep_type
+@@ -1715,7 +1814,8 @@ classify_partition (loop_p loop, struct
+
+ static bool
+ share_memory_accesses (struct graph *rdg,
+-		       partition *partition1, partition *partition2)
++		       partition *partition1, partition *partition2,
++		       hash_set<tree> *tmp_array_vars)
+ {
+   unsigned i, j;
+   bitmap_iterator bi, bj;
+@@ -1749,8 +1849,13 @@ share_memory_accesses (struct graph *rdg
+ 	if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0)
+ 	    && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0)
+ 	    && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0)
+-	    && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0))
+-	  return true;
++	    && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0)
++	    /* An exception: if partition1 and partition2 contain the
++	       temp array we inserted, do not merge them.  */
++	    && !tmp_array_vars->contains (DR_REF (dr1)))
++	  {
++	    return true;
++	  }
+       }
+   }
+
+@@ -2700,15 +2805,50 @@ fuse_memset_builtins (vec<struct partition *> *partitions)
++/* Merge the remaining partitions, keeping partitions that contain producer
++   statements separate from the rest.  */
++
++static void
++merge_remaining_partitions (vec<struct partition *> *partitions,
++			    bitmap producers)
++{
++  struct partition *partition = NULL;
++  struct partition *p1 = NULL, *p2 = NULL;
++  for (unsigned i = 0; partitions->iterate (i, &partition); i++)
++    {
++      if (bitmap_intersect_p (producers, partition->stmts))
++	{
++	  if (p1 == NULL)
++	    {
++	      p1 = partition;
++	      continue;
++	    }
++	  partition_merge_into (NULL, p1, partition, FUSE_FINALIZE);
++	}
++      else
++	{
++	  if (p2 == NULL)
++	    {
++	      p2 = partition;
++	      continue;
++	    }
++	  partition_merge_into (NULL, p2, partition, FUSE_FINALIZE);
++	}
++      partitions->unordered_remove (i);
++      partition_free (partition);
++      i--;
++    }
++}
++
+ /* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
+    ALIAS_DDRS contains ddrs which need runtime alias check.
*/ + + static void + finalize_partitions (struct loop *loop, vec *partitions, +- vec *alias_ddrs) ++ vec *alias_ddrs, bitmap producers) + { + unsigned i; +- struct partition *partition, *a; ++ struct partition *partition; + + if (partitions->length () == 1 + || alias_ddrs->length () > 0) +@@ -2740,13 +2880,7 @@ finalize_partitions (struct loop *loop, + || (loop->inner == NULL + && i >= NUM_PARTITION_THRESHOLD && num_normal > num_builtin)) + { +- a = (*partitions)[0]; +- for (i = 1; partitions->iterate (i, &partition); ++i) +- { +- partition_merge_into (NULL, a, partition, FUSE_FINALIZE); +- partition_free (partition); +- } +- partitions->truncate (1); ++ merge_remaining_partitions (partitions, producers); + } + + /* Fuse memset builtins if possible. */ +@@ -2754,6 +2888,1435 @@ finalize_partitions (struct loop *loop, + fuse_memset_builtins (partitions); + } + ++/* Gimple uids of GIMPLE_DEBUG and GIMPLE_LABEL were changed during function ++ vect_analyze_loop, reset them to -1. */ ++ ++static void ++reset_gimple_uid (loop_p loop) ++{ ++ basic_block *bbs = get_loop_body_in_custom_order (loop, bb_top_order_cmp); ++ for (int i = 0; i < int (loop->num_nodes); i++) ++ { ++ basic_block bb = bbs[i]; ++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ++ gsi_next (&gsi)) ++ { ++ gimple *stmt = gsi_stmt (gsi); ++ if (is_gimple_debug (stmt) || gimple_code (stmt) == GIMPLE_LABEL) ++ { ++ gimple_set_uid (stmt, -1); ++ } ++ } ++ } ++ free (bbs); ++} ++ ++static bool ++check_loop_vectorizable (loop_p loop) ++{ ++ vec_info_shared shared; ++ loop->processing_ldist = true; ++ vect_analyze_loop (loop, &shared); ++ loop_vec_info vinfo = loop_vec_info_for_loop (loop); ++ loop->processing_ldist = false; ++ reset_gimple_uid (loop); ++ if (vinfo == NULL) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, ++ "Loop %d no temp array insertion: bad access pattern, unable" ++ " to generate loop_vinfo.\n", loop->num); ++ } ++ return false; ++ } ++ if (vinfo->vectorizable) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Loop %d no temp array insertion: original loop" ++ " will be vectorized without distribution.\n", ++ loop->num); ++ } ++ delete vinfo; ++ loop->aux = NULL; ++ return false; ++ } ++ if (vinfo->grouped_loads.length () == 0) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Loop %d no temp array insertion: original loop" ++ " has no grouped loads.\n" , loop->num); ++ } ++ delete vinfo; ++ loop->aux = NULL; ++ return false; ++ } ++ return true; ++} ++ ++static inline void ++rebuild_rdg (loop_p loop, struct graph * &rdg, control_dependences *cd) ++{ ++ free_rdg (rdg); ++ rdg = build_rdg (loop, cd); ++ gcc_checking_assert (rdg != NULL); ++} ++ ++static bool ++may_insert_temp_arrays (loop_p loop, struct graph * &rdg, ++ control_dependences *cd) ++{ ++ if (!flag_tree_slp_transpose_vectorize) ++ { ++ return false; ++ } ++ /* Only loops with two basic blocks HEADER and LATCH are supported. HEADER ++ is the main body of a LOOP and LATCH is the basic block that controls the ++ LOOP execution. Size of temp array is determined by the time LOOP is ++ executed so it must be a const. 
*/ ++ tree loop_extent = number_of_latch_executions (loop); ++ if (loop->inner != NULL || loop->num_nodes > 2 ++ || rdg->n_vertices > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB) ++ || TREE_CODE (loop_extent) != INTEGER_CST) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Loop %d no temp array insertion: loop not" ++ " simple enough to distribute.\n", loop->num); ++ } ++ return false; ++ } ++ if (loop->dont_vectorize) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Loop %d no temp array insertion: this loop" ++ " should never be vectorized.\n", ++ loop->num); ++ } ++ return false; ++ } ++ ++ /* Do not distribute a LOOP that is able to be vectorized without ++ distribution. */ ++ if (!check_loop_vectorizable (loop)) ++ { ++ rebuild_rdg (loop, rdg, cd); ++ return false; ++ } ++ rebuild_rdg (loop, rdg, cd); ++ return true; ++} ++ ++/* Return max grouped loads' length if all groupes length satisfy len = 2 ^ n. ++ Otherwise, return 0. */ ++ ++static unsigned ++get_max_vf (loop_vec_info vinfo) ++{ ++ unsigned size = 0; ++ unsigned max = 0; ++ stmt_vec_info stmt_info; ++ unsigned i = 0; ++ FOR_EACH_VEC_ELT (vinfo->grouped_loads, i, stmt_info) ++ { ++ size = stmt_info->size; ++ if (!pow2p_hwi (size)) ++ { ++ return 0; ++ } ++ max = size > max ? size : max; ++ } ++ return max; ++} ++ ++/* Convert grouped_loads from linked list to vector with length vf. */ ++ ++static unsigned ++build_queue (loop_vec_info vinfo, unsigned vf, ++ vec *> &worklists) ++{ ++ stmt_vec_info stmt_info; ++ unsigned i = 0; ++ group_info ginfo = NULL; ++ vec *worklist = NULL; ++ FOR_EACH_VEC_ELT (vinfo->grouped_loads, i, stmt_info) ++ { ++ unsigned group_size = stmt_info->size; ++ stmt_vec_info c_stmt_info = stmt_info; ++ bool succ = true; ++ while (group_size >= vf) ++ { ++ vec_alloc (worklist, vf); ++ for (unsigned j = 0; j < vf; ++j) ++ { ++ if (c_stmt_info == NULL) ++ { ++ worklist->release (); ++ succ = false; ++ break; ++ } ++ ginfo = new _group_info (); ++ ginfo->stmt = c_stmt_info->stmt; ++ worklist->safe_push (ginfo); ++ c_stmt_info = c_stmt_info->next_element; ++ } ++ if (!succ) ++ { ++ break; ++ } ++ worklists.safe_push (worklist); ++ group_size -= vf; ++ } ++ } ++ return worklists.length (); ++} ++ ++static bool ++check_same_oprand_type (tree op1, tree op2) ++{ ++ tree type1 = TREE_TYPE (op1); ++ tree type2 = TREE_TYPE (op2); ++ if (TREE_CODE (type1) != INTEGER_TYPE && TREE_CODE (type1) != REAL_TYPE) ++ { ++ return false; ++ } ++ return (TREE_CODE (type1) == TREE_CODE (type2) ++ && TYPE_UNSIGNED (type1) == TYPE_UNSIGNED (type2) ++ && TYPE_PRECISION (type1) == TYPE_PRECISION (type2)); ++} ++ ++static bool ++bit_field_p (gimple *stmt) ++{ ++ unsigned i = 0; ++ auto_vec datarefs_vec; ++ data_reference_p dr; ++ if (!find_data_references_in_stmt (NULL, stmt, &datarefs_vec)) ++ { ++ return true; ++ } ++ ++ FOR_EACH_VEC_ELT (datarefs_vec, i, dr) ++ { ++ if (TREE_CODE (DR_REF (dr)) == COMPONENT_REF ++ && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (dr), 1))) ++ { ++ return true; ++ } ++ } ++ return false; ++} ++ ++static inline ++bool shift_operation (enum tree_code op) ++{ ++ return op == LSHIFT_EXPR || op == RSHIFT_EXPR || op == LROTATE_EXPR ++ || op == RROTATE_EXPR; ++} ++ ++/* Check if USE_STMT is isomorphic with the first use_stmt of the group. ++ RHS1 is the lhs of stmt recorded in group_info. If another rhs of use_stmt ++ is not a constant, return UNCERTAIN and re-check it later. 
*/ ++ ++static unsigned ++check_isomorphic (gimple* use_stmt, gimple* &first, ++ tree rhs1, vec &hetero_lhs) ++{ ++ /* Check same operator. */ ++ enum tree_code rhs_code_first = gimple_assign_rhs_code (first); ++ enum tree_code rhs_code_current = gimple_assign_rhs_code (use_stmt); ++ if (rhs_code_first != rhs_code_current) ++ { ++ return HETEROGENEOUS; ++ } ++ /* For shift operations, oprands should be equal. */ ++ if (shift_operation (rhs_code_current)) ++ { ++ tree shift_op_first = gimple_assign_rhs2 (first); ++ tree shift_op_current = gimple_assign_rhs2 (use_stmt); ++ if (!operand_equal_p (shift_op_first, shift_op_current, 0) ++ || !TREE_CONSTANT (shift_op_first)) ++ { ++ return HETEROGENEOUS; ++ } ++ return ISOMORPHIC; ++ } ++ /* Type convert expr or assignment. */ ++ if (gimple_num_ops (first) == 2) ++ { ++ return (rhs_code_first == NOP_EXPR || rhs_code_first == CONVERT_EXPR ++ || rhs_code_first == SSA_NAME) ? ISOMORPHIC : HETEROGENEOUS; ++ } ++ /* We find use_stmt of LHS of current stmt, this LHS is one of the rhses of ++ use_stmt, check if the other rhs of use_stmt are isomorphic with the ++ first use_stmt. */ ++ tree rhs2_first ++ = gimple_assign_rhs1 (use_stmt) == rhs1 ++ ? gimple_assign_rhs2 (first) : gimple_assign_rhs1 (first); ++ tree rhs2 ++ = gimple_assign_rhs1 (use_stmt) == rhs1 ++ ? gimple_assign_rhs2 (use_stmt) : gimple_assign_rhs1 (use_stmt); ++ ++ if (check_same_oprand_type (rhs2_first, rhs2)) ++ { ++ if (TREE_CONSTANT (rhs2)) ++ { ++ return ISOMORPHIC; ++ } ++ else if (hetero_lhs.contains (rhs2)) ++ { ++ return HETEROGENEOUS; ++ } ++ return UNCERTAIN; ++ } ++ return HETEROGENEOUS; ++} ++ ++/* Check if single use_stmt of STMT is isomorphic with the first one in ++ current group. */ ++ ++static unsigned ++check_use_stmt (group_info elmt, gimple* &first, ++ vec &tmp_stmts, vec &hetero_lhs) ++{ ++ if (gimple_code (elmt->stmt) != GIMPLE_ASSIGN) ++ { ++ return HETEROGENEOUS; ++ } ++ use_operand_p dummy; ++ tree lhs = gimple_assign_lhs (elmt->stmt); ++ gimple *use_stmt = NULL; ++ single_imm_use (lhs, &dummy, &use_stmt); ++ /* STMTs with three rhs are not supported, e.g., GIMPLE_COND. */ ++ if (use_stmt == NULL || gimple_num_ops (use_stmt) > 3 ++ || gimple_code (use_stmt) != GIMPLE_ASSIGN || bit_field_p (use_stmt)) ++ { ++ return HETEROGENEOUS; ++ } ++ tmp_stmts.safe_push (use_stmt); ++ if (first == NULL) ++ { ++ first = use_stmt; ++ return UNINITIALIZED; ++ } ++ /* Check if current use_stmt and the first menber's use_stmt in the group ++ are of the same type. */ ++ tree lhs_first = gimple_assign_lhs (first); ++ tree use_lhs = gimple_assign_lhs (use_stmt); ++ if (!check_same_oprand_type (lhs_first, use_lhs)) ++ { ++ return HETEROGENEOUS; ++ } ++ return check_isomorphic (use_stmt, first, lhs, hetero_lhs); ++} ++ ++/* Replace stmts in group with stmts in TMP_STMTS, and insert them into ++ ISOMER_LHS. */ ++ ++static void ++update_isomer_lhs (vec *group, unsigned group_num, ++ unsigned iteration, isomer_stmt_lhs &isomer_lhs, ++ vec tmp_stmts, int &profit) ++{ ++ group_info elmt = NULL; ++ /* Do not insert temp array if isomorphic stmts from grouped load have ++ only casting operations. Once isomorphic calculation has 3 oprands, ++ such as plus operation, this group can be regarded as cut point. */ ++ bool operated = (gimple_num_ops (tmp_stmts[0]) == 3); ++ /* Do not insert temp arrays if search of iosomophic stmts reaches ++ MEM stmts. 
*/ ++ bool has_vdef = gimple_vdef (tmp_stmts[0]) != NULL; ++ for (unsigned i = 0; i < group->length (); i++) ++ { ++ elmt = (*group)[i]; ++ elmt->stmt = has_vdef ? NULL : tmp_stmts[i]; ++ elmt->cut_point = has_vdef ? false : (elmt->cut_point || operated); ++ elmt->uncertain = false; ++ elmt->done = has_vdef; ++ tree lhs = gimple_assign_lhs (tmp_stmts[i]); ++ isomer_lhs[lhs] = std::make_pair (group_num, iteration); ++ } ++ enum vect_cost_for_stmt kind = scalar_stmt; ++ int scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0); ++ profit = (tmp_stmts.length () - 1) * scalar_cost; ++} ++ ++/* Try to find rhs2 in ISOMER_LHS, if all rhs2 were found and their group_num ++ and iteration are same, GROUP is isomorphic. */ ++ ++static unsigned ++check_isomorphic_rhs (vec *group, vec &tmp_stmts, ++ isomer_stmt_lhs &isomer_lhs) ++{ ++ group_info elmt = NULL; ++ gimple *stmt = NULL; ++ unsigned j = 0; ++ unsigned group_num_tmp = -1u; ++ unsigned iteration_tmp = -1u; ++ tree rhs1 = NULL; ++ tree rhs2 = NULL; ++ unsigned status = UNINITIALIZED; ++ FOR_EACH_VEC_ELT (*group, j, elmt) ++ { ++ rhs1 = gimple_assign_lhs (elmt->stmt); ++ stmt = tmp_stmts[j]; ++ rhs2 = (rhs1 == gimple_assign_rhs1 (stmt)) ++ ? gimple_assign_rhs2 (stmt) : gimple_assign_rhs1 (stmt); ++ isomer_stmt_lhs::iterator iter = isomer_lhs.find (rhs2); ++ if (iter != isomer_lhs.end ()) ++ { ++ if (group_num_tmp == -1u) ++ { ++ group_num_tmp = iter->second.first; ++ iteration_tmp = iter->second.second; ++ status |= ISOMORPHIC; ++ continue; ++ } ++ unsigned group_num_rhs2 = iter->second.first; ++ unsigned iteration_rhs2 = iter->second.second; ++ if (group_num_rhs2 == group_num_tmp ++ && iteration_rhs2 == iteration_tmp) ++ { ++ status |= ISOMORPHIC; ++ continue; ++ } ++ return HETEROGENEOUS; ++ } ++ else ++ { ++ status |= UNCERTAIN; ++ } ++ } ++ return status; ++} ++ ++/* Insert >> into UNCERT_STMTS ++ and update group_info. */ ++ ++static void ++update_uncertain_stmts (vec *group, unsigned group_num, ++ unsigned iteration, vec &tmp_stmts, ++ uncertain_stmts &uncert_stmts) ++{ ++ unsigned j = 0; ++ group_info elmt = NULL; ++ FOR_EACH_VEC_ELT (*group, j, elmt) ++ { ++ tree rhs1 = gimple_assign_lhs (elmt->stmt); ++ tree rhs2 = gimple_assign_rhs1 (tmp_stmts[j]) == rhs1 ++ ? gimple_assign_rhs2 (tmp_stmts[j]) ++ : gimple_assign_rhs1 (tmp_stmts[j]); ++ uncert_stmts[elmt->stmt] = std::make_pair (rhs2, ++ std::make_pair (group_num, iteration)); ++ elmt->uncertain = true; ++ elmt->done = false; ++ } ++} ++ ++/* If a group in UNCERT_STMTS is determined to be isomorphic, remove it ++ from UNCERT_STMTS and insert it into ISOMER_STMTS. */ ++ ++static void ++remove_uncertain_stmts (uncertain_stmts &uncert_stmts, ++ isomer_stmt_lhs &isomer_lhs, vec &tmp_stmts, ++ unsigned group_num, unsigned iteration) ++{ ++ unsigned i = 0; ++ gimple *stmt = NULL; ++ FOR_EACH_VEC_ELT (tmp_stmts, i, stmt) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ uncertain_stmts::iterator it = uncert_stmts.find (stmt); ++ if (it != uncert_stmts.end ()) ++ { ++ uncert_stmts.erase (it); ++ isomer_lhs[lhs] = std::make_pair (group_num, iteration); ++ } ++ } ++} ++ ++/* Try to find stmts in TMP_STMTS in UNCERT_STMTS. If all of them have same ++ group_num and iteration, GROUP is isomorphic and remove this group from ++ UNCERT_STMTS. If no stmt was found, GROUP remains uncertain. Otherwise, ++ GROUP is heterogeneous. 
++
++static unsigned
++check_uncertain_stmts (vec<group_info> *group, unsigned group_num,
++		       unsigned iteration, bool &fatal,
++		       vec<gimple *> &tmp_stmts, uncertain_stmts &uncert_stmts,
++		       isomer_stmt_lhs &isomer_lhs)
++{
++  group_info elmt = NULL;
++  unsigned j = 0;
++  unsigned group_num_tmp = -1u;
++  unsigned iteration_tmp = -1u;
++  unsigned status = UNINITIALIZED;
++  FOR_EACH_VEC_ELT (*group, j, elmt)
++    {
++      tree rhs1 = gimple_assign_lhs (elmt->stmt);
++      gimple *stmt = tmp_stmts[j];
++      uncertain_stmts::iterator iter = uncert_stmts.find (stmt);
++      if (iter != uncert_stmts.end ())
++	{
++	  tree rhs2 = iter->second.first;
++	  if (rhs1 == rhs2)
++	    {
++	      status |= UNCERTAIN;
++	      continue;
++	    }
++	  /* E.g., STMT1 and STMT2 are in the same group, LHS1 is the lhs of
++	     STMT1 and LHS2 is the lhs of STMT2.  If the use_stmts of LHS1
++	     and LHS2 are the same, mark the group as heterogeneous and no
++	     longer analyze it during later iterations. */
++	  if (iter->second.second.first == group_num)
++	    {
++	      fatal = true;
++	      return HETEROGENEOUS;
++	    }
++	  /* First time we find STMT in UNCERT_STMTS. */
++	  if (group_num_tmp == -1u)
++	    {
++	      group_num_tmp = iter->second.second.first;
++	      iteration_tmp = iter->second.second.second;
++	      status |= ISOMORPHIC;
++	      continue;
++	    }
++	  unsigned group_num_rhs2 = iter->second.second.first;
++	  unsigned iteration_rhs2 = iter->second.second.second;
++	  if (group_num_rhs2 == group_num_tmp
++	      && iteration_rhs2 == iteration_tmp)
++	    {
++	      status |= ISOMORPHIC;
++	      continue;
++	    }
++	  return HETEROGENEOUS;
++	}
++    }
++  if (status == ISOMORPHIC)
++    {
++      remove_uncertain_stmts (uncert_stmts, isomer_lhs, tmp_stmts,
++			      group_num, iteration);
++    }
++  return status;
++}
++
++/* Push the lhs of each stmt in TMP_STMTS into HETERO_LHS. */
++
++static void
++set_hetero (vec<group_info> *group, vec<tree> &hetero_lhs,
++	    vec<gimple *> &tmp_stmts)
++{
++  group_info elmt = NULL;
++  unsigned i = 0;
++  for (i = 0; i < group->length (); i++)
++    {
++      elmt = (*group)[i];
++      elmt->uncertain = false;
++      elmt->done = true;
++    }
++  gimple *stmt = NULL;
++  FOR_EACH_VEC_ELT (tmp_stmts, i, stmt)
++    {
++      if (stmt != NULL)
++	{
++	  hetero_lhs.safe_push (gimple_assign_lhs (stmt));
++	}
++    }
++}
++
++/* Given an uncertain group, TMP_STMTS are the use_stmts of the stmts in
++   GROUP.  RHS1 is the lhs of a stmt in GROUP, RHS2 is the other rhs.
++   First try to find each rhs2 in ISOMER_LHS; if all found rhs2 have the same
++   group_num and iteration, this uncertain group is isomorphic.
++   If no rhs was found in ISOMER_LHS, try to find the stmts of the group in
++   UNCERT_STMTS.  If all stmts with rhs2 have the same group_num and
++   iteration, this GROUP is isomorphic.  If no stmt was found, this GROUP
++   remains uncertain.  Otherwise, this GROUP is heterogeneous. */
++
++static bool
++check_uncertain (vec<group_info> *group, unsigned group_num,
++		 unsigned iteration, bool &fatal, int &profit,
++		 vec<gimple *> &tmp_stmts, isomer_stmt_lhs &isomer_lhs,
++		 vec<tree> &hetero_lhs, uncertain_stmts &uncert_stmts)
++{
++  unsigned status = check_isomorphic_rhs (group, tmp_stmts, isomer_lhs);
++  if (status == UNINITIALIZED)
++    {
++      status = check_uncertain_stmts (group, group_num, iteration, fatal,
++				      tmp_stmts, uncert_stmts, isomer_lhs);
++    }
++
++  switch (status)
++    {
++      case UNINITIALIZED:
++	update_uncertain_stmts (group, group_num, iteration,
++				tmp_stmts, uncert_stmts);
++	return false;
++      case ISOMORPHIC:
++	update_isomer_lhs (group, group_num, iteration,
++			   isomer_lhs, tmp_stmts, profit);
++	return false;
++      default:
++	set_hetero (group, hetero_lhs, tmp_stmts);
++	return true;
++    }
++}
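++
++/* Illustration (assuming the ISOMORPHIC and UNCERTAIN result codes are
++   distinct bits, as the |= accumulation above suggests): if some members of
++   a group classify as ISOMORPHIC while others classify as UNCERTAIN, the
++   accumulated status equals neither pure value, so the switches in
++   check_uncertain and check_group fall through to the heterogeneous
++   default.  Only a group whose members all agree is recorded as isomorphic
++   or kept queued as uncertain. */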
++
++/* Return false if the current group is isomorphic or uncertain to be
++   isomorphic.  Otherwise, return true. */
++
++static bool
++check_group (vec<group_info> *group, unsigned group_num,
++	     unsigned iteration, bool &fatal, int &profit,
++	     vec<unsigned> &merged_groups, isomer_stmt_lhs &isomer_lhs,
++	     vec<tree> &hetero_lhs, uncertain_stmts uncert_stmts)
++{
++  unsigned j = 0;
++  group_info elmt = NULL;
++  gimple *first = NULL;
++  unsigned res = 0;
++  /* Record the single use stmts in TMP_STMTS and decide whether to replace
++     the stmts in ginfo in succeeding processes. */
++  auto_vec<gimple *> tmp_stmts;
++  FOR_EACH_VEC_ELT (*group, j, elmt)
++    {
++      if (merged_groups.contains (j))
++	{
++	  return true;
++	}
++      res |= check_use_stmt (elmt, first, tmp_stmts, hetero_lhs);
++    }
++
++  /* Update each group member according to RES. */
++  switch (res)
++    {
++      case ISOMORPHIC:
++	update_isomer_lhs (group, group_num, iteration,
++			   isomer_lhs, tmp_stmts, profit);
++	return false;
++      case UNCERTAIN:
++	return check_uncertain (group, group_num, iteration, fatal, profit,
++				tmp_stmts, isomer_lhs, hetero_lhs,
++				uncert_stmts);
++      default:
++	set_hetero (group, hetero_lhs, tmp_stmts);
++	return true;
++    }
++}
++
++/* If all analyses are done except for uncertain groups, break the loop. */
++
++static bool
++end_of_search (vec<vec<group_info> *> &circular_queue)
++{
++  unsigned i = 0;
++  vec<group_info> *group = NULL;
++  group_info elmt = NULL;
++  FOR_EACH_VEC_ELT (circular_queue, i, group)
++    {
++      elmt = (*group)[0];
++      if (!elmt->done && !elmt->uncertain)
++	{
++	  return false;
++	}
++    }
++  return true;
++}
++
++/* Push valid stmts to STMTS as cut points. */
++
++static bool
++check_any_cutpoints (vec<vec<group_info> *> &circular_queue,
++		     vec<gimple *> *&stmts, vec<unsigned> &merged_groups)
++{
++  unsigned front = 0;
++  vec<group_info> *group = NULL;
++  group_info elmt = NULL;
++  unsigned max = circular_queue.length () * circular_queue[0]->length ();
++  vec_alloc (stmts, max);
++  while (front < circular_queue.length ())
++    {
++      unsigned i = 0;
++      if (merged_groups.contains (front))
++	{
++	  front++;
++	  continue;
++	}
++      group = circular_queue[front++];
++      FOR_EACH_VEC_ELT (*group, i, elmt)
++	{
++	  if (elmt->stmt != NULL && elmt->done && elmt->cut_point)
++	    {
++	      stmts->safe_push (elmt->stmt);
++	    }
++	}
++    }
++  return stmts->length () != 0;
++}
++
++/* Grouped loads are isomorphic.  Make a pair of group number and iteration,
++   and map each load stmt to this pair.  We set iteration to 0 here. */
++
++static void
++init_isomer_lhs (vec<vec<group_info> *> &groups, isomer_stmt_lhs &isomer_lhs)
++{
++  vec<group_info> *group = NULL;
++  group_info elmt = NULL;
++  unsigned i = 0;
++  FOR_EACH_VEC_ELT (groups, i, group)
++    {
++      unsigned j = 0;
++      FOR_EACH_VEC_ELT (*group, j, elmt)
++	{
++	  isomer_lhs[gimple_assign_lhs (elmt->stmt)] = std::make_pair (i, 0);
++	}
++    }
++}
++
++/* Estimate the profit, in units of scalar cost, of replacing LOADS scalar
++   loads by vector loads with vector factor VF, minus the cost of the TMP
++   temp-array stores and reloads that the transformation introduces. */
++
++static int
++load_store_profit (unsigned loads, unsigned vf, unsigned tmp)
++{
++  int profit = 0;
++  enum vect_cost_for_stmt kind = scalar_load;
++  int scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0);
++  profit += (loads - (loads / vf)) * scalar_cost;
++  profit -= tmp / vf * scalar_cost;
++  kind = scalar_store;
++  scalar_cost = builtin_vectorization_cost (kind, NULL_TREE, 0);
++  profit -= tmp / vf * scalar_cost;
++  return profit;
++}
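++
++/* Worked example (hypothetical numbers) for load_store_profit (): with
++   LOADS = 16 scalar loads, VF = 4 and TMP = 8 temp-array slots, writing
++   C_L and C_S for the scalar load/store costs returned by
++   builtin_vectorization_cost:
++
++     profit = (16 - 16/4) * C_L - (8/4) * C_L - (8/4) * C_S
++	    = 12 * C_L - 2 * C_L - 2 * C_S
++	    = 10 * C_L - 2 * C_S
++
++   so insertion pays off unless the extra temp-array stores cost far more
++   than the loads they save. */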
++
++/* Breadth-first search the graph consisting of the define-use chains,
++   starting from the circular queue initialized by function BUILD_QUEUE.
++   Find the single use of each stmt in a group and check if they are
++   isomorphic.  Isomorphic is defined as same rhs type, same operator, and
++   isomorphic calculation of each rhs starting from the loads.  If another
++   rhs is uncertain to be isomorphic, put it at the end of the circular
++   queue and re-analyze it during the next iteration.  If a group shares the
++   same use_stmt with another group, skip it in succeeding processes.
++   Iterate the circular queue until all groups are ignored or the use_stmts
++   are heterogeneous.  If all other groups have finished the analysis and
++   the remaining groups are uncertain to be isomorphic, set those groups to
++   be heterogeneous to avoid an endless loop. */
++
++static bool
++bfs_find_isomer_stmts (vec<vec<group_info> *> &circular_queue,
++		       stmts_profit &profit_pair, unsigned vf,
++		       bool &reach_vdef)
++{
++  isomer_stmt_lhs isomer_lhs;
++  uncertain_stmts uncert_stmts;
++  auto_vec<tree> hetero_lhs;
++  auto_vec<unsigned> merged_groups;
++  vec<group_info> *group = NULL;
++  bool done = false;
++  int profit_sum = 0;
++  vec<gimple *> *stmts = NULL;
++  init_isomer_lhs (circular_queue, isomer_lhs);
++  for (unsigned i = 1; !done; ++i)
++    {
++      unsigned front = 0;
++      bool fatal = false;
++      /* Re-initialize DONE to TRUE when a new iteration begins. */
++      done = true;
++      while (front < circular_queue.length ())
++	{
++	  int profit = 0;
++	  group = circular_queue[front];
++	  done &= check_group (group, front, i, fatal, profit, merged_groups,
++			       isomer_lhs, hetero_lhs, uncert_stmts);
++	  if (fatal)
++	    {
++	      return false;
++	    }
++	  profit_sum += profit;
++	  if (profit != 0 && (*group)[0]->stmt == NULL)
++	    {
++	      reach_vdef = true;
++	      return false;
++	    }
++	  ++front;
++	}
++      if (!done && end_of_search (circular_queue))
++	{
++	  return false;
++	}
++    }
++  if (check_any_cutpoints (circular_queue, stmts, merged_groups))
++    {
++      profit_pair.first = stmts;
++      unsigned loads = circular_queue.length () * circular_queue[0]->length ();
++      profit_pair.second = profit_sum + load_store_profit (loads, vf,
++							   stmts->length ());
++      if (profit_pair.second > 0)
++	{
++	  return true;
++	}
++    }
++  return false;
++}
++
++/* Free the memory allocated for each ginfo. */
++
++static void
++free_ginfos (vec<vec<group_info> *> &worklists)
++{
++  vec<group_info> *worklist;
++  unsigned i = 0;
++  while (i < worklists.length ())
++    {
++      worklist = worklists[i++];
++      group_info ginfo;
++      unsigned j = 0;
++      FOR_EACH_VEC_ELT (*worklist, j, ginfo)
++	{
++	  delete ginfo;
++	}
++    }
++}
++
++/* Release the stmt vectors collected for each candidate VF. */
++
++static void
++release_tmp_stmts (vf_stmts_profit_map &candi_stmts)
++{
++  vf_stmts_profit_map::iterator iter;
++  for (iter = candi_stmts.begin (); iter != candi_stmts.end (); ++iter)
++    {
++      iter->second.first->release ();
++    }
++}
++
++/* Choose the group of stmts with the maximum profit. */
++
++static bool
++decide_stmts_by_profit (vf_stmts_profit_map &candi_stmts, vec<gimple *> &stmts)
++{
++  vf_stmts_profit_map::iterator iter;
++  int profit = 0;
++  int max = 0;
++  vec<gimple *> *tmp = NULL;
++  for (iter = candi_stmts.begin (); iter != candi_stmts.end (); ++iter)
++    {
++      profit = iter->second.second;
++      if (profit > max)
++	{
++	  tmp = iter->second.first;
++	  max = profit;
++	}
++      else
++	{
++	  iter->second.first->release ();
++	}
++    }
++  if (max == 0)
++    {
++      release_tmp_stmts (candi_stmts);
++      return false;
++    }
++  unsigned i = 0;
++  gimple *stmt = NULL;
++  FOR_EACH_VEC_ELT (*tmp, i, stmt)
++    {
++      stmts.safe_push (stmt);
++    }
++  release_tmp_stmts (candi_stmts);
++  return stmts.length () != 0;
++}
++
++/* Find isomorphic stmts from the grouped loads with vector factor VF.
++
++   Given source code as follows (ignoring casts):
++
++     a0 = (a[0] + b[0]) + ((a[4] - b[4]) << 16);
++     a1 = (a[1] + b[1]) + ((a[5] - b[5]) << 16);
++     a2 = (a[2] + b[2]) + ((a[6] - b[6]) << 16);
++     a3 = (a[3] + b[3]) + ((a[7] - b[7]) << 16);
++
++   We get the grouped loads in VINFO as
++
++     GROUP_1		GROUP_2
++     _1 = *a		_11 = *b
++     _2 = *(a + 1)	_12 = *(b + 1)
++     _3 = *(a + 2)	_13 = *(b + 2)
++     _4 = *(a + 3)	_14 = *(b + 3)
++     _5 = *(a + 4)	_15 = *(b + 4)
++     _6 = *(a + 5)	_16 = *(b + 5)
++     _7 = *(a + 6)	_17 = *(b + 6)
++     _8 = *(a + 7)	_18 = *(b + 7)
++
++   First we try VF = 8 and get two worklists
++
++     WORKLIST_1		WORKLIST_2
++     _1 = *a		_11 = *b
++     _2 = *(a + 1)	_12 = *(b + 1)
++     _3 = *(a + 2)	_13 = *(b + 2)
++     _4 = *(a + 3)	_14 = *(b + 3)
++     _5 = *(a + 4)	_15 = *(b + 4)
++     _6 = *(a + 5)	_16 = *(b + 5)
++     _7 = *(a + 6)	_17 = *(b + 6)
++     _8 = *(a + 7)	_18 = *(b + 7)
++
++   We find that _111 = _1 + _11 and _115 = _5 - _15 are not isomorphic,
++   so we try VF = VF / 2.
++
++     GROUP_1		GROUP_2
++     _1 = *a		_5 = *(a + 4)
++     _2 = *(a + 1)	_6 = *(a + 5)
++     _3 = *(a + 2)	_7 = *(a + 6)
++     _4 = *(a + 3)	_8 = *(a + 7)
++
++     GROUP_3		GROUP_4
++     _11 = *b		_15 = *(b + 4)
++     _12 = *(b + 1)	_16 = *(b + 5)
++     _13 = *(b + 2)	_17 = *(b + 6)
++     _14 = *(b + 3)	_18 = *(b + 7)
++
++   We first treat group_1, find that all operations are isomorphic, and then
++   replace the stmts in group_1 with their use_stmts.  Group_2 as well.
++
++     GROUP_1		GROUP_2
++     _111 = _1 + _11	_115 = _5 - _15
++     _112 = _2 + _12	_116 = _6 - _16
++     _113 = _3 + _13	_117 = _7 - _17
++     _114 = _4 + _14	_118 = _8 - _18
++
++   When treating group_3 and group_4, we find that their use_stmts are the
++   same as those of group_1 and group_2.  So group_3 is regarded as merged
++   into group_1 and group_4 as merged into group_2.  In future procedures,
++   we will skip group_3 and group_4.
++
++   We repeat such processing until the operations are not isomorphic or the
++   search reaches store stmts.  In the given case, the search ends up at a0,
++   a1, a2 and a3. */
++
++static bool
++find_isomorphic_stmts (loop_vec_info vinfo, vec<gimple *> &stmts)
++{
++  unsigned vf = get_max_vf (vinfo);
++  if (vf == 0)
++    {
++      return false;
++    }
++  auto_vec<vec<group_info> *> circular_queue;
++  /* TMP_STMTS are the cut points inferred from a certain VF. */
++  stmts_profit profit_map;
++  /* CANDI_STMTS maps each VF to its TMP_STMTS and profit. */
++  vf_stmts_profit_map candi_stmts;
++  bool reach_vdef = false;
++  while (vf > 2)
++    {
++      if (build_queue (vinfo, vf, circular_queue) == 0)
++	{
++	  return false;
++	}
++      if (!bfs_find_isomer_stmts (circular_queue, profit_map, vf, reach_vdef))
++	{
++	  if (reach_vdef)
++	    {
++	      release_tmp_stmts (candi_stmts);
++	      circular_queue.release ();
++	      return false;
++	    }
++	  vf /= 2;
++	  circular_queue.release ();
++	  continue;
++	}
++      candi_stmts[vf] = profit_map;
++      free_ginfos (circular_queue);
++      vf /= 2;
++      circular_queue.release ();
++    }
++  return decide_stmts_by_profit (candi_stmts, stmts);
++}
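++
++/* Illustration (hypothetical seed stores) for find_index () below: it
++   returns i_5 for
++
++     a[i_5] = _101;
++     b[i_5] = _102;
++
++   because every ARRAY_REF on a lhs is subscripted by the same SSA name,
++   and it returns NULL for
++
++     a[i_5] = _101;
++     b[j_7] = _102;
++
++   since a single induction variable is needed to index the inserted temp
++   arrays. */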
++
++/* Check that all iteration variables are the same and return that iteration
++   variable.  Otherwise, return NULL. */
++
++static tree
++find_index (vec<gimple *> seed_stmts)
++{
++  if (seed_stmts.length () == 0)
++    {
++      return NULL;
++    }
++  bool found_index = false;
++  tree index = NULL;
++  unsigned ui = 0;
++  for (ui = 0; ui < seed_stmts.length (); ui++)
++    {
++      if (!gimple_vdef (seed_stmts[ui]))
++	{
++	  return NULL;
++	}
++      tree lhs = gimple_assign_lhs (seed_stmts[ui]);
++      while (TREE_CODE (lhs) == ARRAY_REF)
++	{
++	  if (TREE_CODE (TREE_OPERAND (lhs, 1)) == SSA_NAME)
++	    {
++	      if (index == NULL)
++		{
++		  index = TREE_OPERAND (lhs, 1);
++		  found_index = true;
++		}
++	      else if (index != TREE_OPERAND (lhs, 1))
++		{
++		  return NULL;
++		}
++	    }
++	  lhs = TREE_OPERAND (lhs, 0);
++	}
++      if (!found_index)
++	{
++	  return NULL;
++	}
++    }
++  return index;
++}
++
++/* Check if the expression fed by the phi is an increment by one. */
++
++static void
++check_phi_inc (struct vertex *v_phi, struct graph *rdg, bool &found_inc)
++{
++  struct graph_edge *e_phi;
++  for (e_phi = v_phi->succ; e_phi; e_phi = e_phi->succ_next)
++    {
++      struct vertex *v_inc = &(rdg->vertices[e_phi->dest]);
++      if (!is_gimple_assign (RDGV_STMT (v_inc))
++	  || gimple_expr_code (RDGV_STMT (v_inc)) != PLUS_EXPR)
++	{
++	  continue;
++	}
++      tree rhs1 = gimple_assign_rhs1 (RDGV_STMT (v_inc));
++      tree rhs2 = gimple_assign_rhs2 (RDGV_STMT (v_inc));
++      if (!(integer_onep (rhs1) || integer_onep (rhs2)))
++	{
++	  continue;
++	}
++      struct graph_edge *e_inc;
++      /* Find a cycle with only the two vertices inc and phi: inc <--> phi. */
++      bool found_cycle = false;
++      for (e_inc = v_inc->succ; e_inc; e_inc = e_inc->succ_next)
++	{
++	  if (e_inc->dest == e_phi->src)
++	    {
++	      found_cycle = true;
++	      break;
++	    }
++	}
++      if (!found_cycle)
++	{
++	  continue;
++	}
++      found_inc = true;
++    }
++}
++
++/* Check if the phi satisfies a form like PHI <0 (2), i_0 (8)>. */
++
++static inline bool
++iv_check_phi_stmt (gimple *phi_stmt)
++{
++  return gimple_phi_num_args (phi_stmt) == 2
++	 && (integer_zerop (gimple_phi_arg_def (phi_stmt, 0))
++	     || integer_zerop (gimple_phi_arg_def (phi_stmt, 1)));
++}
++
++/* Find the iteration variable and the phi expr, and check that they are
++   compatible. */
++
++static tree
++get_real_iv (struct graph *flow_only_rdg, vec<gimple *> seed_stmts)
++{
++  tree index = find_index (seed_stmts);
++  if (index == NULL)
++    {
++      return NULL;
++    }
++  for (int i = 0; i < flow_only_rdg->n_vertices; i++)
++    {
++      struct vertex *v = &(flow_only_rdg->vertices[i]);
++      if (RDGV_STMT (v) != seed_stmts[0])
++	{
++	  continue;
++	}
++      struct graph_edge *e;
++      bool found_phi = false;
++      for (e = v->pred; e; e = e->pred_next)
++	{
++	  struct vertex *v_phi = &(flow_only_rdg->vertices[e->src]);
++	  gimple *phi_stmt = RDGV_STMT (v_phi);
++	  if (gimple_code (phi_stmt) != GIMPLE_PHI
++	      || gimple_phi_result (phi_stmt) != index)
++	    {
++	      continue;
++	    }
++	  if (!iv_check_phi_stmt (phi_stmt))
++	    {
++	      return NULL;
++	    }
++	  /* Find the inc expr among the successors of the phi. */
++	  bool found_inc = false;
++	  check_phi_inc (v_phi, flow_only_rdg, found_inc);
++	  if (!found_inc)
++	    {
++	      return NULL;
++	    }
++	  found_phi = true;
++	  break;
++	}
++      if (!found_phi)
++	{
++	  return NULL;
++	}
++      break;
++    }
++  return index;
++}
++
++/* Check that the vertices in ROOT_MAP have no data dependency on each other
++   in FLOW_ONLY_RDG.  If so, it is proper to distribute the loop with
++   ROOT_MAP. */
++
++static bool
++check_no_dependency (struct graph *flow_only_rdg, bitmap root_map)
++{
++  bitmap_iterator bi;
++  unsigned ui;
++  auto_vec<unsigned> visited_nodes;
++  auto_bitmap visited_map;
++  EXECUTE_IF_SET_IN_BITMAP (root_map, 0, ui, bi)
++    {
++      visited_nodes.safe_push (ui);
++    }
++  for (ui = 0; ui < visited_nodes.length (); ui++)
++    {
++      struct vertex *v = &(flow_only_rdg->vertices[visited_nodes[ui]]);
++      struct graph_edge *e;
++      for (e = v->succ; e; e = e->succ_next)
++	{
++	  if (bitmap_bit_p (root_map, e->dest))
++	    {
++	      return false;
++	    }
++	  if (bitmap_bit_p (visited_map, e->dest))
++	    {
++	      continue;
++	    }
++	  visited_nodes.safe_push (e->dest);
++	  bitmap_set_bit (visited_map, e->dest);
++	}
++    }
++  return true;
++}
++
++/* Find isomorphic stmts from the GROUPED_LOADS in VINFO and make sure there
++   is no dependency among the STMTs we found. */
++
++static unsigned
++get_cut_points (struct graph *flow_only_rdg, bitmap cut_points,
++		loop_vec_info vinfo)
++{
++  unsigned n_stmts = 0;
++
++  /* STMTS that may become CUT_POINTS. */
++  auto_vec<gimple *> stmts;
++  if (!find_isomorphic_stmts (vinfo, stmts))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "No temp array insertion: no isomorphic stmts"
++			      " were found.\n");
++	}
++      return 0;
++    }
++
++  for (int i = 0; i < flow_only_rdg->n_vertices; i++)
++    {
++      if (stmts.contains (RDG_STMT (flow_only_rdg, i)))
++	{
++	  bitmap_set_bit (cut_points, i);
++	}
++    }
++  n_stmts = bitmap_count_bits (cut_points);
++
++  bool succ = check_no_dependency (flow_only_rdg, cut_points);
++  if (!succ)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "No temp array inserted: data dependency exists"
++			      " among isomorphic stmts.\n");
++	}
++      return 0;
++    }
++  return n_stmts;
++}
++
++/* Rewrite the stmt at vertex V so that its result is stored to, and reloaded
++   from, a temp array element indexed by IV; record the array refs in
++   TMP_ARRAY_VARS and the inserted store in TRANSFORMED. */
++
++static void
++build_temp_array (struct vertex *v, gimple_stmt_iterator &gsi,
++		  poly_uint64 array_extent, tree iv,
++		  hash_set<tree> *tmp_array_vars,
++		  vec<gimple *> *transformed)
++{
++  gimple *stmt = RDGV_STMT (v);
++  tree lhs = gimple_assign_lhs (stmt);
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "original stmt:\t");
++      print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS);
++    }
++  tree var_ssa = duplicate_ssa_name (lhs, stmt);
++  gimple_assign_set_lhs (stmt, var_ssa);
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "changed to:\t");
++      print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS);
++    }
++  gimple_set_uid (gsi_stmt (gsi), -1);
++  tree vect_elt_type = TREE_TYPE (lhs);
++  tree array_type = build_array_type_nelts (vect_elt_type, array_extent);
++  tree array = create_tmp_var (array_type);
++  tree array_ssa = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL);
++  tmp_array_vars->add (array_ssa);
++  gimple *store = gimple_build_assign (array_ssa, var_ssa);
++  tree new_vdef = make_ssa_name (gimple_vop (cfun), store);
++  gsi_insert_after (&gsi, store, GSI_NEW_STMT);
++  gimple_set_vdef (store, new_vdef);
++  transformed->safe_push (store);
++  gimple_set_uid (gsi_stmt (gsi), -1);
++  tree array_ssa2 = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL);
++  tmp_array_vars->add (array_ssa2);
++  gimple *load = gimple_build_assign (lhs, array_ssa2);
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "insert stmt:\t");
++      print_gimple_stmt (dump_file, store, 0, TDF_VOPS|TDF_MEMSYMS);
++      fprintf (dump_file, " and stmt:\t");
++      print_gimple_stmt (dump_file, load, 0, TDF_VOPS|TDF_MEMSYMS);
++    }
++  gimple_set_vuse (load, new_vdef);
++  gsi_insert_after (&gsi, load, GSI_NEW_STMT);
++  gimple_set_uid (gsi_stmt (gsi), -1);
++}
++
++/* Set the bitmap PRODUCERS based on the vec TRANSFORMED. */
++
++static void
++build_producers (loop_p loop, bitmap producers, vec<gimple *> &transformed)
++{
++  auto_vec<gimple *> stmts;
++  stmts_from_loop (loop, &stmts);
++  int i = 0;
++  gimple *stmt = NULL;
++
++  FOR_EACH_VEC_ELT (stmts, i, stmt)
++    {
++      gimple_set_uid (stmt, i);
++    }
++  i = 0;
++  FOR_EACH_VEC_ELT (transformed, i, stmt)
++    {
++      bitmap_set_bit (producers, stmt->uid);
++    }
++}
++
++/* Transform the stmt
++
++     A = FOO (ARG_1);
++
++   to
++
++     STMT_1: A1 = FOO (ARG_1);
++     STMT_2: X[I] = A1;
++     STMT_3: A = X[I];
++
++   and the producer is STMT_2. */
++
++static void
++do_insertion (loop_p loop, struct graph *flow_only_rdg, tree iv,
++	      bitmap cut_points, hash_set<tree> *tmp_array_vars,
++	      bitmap producers)
++{
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "=== do insertion ===\n");
++    }
++  auto_vec<gimple *> transformed;
++  poly_uint64 array_extent
++    = tree_to_poly_uint64 (number_of_latch_executions (loop)) + 1;
++  /* Insert the new stmts into the loop. */
++  basic_block *bbs = get_loop_body_in_custom_order (loop, bb_top_order_cmp);
++  for (int i = 0; i < int (loop->num_nodes); i++)
++    {
++      basic_block bb = bbs[i];
++      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
++	   gsi_next (&gsi))
++	{
++	  unsigned j = gimple_uid (gsi_stmt (gsi));
++	  if (bitmap_bit_p (cut_points, j))
++	    {
++	      struct vertex *v = &(flow_only_rdg->vertices[j]);
++	      build_temp_array (v, gsi, array_extent, iv, tmp_array_vars,
++				&transformed);
++	    }
++	}
++    }
++  build_producers (loop, producers, transformed);
++  update_ssa (TODO_update_ssa);
++  free (bbs);
++}
++
++/* After temp array insertion, given the stmts
++     STMT_1: M = FOO (ARG_1);
++     STMT_2: X[I] = M;
++     STMT_3: A = X[I];
++   STMT_2 is the producer, STMT_1 is its prev and STMT_3 is its next.
++   Replace M with A, and remove STMT_2 and STMT_3. */
++
++static void
++reset_gimple_assign (struct graph *flow_only_rdg, struct partition *partition,
++		     gimple_stmt_iterator &gsi, int j)
++{
++  struct vertex *v = &(flow_only_rdg->vertices[j]);
++  gimple *stmt = RDGV_STMT (v);
++  gimple *prev = stmt->prev;
++  gimple *next = stmt->next;
++  tree n_lhs = gimple_assign_lhs (next);
++  gimple_assign_set_lhs (prev, n_lhs);
++  unlink_stmt_vdef (stmt);
++  if (partition)
++    {
++      bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi)));
++    }
++  gsi_remove (&gsi, true);
++  release_defs (stmt);
++  if (partition)
++    {
++      bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi)));
++    }
++  gsi_remove (&gsi, true);
++}
++
++/* Remove the temp-array producers recorded in PRODUCERS from the loop,
++   undoing do_insertion (). */
++
++static void
++remove_insertion (loop_p loop, struct graph *flow_only_rdg,
++		  bitmap producers, struct partition *partition)
++{
++  basic_block *bbs = get_loop_body_in_custom_order (loop, bb_top_order_cmp);
++  for (int i = 0; i < int (loop->num_nodes); i++)
++    {
++      basic_block bb = bbs[i];
++      for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
++	   gsi_next (&gsi))
++	{
++	  unsigned j = gimple_uid (gsi_stmt (gsi));
++	  if (bitmap_bit_p (producers, j))
++	    {
++	      reset_gimple_assign (flow_only_rdg, partition, gsi, j);
++	    }
++	}
++    }
++  update_ssa (TODO_update_ssa);
++  free (bbs);
++}
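++
++/* At the source level, the effect of do_insertion () on cut points is
++   roughly the following (illustrative, hypothetical code):
++
++     for (i = 0; i < n; i++)
++       dst[i] = (a[i] + b[i]) + ((a[i + 4] - b[i + 4]) << 16);
++
++   becomes
++
++     for (i = 0; i < n; i++)
++       {
++	 t1[i] = a[i] + b[i];
++	 t2[i] = (a[i + 4] - b[i + 4]) << 16;
++	 dst[i] = t1[i] + t2[i];
++       }
++
++   The temp arrays t1/t2 break the def-use chains inside the loop body, so
++   the distributor can later place producers and consumers in separate
++   loops; if only one partition remains, remove_insertion () undoes the
++   rewrite. */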
++
++/* Insert temp arrays if isomorphic computation exists.  The temp arrays
++   will be regarded as SEED_STMTS for building partitions in succeeding
++   processes. */
++
++static bool
++insert_temp_arrays (loop_p loop, vec<gimple *> seed_stmts,
++		    hash_set<tree> *tmp_array_vars, bitmap producers)
++{
++  struct graph *flow_only_rdg = build_rdg (loop, NULL);
++  gcc_checking_assert (flow_only_rdg != NULL);
++  auto_bitmap cut_points;
++  loop_vec_info vinfo = loop_vec_info_for_loop (loop);
++  unsigned n_cut_points = get_cut_points (flow_only_rdg, cut_points, vinfo);
++  delete vinfo;
++  loop->aux = NULL;
++  if (n_cut_points == 0)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "Loop %d no temp array insertion: no cut points"
++			      " found.\n", loop->num);
++	}
++      free_rdg (flow_only_rdg);
++      return false;
++    }
++  tree iv = get_real_iv (flow_only_rdg, seed_stmts);
++  if (iv == NULL)
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++	{
++	  fprintf (dump_file, "Loop %d no temp array insertion: failed to get"
++			      " iteration variable.\n", loop->num);
++	}
++      free_rdg (flow_only_rdg);
++      return false;
++    }
++  do_insertion (loop, flow_only_rdg, iv, cut_points, tmp_array_vars, producers);
++  if (dump_enabled_p ())
++    {
++      dump_user_location_t loc = find_loop_location (loop);
++      dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion done:"
++		       " %d temp arrays inserted in Loop %d.\n",
++		       n_cut_points, loop->num);
++    }
++  free_rdg (flow_only_rdg);
++  return true;
++}
++
++/* Given LOOP, this function records seed statements for distribution in
++   WORK_LIST.  Return false if there is nothing for distribution. */
++
++static bool
++find_seed_stmts_for_distribution (struct loop *loop, vec<gimple *> *work_list)
++{
++  basic_block *bbs = get_loop_body_in_dom_order (loop);
++
++  /* Initialize the worklist with stmts we seed the partitions with. */
++  for (unsigned i = 0; i < loop->num_nodes; ++i)
++    {
++      for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
++	   !gsi_end_p (gsi); gsi_next (&gsi))
++	{
++	  gphi *phi = gsi.phi ();
++	  if (virtual_operand_p (gimple_phi_result (phi)))
++	    continue;
++	  /* Distribute stmts which have defs that are used outside of
++	     the loop. */
++	  if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
++	    continue;
++	  work_list->safe_push (phi);
++	}
++      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
++	   !gsi_end_p (gsi); gsi_next (&gsi))
++	{
++	  gimple *stmt = gsi_stmt (gsi);
++
++	  /* If there is a stmt with side-effects bail out - we
++	     cannot and should not distribute this loop. */
++	  if (gimple_has_side_effects (stmt))
++	    {
++	      free (bbs);
++	      return false;
++	    }
++
++	  /* Distribute stmts which have defs that are used outside of
++	     the loop. */
++	  if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
++	    ;
++	  /* Otherwise only distribute stores for now. */
++	  else if (!gimple_vdef (stmt))
++	    continue;
++
++	  work_list->safe_push (stmt);
++	}
++    }
++  free (bbs);
++  return work_list->length () > 0;
++}
++
+ /* Distributes the code from LOOP in such a way that producer statements
+    are placed before consumer statements.  Tries to separate only the
+    statements from STMTS into separate loops.  Returns the number of
+@@ -2810,6 +4373,36 @@ distribute_loop (struct loop *loop, vec<
+       return 0;
+     }
+ 
++  /* Try to distribute LOOP if it is simple enough and cannot be vectorized.
++     If LOOP has grouped loads, recursively find isomorphic stmts and insert
++     temp arrays, rebuild the RDG and call find_seed_stmts_for_distribution
++     to replace STMTS. */
++
++  hash_set<tree> tmp_array_vars;
++
++  /* STMTs that define those inserted TMP_ARRAYs. */
++  auto_bitmap producers;
++
++  /* New SEED_STMTS after insertion. */
++  auto_vec<gimple *> work_list;
++  bool insert_succ = false;
++  if (may_insert_temp_arrays (loop, rdg, cd))
++    {
++      if (insert_temp_arrays (loop, stmts, &tmp_array_vars, producers))
++	{
++	  if (find_seed_stmts_for_distribution (loop, &work_list))
++	    {
++	      insert_succ = true;
++	      stmts = work_list;
++	    }
++	  else
++	    {
++	      remove_insertion (loop, rdg, producers, NULL);
++	    }
++	  rebuild_rdg (loop, rdg, cd);
++	}
++    }
++
+   data_reference_p dref;
+   for (i = 0; datarefs_vec.iterate (i, &dref); ++i)
+     dref->aux = (void *) (uintptr_t) i;
+@@ -2888,7 +4481,7 @@ distribute_loop (struct loop *loop, vec<
+ 	  for (int j = i + 1;
+ 	       partitions.iterate (j, &partition); ++j)
+ 	    {
+-	      if (share_memory_accesses (rdg, into, partition))
++	      if (share_memory_accesses (rdg, into, partition, &tmp_array_vars))
+ 		{
+ 		  partition_merge_into (rdg, into, partition, FUSE_SHARE_REF);
+ 		  partitions.unordered_remove (j);
+@@ -2923,9 +4516,27 @@ distribute_loop (struct loop *loop, vec<
+ 	}
+     }
+ 
+-  finalize_partitions (loop, &partitions, &alias_ddrs);
++  finalize_partitions (loop, &partitions, &alias_ddrs, producers);
+ 
+   nbp = partitions.length ();
++
++  /* If we have inserted TMP_ARRAYs but there is only one partition left in
++     the succeeding processes, restore those inserted TMP_ARRAYs back to the
++     original version. */
++
++  if (nbp == 1 && insert_succ)
++    {
++      struct partition *partition = NULL;
++      partitions.iterate (0, &partition);
++      remove_insertion (loop, rdg, producers, partition);
++      if (dump_enabled_p ())
++	{
++	  dump_user_location_t loc = find_loop_location (loop);
++	  dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion removed:"
++			   " unable to distribute loop %d.\n", loop->num);
++	}
++    }
++
+   if (nbp == 0
+       || (nbp == 1 && !partition_builtin_p (partitions[0]))
+       || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
+@@ -3005,57 +4616,6 @@ public:
+ }; // class pass_loop_distribution
+ 
+ 
+-/* Given LOOP, this function records seed statements for distribution in
+-   WORK_LIST.  Return false if there is nothing for distribution. */
+-
+-static bool
+-find_seed_stmts_for_distribution (struct loop *loop, vec<gimple *> *work_list)
+-{
+-  basic_block *bbs = get_loop_body_in_dom_order (loop);
+-
+-  /* Initialize the worklist with stmts we seed the partitions with. */
+-  for (unsigned i = 0; i < loop->num_nodes; ++i)
+-    {
+-      for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
+-	   !gsi_end_p (gsi); gsi_next (&gsi))
+-	{
+-	  gphi *phi = gsi.phi ();
+-	  if (virtual_operand_p (gimple_phi_result (phi)))
+-	    continue;
+-	  /* Distribute stmts which have defs that are used outside of
+-	     the loop. */
+-	  if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
+-	    continue;
+-	  work_list->safe_push (phi);
+-	}
+-      for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
+-	   !gsi_end_p (gsi); gsi_next (&gsi))
+-	{
+-	  gimple *stmt = gsi_stmt (gsi);
+-
+-	  /* If there is a stmt with side-effects bail out - we
+-	     cannot and should not distribute this loop. */
+-	  if (gimple_has_side_effects (stmt))
+-	    {
+-	      free (bbs);
+-	      return false;
+-	    }
+-
+-	  /* Distribute stmts which have defs that are used outside of
+-	     the loop. */
+-	  if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
+-	    ;
+-	  /* Otherwise only distribute stores for now. */
+-	  else if (!gimple_vdef (stmt))
+-	    continue;
+-
+-	  work_list->safe_push (stmt);
+-	}
+-    }
+-  free (bbs);
+-  return work_list->length () > 0;
+-}
+-
+ /* Given innermost LOOP, return the outermost enclosing loop that forms a
+    perfect loop nest. */
+ 
+diff -Nurp a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
+--- a/gcc/tree-vect-data-refs.c	2021-12-02 14:32:41.048404000 +0800
++++ b/gcc/tree-vect-data-refs.c	2021-12-02 20:31:21.682836100 +0800
+@@ -2617,6 +2617,9 @@ vect_analyze_group_access_1 (dr_vec_info
+       DR_GROUP_GAP (stmt_info) = groupsize - last_accessed_element;
+ 
+       DR_GROUP_SIZE (stmt_info) = groupsize;
++
++      DR_GROUP_SLP_TRANSPOSE (stmt_info) = false;
++
+       if (dump_enabled_p ())
+ 	{
+ 	  dump_printf_loc (MSG_NOTE, vect_location,
+@@ -2647,6 +2650,20 @@ vect_analyze_group_access_1 (dr_vec_info
+     }
+ 
+   /* SLP: create an SLP data structure for every interleaving group of
++     loads for further analysis in vect_analyse_slp. */
++  if (DR_IS_READ (dr) && !slp_impossible)
++    {
++      if (loop_vinfo)
++	{
++	  LOOP_VINFO_GROUPED_LOADS (loop_vinfo).safe_push (stmt_info);
++	}
++      if (bb_vinfo)
++	{
++	  BB_VINFO_GROUPED_LOADS (bb_vinfo).safe_push (stmt_info);
++	}
++    }
++
++  /* SLP: create an SLP data structure for every interleaving group of
+      stores for further analysis in vect_analyse_slp. */
+   if (DR_IS_WRITE (dr) && !slp_impossible)
+     {
+@@ -5334,6 +5351,224 @@ vect_permute_store_chain (vec<tree> dr_c
+     }
+ }
+ 
++/* Encode the PERM_MASK_FIRST which is the input of
++   calculate_perm_stmt (). */
++
++static void
++vect_indices_encoding_first (tree vectype, unsigned int array_num,
++			     tree &perm_mask_high_first,
++			     tree &perm_mask_low_first)
++{
++  unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
++  vec_perm_builder sel (nelt, nelt, 1);
++  sel.quick_grow (nelt);
++  unsigned int group_num = nelt / array_num;
++  unsigned int index = 0;
++  unsigned int array = 0;
++  unsigned int group = 0;
++
++  /* The encoding has 1 pattern in the first stage. */
++  for (array = 0; array < array_num / 2; array++)
++    {
++      for (group = 0; group < group_num * 2; group++)
++	{
++	  sel[index++] = array + array_num * group;
++	}
++    }
++  vec_perm_indices indices (sel, 2, nelt);
++  perm_mask_high_first = vect_gen_perm_mask_checked (vectype, indices);
++
++  index = 0;
++  for (array = array_num / 2; array < array_num; array++)
++    {
++      for (group = 0; group < group_num * 2; group++)
++	{
++	  sel[index++] = array + array_num * group;
++	}
++    }
++  indices.new_vector (sel, 2, nelt);
++  perm_mask_low_first = vect_gen_perm_mask_checked (vectype, indices);
++}
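++
++/* Worked example (hypothetical mode): for V8HImode, NELT = 8; with
++   ARRAY_NUM = 4, GROUP_NUM = 2 and the first-stage masks select, from the
++   16-lane concatenation <vect1, vect2>,
++
++     perm_mask_high_first: { 0, 4, 8, 12, 1, 5, 9, 13 }
++     perm_mask_low_first:  { 2, 6, 10, 14, 3, 7, 11, 15 }
++
++   i.e. for each array index, the lane at that position within every
++   ARRAY_NUM-element group of both input vectors, arrays 0..1 feeding the
++   high mask and arrays 2..3 the low mask. */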
++
++/* Encode the PERM_MASK which is the input of calculate_perm_stmt (). */
++
++static void
++vect_indices_encoding (tree vectype, unsigned int array_num,
++		       tree &perm_mask_high, tree &perm_mask_low)
++{
++  unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
++  vec_perm_builder sel (nelt, nelt, 1);
++  sel.quick_grow (nelt);
++  unsigned int group_num = nelt / array_num;
++  unsigned int index = 0;
++  unsigned int array = 0;
++  unsigned int group = 0;
++
++  /* The encoding has 2 interleaved stepped patterns. */
++  for (array = 0; array < array_num / 2; array++)
++    {
++      for (group = 0; group < group_num; group++)
++	{
++	  sel[index++] = group + group_num * array;
++	}
++      for (group = 0; group < group_num; group++)
++	{
++	  sel[index++] = nelt + group + group_num * array;
++	}
++    }
++  vec_perm_indices indices (sel, 2, nelt);
++  perm_mask_high = vect_gen_perm_mask_checked (vectype, indices);
++
++  index = 0;
++  for (array = array_num / 2; array < array_num; array++)
++    {
++      for (group = 0; group < group_num; group++)
++	{
++	  sel[index++] = group + group_num * array;
++	}
++      for (group = 0; group < group_num; group++)
++	{
++	  sel[index++] = nelt + group + group_num * array;
++	}
++    }
++  indices.new_vector (sel, 2, nelt);
++  perm_mask_low = vect_gen_perm_mask_checked (vectype, indices);
++}
++
++/* Function calculate_perm_stmt.
++
++   Create a grouped stmt (in the first stage):
++     high_first = VEC_PERM_EXPR <vect1, vect2, perm_mask_high_first>
++     low_first = VEC_PERM_EXPR <vect1, vect2, perm_mask_low_first>
++
++   Create an interleaving stmt (in the following stages):
++     high = VEC_PERM_EXPR <vect1, vect2, perm_mask_high>
++     low = VEC_PERM_EXPR <vect1, vect2, perm_mask_low> */
++
++static gimple*
++calculate_perm_stmt (unsigned int i, tree name, tree vect1, tree vect2,
++		     tree perm_mask_first, tree perm_mask)
++{
++  gimple *perm_stmt = NULL;
++  if (i == 0)
++    {
++      perm_stmt = gimple_build_assign (name, VEC_PERM_EXPR, vect1,
++				       vect2, perm_mask_first);
++    }
++  else
++    {
++      perm_stmt = gimple_build_assign (name, VEC_PERM_EXPR, vect1,
++				       vect2, perm_mask);
++    }
++  return perm_stmt;
++}
++
++/* Function vect_transpose_store_chain.
++
++   Given a chain of interleaved stores in DR_CHAIN of LENGTH, grouped from
++   ARRAY_NUM arrays.  LENGTH must be a power of 2.  Generate
++   interleave_high/low stmts to reorder the data correctly for the stores.
++   Return the final references for the stores in RESULT_CHAIN.  This
++   function is similar to vect_permute_store_chain (); we interleave the
++   contents of the vectors in their order.
++
++   E.g., LENGTH is 4 and the scalar type is short, i.e., VF is 8.
++   ARRAY_NUM is 4 and GROUP_NUM is 2, i.e., it contains 4 arrays, and we
++   interleave every 2 elements of one vector in their order.
++   The input is 4 vectors, each containing 8 elements.
++   We assign a number to each element; the input sequence is:
++
++     1st vec:   0  1  2  3  4  5  6  7
++     2nd vec:   8  9 10 11 12 13 14 15
++     3rd vec:  16 17 18 19 20 21 22 23
++     4th vec:  24 25 26 27 28 29 30 31
++
++   The first stage of the sequence should be:
++
++     1st vec:   0  4  1  5  2  6  3  7
++     2nd vec:   8 12  9 13 10 14 11 15
++     3rd vec:  16 20 17 21 18 22 19 23
++     4th vec:  24 28 25 29 26 30 27 31
++
++   The following stage sequences should be (interleaving every GROUP_NUM (2)
++   elements):
++
++     1st vec:   0  4 16 20  1  5 17 21
++     2nd vec:   2  6 18 22  3  7 19 23
++     3rd vec:   8 12 24 28  9 13 25 29
++     4th vec:  10 14 26 30 11 15 27 31
++
++     1st vec:   0  4  8 12 16 20 24 28
++     2nd vec:   1  5  9 13 17 21 25 29
++     3rd vec:   2  6 10 14 18 22 26 30
++     4th vec:   3  7 11 15 19 23 27 31
++
++   The output is the sequence after the final stage. */
++
++void
++vect_transpose_store_chain (vec<tree> dr_chain, unsigned int length,
++			    unsigned int array_num, stmt_vec_info stmt_info,
++			    gimple_stmt_iterator *gsi, vec<tree> *result_chain)
++{
++  gimple *perm_stmt = NULL;
++  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
++  tree perm_mask_low_first = NULL;
++  tree perm_mask_high_first = NULL;
++  tree perm_mask_low = NULL;
++  tree perm_mask_high = NULL;
++  unsigned int log_length = exact_log2 (length);
++  result_chain->quick_grow (length);
++  memcpy (result_chain->address (), dr_chain.address (),
++	  length * sizeof (tree));
++
++  /* Only a power of 2 is supported. */
++  gcc_assert (pow2p_hwi (length));
++
++  /* The encoding has 2 types: one for the grouped pattern in the first
++     stage, another for the following grouped interleaved stepped
++     patterns. */
++  gcc_assert (array_num != 0);
++  vect_indices_encoding_first (vectype, array_num, perm_mask_high_first,
++			       perm_mask_low_first);
++  vect_indices_encoding (vectype, array_num, perm_mask_high, perm_mask_low);
++
++  for (unsigned int perm_time = 0; perm_time < log_length; perm_time++)
++    {
++      for (unsigned int index = 0; index < length / 2; index++)
++	{
++	  tree vect1 = dr_chain[index];
++	  tree vect2 = dr_chain[index + length / 2];
++
++	  tree high = make_temp_ssa_name (vectype, NULL, "vect_inter_high");
++	  perm_stmt = calculate_perm_stmt (perm_time, high, vect1, vect2,
++					   perm_mask_high_first,
++					   perm_mask_high);
++	  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
++	  (*result_chain)[2 * index] = high;
++
++	  tree low = make_temp_ssa_name (vectype, NULL, "vect_inter_low");
++	  perm_stmt = calculate_perm_stmt (perm_time, low, vect1, vect2,
++					   perm_mask_low_first,
++					   perm_mask_low);
++	  vect_finish_stmt_generation (stmt_info, perm_stmt, gsi);
++	  (*result_chain)[2 * index + 1] = low;
++	}
++      memcpy (dr_chain.address (), result_chain->address (),
++	      length * sizeof (tree));
++    }
++}
++
+ /* Function vect_setup_realignment
+ 
+    This function is called when vectorizing an unaligned load using
+diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
+--- a/gcc/tree-vect-loop.c	2021-12-02 14:17:26.569356600 +0800
++++ b/gcc/tree-vect-loop.c	2021-12-02 15:00:50.984607700 +0800
+@@ -2509,6 +2509,7 @@ vect_analyze_loop (struct loop *loop, ve
+   unsigned n_stmts = 0;
+   machine_mode autodetected_vector_mode = VOIDmode;
+   opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL);
++  opt_loop_vec_info fail_loop_vinfo = opt_loop_vec_info::success (NULL);
+   machine_mode next_vector_mode = VOIDmode;
+   poly_uint64 lowest_th = 0;
+   unsigned vectorized_loops = 0;
+@@ -2597,6 +2598,13 @@ vect_analyze_loop (struct loop *loop, ve
+       if (res)
+ 	{
+ 	  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
++	  /* Under loop distribution, we only need to get the loop_vinfo;
++	     do not conduct further operations. */
++	  if (loop->processing_ldist)
++	    {
++	      loop->aux = (loop_vec_info) loop_vinfo;
++	      return loop_vinfo;
++	    }
+ 	  vectorized_loops++;
+ 
+ 	  /* Once we hit the desired simdlen for the first time,
+@@ -2672,7 +2680,19 @@ vect_analyze_loop (struct loop *loop, ve
+ 	}
+       else
+ 	{
+-	  delete loop_vinfo;
++	  /* If the current analysis shows LOOP is unable to be vectorized,
++	     loop_vinfo will be deleted.  If LOOP is under ldist analysis,
++	     back it up before it is deleted, and return it if all modes have
++	     been analyzed and vectorization still fails. */
++	  if (loop->processing_ldist && (mode_i == vector_modes.length ()
++	      || autodetected_vector_mode == VOIDmode))
++	    {
++	      fail_loop_vinfo = loop_vinfo;
++	    }
++	  else
++	    {
++	      delete loop_vinfo;
++	    }
+ 	  if (fatal)
+ 	    {
+ 	      gcc_checking_assert (first_loop_vinfo == NULL);
+@@ -2721,6 +2741,14 @@ vect_analyze_loop (struct loop *loop, ve
+       return first_loop_vinfo;
+     }
+ 
++  /* Return the loop_vinfo for ldist if the loop is unvectorizable. */
++  if (loop->processing_ldist && (mode_i == vector_modes.length ()
++      || autodetected_vector_mode == VOIDmode))
++    {
++      loop->aux = (loop_vec_info) fail_loop_vinfo;
++      return fail_loop_vinfo;
++    }
++
+   return opt_loop_vec_info::propagate_failure (res);
+ }
+ 
+diff -Nurp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
+--- a/gcc/tree-vectorizer.h	2021-12-02 14:17:26.569356600 +0800
++++ b/gcc/tree-vectorizer.h	2021-12-02 14:33:29.454408100 +0800
+@@ -292,6 +292,21 @@ struct vec_info_shared {
+   vec<ddr_p> ddrs;
+ };
+ 
++/* Information about the offset in vectorizable_load. */
++struct offset_info {
++  tree offset;
++  tree byte_offset;
++  tree dataref_offset;
++};
++
++/* Information about the vectype in vectorizable_load. */
++struct vectype_info {
++  tree vectype;
++  tree ltype;
++  tree lvectype;
++  tree ref_type;
++};
++
+ /* Vectorizer state common between loop and basic-block vectorization. */
+ struct vec_info {
+   typedef hash_set<int_hash<machine_mode, E_VOIDmode, E_BLKmode> > mode_set;
+@@ -329,6 +344,14 @@ struct vec_info {
+      stmt in the chain. */
+   auto_vec<stmt_vec_info> grouped_stores;
+ 
++  /* All interleaving chains of loads, represented by the first
++     stmt in the chain. */
++  auto_vec<stmt_vec_info> grouped_loads;
++
++  /* All interleaving chains of stores (before transposing), represented by
++     all the stmts in the chain. */
++  auto_vec<vec<stmt_vec_info> > scalar_stores;
++
+   /* Cost data used by the target cost model. */
+   void *target_cost_data;
+ 
+@@ -694,6 +717,8 @@ typedef struct _loop_vec_info : public v
+ #define LOOP_VINFO_CHECK_NONZERO(L)        (L)->check_nonzero
+ #define LOOP_VINFO_LOWER_BOUNDS(L)         (L)->lower_bounds
+ #define LOOP_VINFO_GROUPED_STORES(L)       (L)->grouped_stores
++#define LOOP_VINFO_GROUPED_LOADS(L)        (L)->grouped_loads
++#define LOOP_VINFO_SCALAR_STORES(L)        (L)->scalar_stores
+ #define LOOP_VINFO_SLP_INSTANCES(L)        (L)->slp_instances
+ #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
+ #define LOOP_VINFO_REDUCTIONS(L)           (L)->reductions
+@@ -755,6 +780,22 @@ typedef struct _bb_vec_info : public vec
+   basic_block bb;
+   gimple_stmt_iterator region_begin;
+   gimple_stmt_iterator region_end;
++
++  /* True, if bb_vinfo can goto vect_analyze_slp. */
++  bool before_slp;
++
++  /* True, if bb_vinfo is a transposed version. */
++  bool transposed;
++
++  /* The cost of the scalar iterations. */
++  int scalar_cost;
++
++  /* The cost of the vector prologue and epilogue, including peeled
++     iterations and set-up code. */
++  int vec_outside_cost;
++
++  /* The cost of the vector loop body. */
++  int vec_inside_cost;
+ } *bb_vec_info;
+ 
+ #define BB_VINFO_BB(B)               (B)->bb
+@@ -763,6 +804,13 @@ typedef struct _bb_vec_info : public vec
+ #define BB_VINFO_DATAREFS(B)         (B)->shared->datarefs
+ #define BB_VINFO_DDRS(B)             (B)->shared->ddrs
+ #define BB_VINFO_TARGET_COST_DATA(B) (B)->target_cost_data
++#define BB_VINFO_GROUPED_LOADS(B)    (B)->grouped_loads
++#define BB_VINFO_SCALAR_STORES(B)    (B)->scalar_stores
++#define BB_VINFO_VEC_OUTSIDE_COST(B) (B)->vec_outside_cost
++#define BB_VINFO_VEC_INSIDE_COST(B)  (B)->vec_inside_cost
++#define BB_VINFO_SCALAR_COST(B)      (B)->scalar_cost
++#define BB_VINFO_SLP_TRANSPOSED(B)   (B)->transposed
++#define BB_VINFO_BEFORE_SLP(B)       (B)->before_slp
+ 
+ static inline bb_vec_info
+ vec_info_for_bb (basic_block bb)
+@@ -997,6 +1045,16 @@ struct _stmt_vec_info {
+   stmt_vec_info next_element;
+   /* The size of the group. */
+   unsigned int size;
++
++  /* The group size before transposing. */
++  unsigned int size_before_transpose;
++  /* If true, the stmt_info is slp transposed. */
++  bool slp_transpose;
++
++  /* Mark the group store number used to rebuild the interleaving chain
++     during the transpose phase.  Value -1 means the group cannot be
++     transposed. */
++  int group_number;
++
+   /* For stores, number of stores from this group seen.  We vectorize the
+      last one. */
+   unsigned int store_count;
+@@ -1202,6 +1260,12 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_
+ #define STMT_VINFO_REDUC_VECTYPE_IN(S)     (S)->reduc_vectype_in
+ #define STMT_VINFO_SLP_VECT_ONLY(S)        (S)->slp_vect_only_p
+ 
++#define DR_GROUP_SLP_TRANSPOSE(S) \
++  (gcc_checking_assert ((S)->dr_aux.dr), (S)->slp_transpose)
++#define DR_GROUP_SIZE_TRANS(S) \
++  (gcc_checking_assert ((S)->dr_aux.dr), (S)->size_before_transpose)
++#define DR_GROUP_NUMBER(S) \
++  (gcc_checking_assert ((S)->dr_aux.dr), (S)->group_number)
+ #define DR_GROUP_FIRST_ELEMENT(S) \
+   (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)
+ #define DR_GROUP_NEXT_ELEMENT(S) \
+@@ -1585,6 +1649,17 @@ vect_get_scalar_dr_size (dr_vec_info *dr
+   return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_info->dr))));
+ }
+ 
++/* Compare two unsigned ints A and B, sorting them in ascending order. */
++
++static inline int
++cmp_for_group_num (const void *a_, const void *b_)
++{
++  unsigned int a = *(unsigned int *) const_cast<void *> (a_);
++  unsigned int b = *(unsigned int *) const_cast<void *> (b_);
++  return a < b ? -1 : 1;
++}
++
+ /* Source location + hotness information. */
+ extern dump_user_location_t vect_location;
+ 
+@@ -1737,6 +1812,9 @@ extern bool vect_grouped_load_supported
+ extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
+ extern void vect_permute_store_chain (vec<tree> ,unsigned int, stmt_vec_info,
+ 				      gimple_stmt_iterator *, vec<tree> *);
++extern void vect_transpose_store_chain (vec<tree>, unsigned int, unsigned int,
++					stmt_vec_info, gimple_stmt_iterator *,
++					vec<tree> *);
+ extern tree vect_setup_realignment (stmt_vec_info, gimple_stmt_iterator *,
+ 				    tree *, enum dr_alignment_support, tree,
+ 				    struct loop **);
+@@ -1800,6 +1878,7 @@ extern void vect_free_slp_instance (slp_
+ extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> ,
+ 					  gimple_stmt_iterator *, poly_uint64,
+ 					  slp_instance, bool, unsigned *);
++extern bool vect_transform_back_slp_grouped_stores (bb_vec_info, stmt_vec_info);
+ extern bool vect_slp_analyze_operations (vec_info *);
+ extern void vect_schedule_slp (vec_info *);
+ extern opt_result vect_analyze_slp (vec_info *, unsigned);
+diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
+--- a/gcc/tree-vect-slp.c	2021-12-02 14:17:26.645739200 +0800
++++ b/gcc/tree-vect-slp.c	2021-12-02 20:34:02.118565100 +0800
+@@ -2366,11 +2366,13 @@ vect_analyze_slp_instance (vec_info *vin
+ 
+   /* For basic block SLP, try to break the group up into multiples of the
+      vector size. */
++  bb_vec_info bb_vinfo = dyn_cast<bb_vec_info> (vinfo);
+   unsigned HOST_WIDE_INT const_nunits;
+   if (is_a <bb_vec_info> (vinfo)
+       && STMT_VINFO_GROUPED_ACCESS (stmt_info)
+       && DR_GROUP_FIRST_ELEMENT (stmt_info)
+-      && nunits.is_constant (&const_nunits))
++      && nunits.is_constant (&const_nunits)
++      && !bb_vinfo->transposed)
+     {
+       /* We consider breaking the group only on VF boundaries from the
+ 	 existing start. */
+@@ -2407,6 +2409,872 @@ vect_analyze_slp_instance (vec_info *vin
+   return false;
+ }
+ 
++/* Return true if STORE_ELEM assigns a constant. */
++
++static inline bool
++is_const_assign (stmt_vec_info store_elem)
++{
++  if (store_elem == NULL)
++    {
++      gcc_unreachable ();
++    }
++  return TREE_CONSTANT (gimple_assign_rhs1 (store_elem->stmt));
++}
++
++/* Push the inits to INNERMOST_INITS and check for const assigns. */
++
++static bool
++record_innermost (vec<tree> &innermost_inits,
++		  vec<tree> &innermost_offsets,
++		  stmt_vec_info first_element)
++{
++  if (!first_element)
++    {
++      return false;
++    }
++  stmt_vec_info next_info = first_element;
++  while (next_info)
++    {
++      /* No need to vectorize a constant assign in a transposed version. */
++      if (is_const_assign (next_info))
++	{
++	  if (dump_enabled_p ())
++	    {
++	      dump_printf_loc (MSG_NOTE, vect_location,
++			       "no need to vectorize, store is const assign: %G",
++			       next_info->stmt);
++	    }
++	  return false;
++	}
++      innermost_inits.safe_push (STMT_VINFO_DR_INIT (next_info));
++      innermost_offsets.safe_push (STMT_VINFO_DR_OFFSET (next_info));
++      next_info = DR_GROUP_NEXT_ELEMENT (next_info);
++    }
++  return true;
++}
++
++/* Compare the inits to INNERMOST_INITS; return FALSE if the inits do not
++   match the first grouped_store.  Const assigns are checked meanwhile. */
++
++static bool
++compare_innermost (vec<tree> &innermost_inits,
++		   vec<tree> &innermost_offsets,
++		   stmt_vec_info first_element)
++{
++  if (!first_element || innermost_inits.length () != first_element->size)
++    {
++      return false;
++    }
++  stmt_vec_info next_info = first_element;
++  unsigned int i = 0;
++  while (next_info)
++    {
++      if (is_const_assign (next_info))
++	{
++	  if (dump_enabled_p ())
++	    {
++	      dump_printf_loc (MSG_NOTE, vect_location,
++			       "no need to vectorize, store is const assign: %G",
++			       next_info->stmt);
++	    }
++	  return false;
++	}
++      if (innermost_inits[i] != STMT_VINFO_DR_INIT (next_info)
++	  || innermost_offsets[i] != STMT_VINFO_DR_OFFSET (next_info))
++	{
++	  return false;
++	}
++      next_info = DR_GROUP_NEXT_ELEMENT (next_info);
++      i++;
++    }
++  return true;
++}
++
++/* Check if the grouped stores are of the same type.
++   Input: t1/t2 = TREE_TYPE (gimple_assign_lhs (first_element->stmt))
++   Output: 0 if same, 1 or -1 otherwise. */
++
++static int
++tree_type_cmp (const tree t1, const tree t2)
++{
++  gcc_checking_assert (t1 != NULL || t2 != NULL);
++  if (t1 != t2)
++    {
++      if (TREE_CODE (t1) != TREE_CODE (t2))
++	{
++	  return TREE_CODE (t1) > TREE_CODE (t2) ? 1 : -1;
++	}
++      if (TYPE_UNSIGNED (t1) != TYPE_UNSIGNED (t2))
++	{
++	  return TYPE_UNSIGNED (t1) > TYPE_UNSIGNED (t2) ? 1 : -1;
++	}
++      if (TYPE_PRECISION (t1) != TYPE_PRECISION (t2))
++	{
++	  return TYPE_PRECISION (t1) > TYPE_PRECISION (t2) ? 1 : -1;
++	}
++    }
++  return 0;
++}
++
++/* Compare the group size and store type of two grouped stores.
++   Output: 0 if same, 1 or -1 otherwise. */
++
++static int
++check_same_store_type (stmt_vec_info grp1, stmt_vec_info grp2)
++{
++  if (grp1 == grp2)
++    {
++      return 0;
++    }
++  if (grp1->size != grp2->size)
++    {
++      return grp1->size > grp2->size ? 1 : -1;
++    }
++  tree lhs1 = gimple_assign_lhs (grp1->stmt);
++  tree lhs2 = gimple_assign_lhs (grp2->stmt);
++  if (TREE_CODE (lhs1) != TREE_CODE (lhs2))
++    {
++      return TREE_CODE (lhs1) > TREE_CODE (lhs2) ? 1 : -1;
++    }
++  tree grp_type1 = TREE_TYPE (gimple_assign_lhs (grp1->stmt));
++  tree grp_type2 = TREE_TYPE (gimple_assign_lhs (grp2->stmt));
++  int cmp = tree_type_cmp (grp_type1, grp_type2);
++  return cmp;
++}
++
++/* Sort the grouped stores according to group_size and store_type.
++   Output: 0 if same, 1 if GRP1 > GRP2, -1 otherwise. */
++
++static int
++grouped_store_cmp (const void *grp1_, const void *grp2_)
++{
++  stmt_vec_info grp1 = *(stmt_vec_info *) const_cast<void *> (grp1_);
++  stmt_vec_info grp2 = *(stmt_vec_info *) const_cast<void *> (grp2_);
++  return check_same_store_type (grp1, grp2);
++}
++
++/* Transpose analysis is only conducted on AArch64 machines with NEON
++   registers, using interleaving instructions, e.g. st4, zip, etc.  Those
++   instructions require a SIMD length of 128 or 64 bits. */
++
++static inline bool
++check_filling_reg (unsigned int grp_size, stmt_vec_info current_element)
++{
++  if (grp_size == 0)
++    {
++      return false;
++    }
++  /* If the gimple STMT was already vectorized in the vect pass, it is
++     unable to conduct transpose analysis; skip it. */
++  if (TREE_CODE (TREE_TYPE (gimple_get_lhs (current_element->stmt)))
++      == VECTOR_TYPE
++      || TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (current_element->stmt)))
++	 == VECTOR_TYPE)
++    {
++      return false;
++    }
++  unsigned int store_precision
++    = TYPE_PRECISION (TREE_TYPE (gimple_get_lhs (current_element->stmt)));
++  return store_precision != 0 && pow2p_hwi (grp_size)
++	 && (grp_size * store_precision % 128 == 0
++	     || grp_size * store_precision == 64);
++}
++
++/* Check if the previous groups are suitable to transpose; if not, set their
++   group number to -1, reduce GRP_NUM and clear CURRENT_GROUPS.
++   Otherwise, just clear CURRENT_GROUPS. */
++
++static void
++check_and_clear_groups (vec<stmt_vec_info> current_groups,
++			unsigned int &grp_num)
++{
++  stmt_vec_info first_element;
++  if (current_groups.length () == 1
++      || (current_groups.length () != 0
++	  && !pow2p_hwi (current_groups.length ())))
++    {
++      while (current_groups.length () != 0)
++	{
++	  first_element = current_groups.pop ();
++	  first_element->group_number = -1;
++	}
++      --grp_num;
++    }
++  else
++    {
++      while (current_groups.length ())
++	{
++	  current_groups.pop ();
++	}
++    }
++}
++
++/* Set grouped_stores with a similar MEM_REF to the same group and mark
++   their grp_num.  Groups with the same grp_num constitute the minimum unit
++   for transpose analysis.  Return the number of such units. */
++
++static unsigned
++vect_prepare_transpose (bb_vec_info bb_vinfo)
++{
++  stmt_vec_info current_element = NULL;
++  stmt_vec_info first_element = NULL;
++  unsigned int i = 0;
++  unsigned int grp_num = 0;
++  /* Use arrays to record the MEM_REF data in different GROUPED_STORES. */
++  auto_vec<tree> innermost_inits;
++  auto_vec<tree> innermost_offsets;
++
++  /* A set of stmt_vec_infos with the same store type.  Analyze them if
++     their size is suitable to transpose. */
++  auto_vec<stmt_vec_info> current_groups;
++
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, i, current_element)
++    {
++      /* Compare the current grouped_store to the first one if first_element
++	 exists; push current_element to current_groups if they are similar
++	 in the innermost behavior of the MEM_REF. */
++      if (first_element != NULL
++	  && !check_same_store_type (first_element, current_element)
++	  && compare_innermost (innermost_inits, innermost_offsets,
++				current_element))
++	{
++	  current_groups.safe_push (current_element);
++	  current_element->group_number = grp_num;
++	  /* If current_element is the last element in grouped_stores,
++	     continue would exit the loop and leave the last group
++	     unanalyzed. */
++	  if (i == bb_vinfo->grouped_stores.length () - 1)
++	    {
++	      check_and_clear_groups (current_groups, grp_num);
++	    }
++	  continue;
++	}
++      check_and_clear_groups (current_groups, grp_num);
++      innermost_inits.release ();
++      innermost_offsets.release ();
++      /* Beginning of a new group to analyze whether its members are able to
++	 constitute a unit for transpose analysis. */
++      first_element = NULL;
++      if ((TREE_CODE (gimple_get_lhs (current_element->stmt)) == ARRAY_REF)
++	  && (check_filling_reg (current_element->size, current_element)
++	      && record_innermost (innermost_inits, innermost_offsets,
++				   current_element)))
++	{
++	  first_element = current_element;
++	  current_groups.safe_push (current_element);
++	  current_element->group_number = ++grp_num;
++	  if (i == bb_vinfo->grouped_stores.length () - 1)
++	    {
++	      check_and_clear_groups (current_groups, grp_num);
++	    }
++	  continue;
++	}
++      current_element->group_number = -1;
++    }
++  return grp_num;
++}
++
++/* Transpose analysis is only conducted on AArch64 machines with NEON
++   registers, using interleaving instructions, e.g. st4, zip, etc. */
++
++static bool
++check_aarch64 ()
++{
++  auto_vector_modes vector_modes;
++  auto_vector_modes aarch_modes;
++  targetm.vectorize.autovectorize_vector_modes (&vector_modes, false);
++  aarch_modes.safe_push (V16QImode);
++  aarch_modes.safe_push (V8QImode);
++  aarch_modes.safe_push (V4HImode);
++  aarch_modes.safe_push (V2SImode);
++  if (vector_modes.length () != aarch_modes.length ())
++    {
++      return false;
++    }
++  for (unsigned i = 0; i < vector_modes.length (); i++)
++    {
++      if (vector_modes[i] != aarch_modes[i])
++	{
++	  return false;
++	}
++    }
++  return true;
++}
++
++/* Return a flag for transposing grouped stores before building the slp
++   tree.  Adds bool may_transpose in class vec_info. */
++
++static bool
++vect_may_transpose (bb_vec_info bb_vinfo)
++{
++  if (!check_aarch64 ())
++    {
++      return false;
++    }
++  if (bb_vinfo->grouped_stores.length () < 2)
++    {
++      return false;
++    }
++  DUMP_VECT_SCOPE ("analyze if grouped stores may transpose to slp");
++  /* Sort grouped_stores according to size and type for function
++     vect_prepare_transpose (). */
++  bb_vinfo->grouped_stores.qsort (grouped_store_cmp);
++
++  int groups = vect_prepare_transpose (bb_vinfo);
++  if (dump_enabled_p ())
++    dump_printf_loc (MSG_NOTE, vect_location,
++		     "%d groups to analyze transposed slp.\n", groups);
++  return groups != 0;
++}
++
++/* Get the base address of STMT_INFO. */
++
++static tree
++get_op_base_address (stmt_vec_info stmt_info)
++{
++  struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
++  tree op = DR_BASE_ADDRESS (dr);
++  while (TREE_OPERAND_LENGTH (op) > 0)
++    {
++      op = TREE_OPERAND (op, 0);
++    }
++  return op;
++}
++
++/* Compare the UIDs of the two stmt_infos STMTINFO_A and STMTINFO_B, sorting
++   them in ascending order. */
++
++static int
++dr_group_cmp (const void *stmtinfo_a_, const void *stmtinfo_b_)
++{
++  stmt_vec_info stmtinfo_a
++    = *(stmt_vec_info *) const_cast<void *> (stmtinfo_a_);
++  stmt_vec_info stmtinfo_b
++    = *(stmt_vec_info *) const_cast<void *> (stmtinfo_b_);
++
++  /* Stabilize the sort. */
++  if (stmtinfo_a == stmtinfo_b)
++    {
++      return 0;
++    }
++  return gimple_uid (stmtinfo_a->stmt) < gimple_uid (stmtinfo_b->stmt) ? -1 : 1;
++}
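++
++/* Illustration (hypothetical chains): if the grouped loads contain two
++   chains with the same base address a and the same group size 4,
++
++     chain 1: a[0] a[1] a[2] a[3]
++     chain 2: a[4] a[5] a[6] a[7]
++
++   vect_slp_grouped_load_find () below collects both chains into one
++   candidate set, and vect_slp_grouped_load_merge () links them into a
++   single interleaving chain with DR_GROUP_SIZE 8, remembering each
++   element's original group size in DR_GROUP_SIZE_TRANS for the later
++   transposition recovery. */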
++
++/* Find the grouped loads that need to be merged. */
++
++static auto_vec<stmt_vec_info>
++vect_slp_grouped_load_find (bb_vec_info bb_vinfo, vec<bool> &visited)
++{
++  unsigned int i = 0;
++  stmt_vec_info merge_first_element = NULL;
++  stmt_vec_info first_element = NULL;
++  auto_vec<stmt_vec_info> res;
++  tree opa = NULL;
++  unsigned int grp_size_a = 0;
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_loads, i, first_element)
++    {
++      if (visited[i])
++	{
++	  continue;
++	}
++      if (!STMT_VINFO_GROUPED_ACCESS (first_element)
++	  || !pow2p_hwi (DR_GROUP_SIZE (first_element)))
++	{
++	  if (!visited[i] && merge_first_element == NULL)
++	    {
++	      visited[i] = true;
++	      res.safe_push (first_element);
++	      return res;
++	    }
++	}
++      if (merge_first_element == NULL)
++	{
++	  merge_first_element = first_element;
++	  opa = get_op_base_address (first_element);
++	  grp_size_a = DR_GROUP_SIZE (first_element);
++	  res.safe_push (first_element);
++	  visited[i] = true;
++	  continue;
++	}
++
++      /* If the two first elements have the same base address and group
++	 size, these two grouped loads need to be merged. */
++      tree opb = get_op_base_address (first_element);
++      unsigned int grp_size_b = DR_GROUP_SIZE (first_element);
++      if (opa == opb && grp_size_a == grp_size_b)
++	{
++	  res.safe_push (first_element);
++	  visited[i] = true;
++	}
++    }
++  return res;
++}
++
++/* Merge the grouped loads found by vect_slp_grouped_load_find (). */
++
++static stmt_vec_info
++vect_slp_grouped_load_merge (vec<stmt_vec_info> res)
++{
++  stmt_vec_info stmt_info = res[0];
++  if (res.length () == 1)
++    {
++      return stmt_info;
++    }
++  unsigned int i = 0;
++  unsigned int size = 0;
++  unsigned int new_group_size = 0;
++  stmt_vec_info first_element = NULL;
++  stmt_vec_info merge_first_element = NULL;
++  stmt_vec_info last_element = NULL;
++  FOR_EACH_VEC_ELT (res, i, first_element)
++    {
++      if (merge_first_element == NULL)
++	{
++	  merge_first_element = first_element;
++	  last_element = merge_first_element;
++	  size = DR_GROUP_SIZE (merge_first_element);
++	}
++      new_group_size += size;
++      if (last_element != first_element
++	  && !DR_GROUP_NEXT_ELEMENT (last_element))
++	{
++	  DR_GROUP_NEXT_ELEMENT (last_element) = first_element;
++	}
++      for (stmt_info = first_element; stmt_info;
++	   stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
++	{
++	  DR_GROUP_FIRST_ELEMENT (stmt_info) = merge_first_element;
++	  /* Store the gap from the previous member of the group.  If there
++	     is no gap in the access, DR_GROUP_GAP is always 1. */
++	  DR_GROUP_GAP (stmt_info) = 1;
++	  DR_GROUP_SIZE_TRANS (stmt_info) = DR_GROUP_SIZE (stmt_info);
++	  last_element = stmt_info;
++	}
++    }
++  DR_GROUP_SIZE (merge_first_element) = new_group_size;
++  DR_GROUP_SLP_TRANSPOSE (merge_first_element) = true;
++  /* For the first element of an interleaving chain, when there is no gap,
++     this gap should be 0. */
++  DR_GROUP_GAP (merge_first_element) = 0;
++  DR_GROUP_NEXT_ELEMENT (last_element) = NULL;
++  return merge_first_element;
++}
++
++/* Merge the grouped loads. */
++
++static bool
++vect_merge_slp_grouped_loads (bb_vec_info bb_vinfo)
++{
++  if (bb_vinfo->grouped_loads.length () <= 0)
++    {
++      if (dump_enabled_p ())
++	{
++	  dump_printf_loc (MSG_NOTE, vect_location,
++			   "The number of grouped loads is 0.\n");
++	}
++      return false;
++    }
++  bb_vinfo->grouped_loads.qsort (dr_group_cmp);
++  auto_vec<bool> visited (bb_vinfo->grouped_loads.length ());
++  auto_vec<stmt_vec_info> grouped_loads_merge;
++  for (unsigned int i = 0; i < bb_vinfo->grouped_loads.length (); i++)
++    {
++      visited.safe_push (false);
++    }
++  while (1)
++    {
++      /* Find the grouped loads that need to be merged. */
*/ ++ auto_vec res ++ = vect_slp_grouped_load_find (bb_vinfo, visited); ++ if (res.is_empty ()) ++ { ++ break; ++ } ++ /* Merge the required grouped loads into one group. */ ++ grouped_loads_merge.safe_push (vect_slp_grouped_load_merge (res)); ++ } ++ if (grouped_loads_merge.length () == bb_vinfo->grouped_loads.length ()) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "No grouped loads need to be merged.\n"); ++ } ++ return false; ++ } ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Merging grouped loads successfully.\n"); ++ } ++ BB_VINFO_GROUPED_LOADS (bb_vinfo).release (); ++ for (unsigned int i = 0; i < grouped_loads_merge.length (); i++) ++ { ++ BB_VINFO_GROUPED_LOADS (bb_vinfo).safe_push (grouped_loads_merge[i]); ++ } ++ return true; ++} ++ ++/* Find the first elements of the grouped stores ++ which are required to transpose. */ ++ ++static auto_vec ++vect_slp_grouped_store_find (bb_vec_info bb_vinfo, vec &visited) ++{ ++ auto_vec res; ++ stmt_vec_info first_element = NULL; ++ stmt_vec_info merge_first_element = NULL; ++ unsigned int k = 0; ++ FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, k, first_element) ++ { ++ if (visited[k]) ++ { ++ continue; ++ } ++ if (!STMT_VINFO_GROUPED_ACCESS (first_element) ++ || first_element->group_number == -1) ++ { ++ if (!visited[k] && merge_first_element == NULL) ++ { ++ visited[k] = true; ++ res.safe_push (first_element); ++ return res; ++ } ++ } ++ if (first_element->group_number != -1 ++ && merge_first_element == NULL) ++ { ++ merge_first_element = first_element; ++ } ++ if (merge_first_element->group_number == first_element->group_number) ++ { ++ visited[k] = true; ++ res.safe_push (first_element); ++ } ++ } ++ return res; ++} ++ ++/* Transpose and merge the grouped stores. */ ++ ++static stmt_vec_info ++vect_slp_grouped_store_transform (vec res_queue) ++{ ++ stmt_vec_info stmt_info = res_queue[0]; ++ if (res_queue.length () == 1) ++ { ++ return stmt_info; ++ } ++ stmt_vec_info rearrange_first_element = stmt_info; ++ stmt_vec_info last_element = rearrange_first_element; ++ DR_GROUP_FIRST_ELEMENT (rearrange_first_element) = rearrange_first_element; ++ ++ unsigned int size = DR_GROUP_SIZE (rearrange_first_element); ++ unsigned int new_group_size = size * res_queue.length (); ++ ++ while (!res_queue.is_empty ()) ++ { ++ stmt_vec_info stmt_info = res_queue[0]; ++ res_queue.ordered_remove (0); ++ if (DR_GROUP_NEXT_ELEMENT (stmt_info)) ++ { ++ res_queue.safe_push (DR_GROUP_NEXT_ELEMENT (stmt_info)); ++ } ++ DR_GROUP_FIRST_ELEMENT (stmt_info) = rearrange_first_element; ++ DR_GROUP_NEXT_ELEMENT (last_element) = stmt_info; ++ /* Store the gap from the previous member of the group. If there is no ++ gap in the access, DR_GROUP_GAP is always 1. */ ++ DR_GROUP_GAP (stmt_info) = 1; ++ DR_GROUP_SIZE_TRANS (stmt_info) = DR_GROUP_SIZE (stmt_info); ++ last_element = stmt_info; ++ } ++ ++ DR_GROUP_NEXT_ELEMENT (last_element) = NULL; ++ /* For the first element of a interleaving chain, ++ when there is no gap, this gap should be 0. */ ++ DR_GROUP_GAP (rearrange_first_element) = 0; ++ DR_GROUP_SIZE (rearrange_first_element) = new_group_size; ++ DR_GROUP_SLP_TRANSPOSE (rearrange_first_element) = true; ++ return rearrange_first_element; ++} ++ ++static void ++get_scalar_stores (bb_vec_info bb_vinfo) ++{ ++ unsigned int k = 0; ++ stmt_vec_info first_element = NULL; ++ FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, k, first_element) ++ { ++ /* Filter the grouped store which is unnecessary for transposing. 
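++	 E.g. a grouped store whose group_number is -1 was rejected by
++	 the transpose analysis and gets no entry here, while a numbered
++	 store group is saved as the vector of its member stmts
++	 {s0, s1, ..., s_{GROUP_SIZE-1}} so that the original chains can
++	 be rebuilt if the transposed layout is abandoned later.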
*/ ++ if (!STMT_VINFO_GROUPED_ACCESS (first_element) ++ || first_element->group_number == -1) ++ { ++ continue; ++ } ++ vec tmp_scalar_store; ++ tmp_scalar_store.create (DR_GROUP_SIZE (first_element)); ++ for (stmt_vec_info stmt_info = first_element; stmt_info; ++ stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info)) ++ { ++ tmp_scalar_store.safe_push (stmt_info); ++ } ++ BB_VINFO_SCALAR_STORES (bb_vinfo).safe_push (tmp_scalar_store); ++ } ++} ++ ++/* Transform the grouped stores. */ ++ ++static bool ++vect_transform_slp_grouped_stores (bb_vec_info bb_vinfo) ++{ ++ if (bb_vinfo->grouped_stores.length () <= 0) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "No grouped stores need to be merged.\n"); ++ } ++ return false; ++ } ++ ++ bb_vinfo->grouped_stores.qsort (dr_group_cmp); ++ auto_vec grouped_stores_merge; ++ auto_vec visited (bb_vinfo->grouped_stores.length ()); ++ unsigned int i = 0; ++ for (i = 0; i < bb_vinfo->grouped_stores.length (); i++) ++ { ++ visited.safe_push (false); ++ } ++ ++ /* Get scalar stores for the following transposition recovery. */ ++ get_scalar_stores (bb_vinfo); ++ ++ while (1) ++ { ++ /* Find grouped stores which are required to transpose and merge. */ ++ auto_vec res ++ = vect_slp_grouped_store_find (bb_vinfo, visited); ++ if (res.is_empty ()) ++ { ++ break; ++ } ++ /* Transpose and merge the required grouped stores into one group. */ ++ grouped_stores_merge.safe_push ++ (vect_slp_grouped_store_transform (res)); ++ } ++ ++ BB_VINFO_GROUPED_STORES (bb_vinfo).release (); ++ for (i = 0; i < grouped_stores_merge.length (); i++) ++ { ++ BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (grouped_stores_merge[i]); ++ } ++ ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Merging grouped stores successfully.\n"); ++ } ++ return true; ++} ++ ++/* A helpful function of vect_transform_back_slp_grouped_stores (). */ ++ ++static auto_vec ++vect_transform_back_slp_grouped_store (bb_vec_info bb_vinfo, ++ stmt_vec_info first_stmt_info) ++{ ++ auto_vec grouped_stores_split; ++ for (unsigned int i = 0; i < bb_vinfo->scalar_stores.length (); i++) ++ { ++ vec scalar_tmp = bb_vinfo->scalar_stores[i]; ++ if (scalar_tmp.length () > 1 ++ && scalar_tmp[0]->group_number != first_stmt_info->group_number) ++ { ++ continue; ++ } ++ stmt_vec_info cur_stmt_info = NULL; ++ stmt_vec_info cur_first_stmt_info = NULL; ++ stmt_vec_info last_stmt_info = NULL; ++ unsigned int k = 0; ++ FOR_EACH_VEC_ELT (scalar_tmp, k, cur_stmt_info) ++ { ++ if (k == 0) ++ { ++ cur_first_stmt_info = cur_stmt_info; ++ last_stmt_info = cur_stmt_info; ++ } ++ DR_GROUP_FIRST_ELEMENT (cur_stmt_info) = cur_first_stmt_info; ++ DR_GROUP_NEXT_ELEMENT (last_stmt_info) = cur_stmt_info; ++ last_stmt_info = cur_stmt_info; ++ } ++ DR_GROUP_SIZE (cur_first_stmt_info) = k; ++ DR_GROUP_NEXT_ELEMENT (last_stmt_info) = NULL; ++ if (first_stmt_info != cur_first_stmt_info) ++ { ++ DR_GROUP_SLP_TRANSPOSE (cur_first_stmt_info) = false; ++ DR_GROUP_NUMBER (cur_first_stmt_info) = -1; ++ } ++ grouped_stores_split.safe_push (cur_first_stmt_info); ++ } ++ return grouped_stores_split; ++} ++ ++/* Transform grouped store back. */ ++ ++bool ++vect_transform_back_slp_grouped_stores (bb_vec_info bb_vinfo, ++ stmt_vec_info first_stmt_info) ++{ ++ if (first_stmt_info->group_number == -1) ++ { ++ return true; ++ } ++ /* Transform back. 
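++     As a hypothetical illustration: if the stores of arrays A and B
++     were merged into the transposed chain {a0, b0, a1, b1}, the
++     scalar stores saved by get_scalar_stores () are replayed to
++     rebuild the original chains {a0, a1} and {b0, b1}, and the
++     transpose markings on the group are cleared below.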
*/
++  auto_vec<stmt_vec_info> grouped_stores_split
++    = vect_transform_back_slp_grouped_store (bb_vinfo, first_stmt_info);
++
++  /* Add the remaining grouped stores to grouped_stores_split.  */
++  stmt_vec_info first_element = NULL;
++  unsigned int i = 0;
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, i, first_element)
++    {
++      if (first_element->group_number != first_stmt_info->group_number)
++        {
++          grouped_stores_split.safe_push (first_element);
++        }
++    }
++  DR_GROUP_SLP_TRANSPOSE (first_stmt_info) = false;
++  DR_GROUP_NUMBER (first_stmt_info) = -1;
++  BB_VINFO_GROUPED_STORES (bb_vinfo).release ();
++  for (i = 0; i < grouped_stores_split.length (); i++)
++    {
++      BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (grouped_stores_split[i]);
++    }
++  return true;
++}
++
++/* Function check_for_slp_vectype
++
++   Restriction for grouped stores by checking their vectype.
++   If the vectype of a grouped store has changed, it needs to be
++   transformed back.  If all grouped stores need to be transformed back,
++   return FALSE.  */
++
++static bool
++check_for_slp_vectype (bb_vec_info bb_vinfo)
++{
++  stmt_vec_info first_element = NULL;
++  unsigned int i = 0;
++  unsigned int count = 0;
++  auto_vec<stmt_vec_info> grouped_stores_check;
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, i, first_element)
++    {
++      grouped_stores_check.safe_push (first_element);
++    }
++  FOR_EACH_VEC_ELT (grouped_stores_check, i, first_element)
++    {
++      if (STMT_VINFO_GROUPED_ACCESS (first_element)
++          && first_element->group_number != -1)
++        {
++          unsigned int group_size_b
++            = DR_GROUP_SIZE_TRANS (first_element);
++          tree vectype = STMT_VINFO_VECTYPE (first_element);
++          poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
++          if (nunits.to_constant () > group_size_b)
++            {
++              count++;
++              /* If the vectype has changed, this grouped store needs
++                 to be transformed back.  */
++              vect_transform_back_slp_grouped_stores (bb_vinfo, first_element);
++              if (dump_enabled_p ())
++                {
++                  dump_printf_loc (MSG_NOTE, vect_location,
++                                   "Not supported: only group sizes greater"
++                                   " than or equal to nunits are supported.\n");
++                }
++            }
++        }
++    }
++  if (count == grouped_stores_check.length ())
++    {
++      return false;
++    }
++  return true;
++}
++
++/* Function check_for_dr_alignment
++
++   Check the alignment of the slp instance loads.
++   Return FALSE if a load cannot be vectorized.  */
++
++static bool
++check_for_dr_alignment (slp_instance instance)
++{
++  slp_tree node = NULL;
++  unsigned int i = 0;
++  FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), i, node)
++    {
++      stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
++      dr_vec_info *first_dr_info = STMT_VINFO_DR_INFO (first_stmt_info);
++      enum dr_alignment_support supportable_dr_alignment
++        = vect_supportable_dr_alignment (first_dr_info, false);
++      if (supportable_dr_alignment == dr_explicit_realign_optimized
++          || supportable_dr_alignment == dr_explicit_realign)
++        {
++          return false;
++        }
++    }
++  return true;
++}
++
++/* Initialize the slp_transpose flag before transposing.  */
++
++static void
++init_stmt_info_slp_transpose (bb_vec_info bb_vinfo)
++{
++  stmt_vec_info first_element = NULL;
++  unsigned int k = 0;
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_stores, k, first_element)
++    {
++      if (STMT_VINFO_GROUPED_ACCESS (first_element))
++        {
++          DR_GROUP_SLP_TRANSPOSE (first_element) = false;
++        }
++    }
++  FOR_EACH_VEC_ELT (bb_vinfo->grouped_loads, k, first_element)
++    {
++      if (STMT_VINFO_GROUPED_ACCESS (first_element))
++        {
++          DR_GROUP_SLP_TRANSPOSE (first_element) = false;
++        }
++    }
++}
++
++/* Check if the stmts can be transposed in SLP.
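++   As an illustrative example: a basic block that contains the two
++   grouped stores {a[0..3]} and {b[0..3]} can be re-analyzed as one
++   interleaved store group {a0, b0, a1, b1, ...} of size 8, which maps
++   naturally onto AArch64 interleaving stores such as st2/st4.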
*/ ++ ++static bool ++vect_analyze_transpose (bb_vec_info bb_vinfo) ++{ ++ DUMP_VECT_SCOPE ("vect_analyze_transpose"); ++ ++ if (!vect_may_transpose (bb_vinfo)) ++ { ++ return false; ++ } ++ ++ /* For basic block SLP, try to merge the grouped stores and loads ++ into one group. */ ++ init_stmt_info_slp_transpose (bb_vinfo); ++ if (vect_transform_slp_grouped_stores (bb_vinfo) ++ && vect_merge_slp_grouped_loads (bb_vinfo)) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Analysis succeeded with SLP transposed.\n"); ++ } ++ return true; ++ } ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Analysis failed with SLP transposed.\n"); ++ } ++ return false; ++} + + /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP + trees of packed scalar stmts if SLP is possible. */ +@@ -3067,7 +3935,11 @@ vect_bb_vectorization_profitable_p (bb_v + + vec_outside_cost = vec_prologue_cost + vec_epilogue_cost; + +- if (dump_enabled_p ()) ++ BB_VINFO_VEC_INSIDE_COST (bb_vinfo) = vec_inside_cost; ++ BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo) = vec_outside_cost; ++ BB_VINFO_SCALAR_COST (bb_vinfo) = scalar_cost; ++ ++ if (!unlimited_cost_model (NULL) && dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n"); + dump_printf (MSG_NOTE, " Vector inside of basic block cost: %d\n", +@@ -3182,6 +4054,22 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vi + + vect_pattern_recog (bb_vinfo); + ++ /* Transpose grouped stores and loads for better vectorizable version. */ ++ if (bb_vinfo->transposed) ++ { ++ if (!vect_analyze_transpose (bb_vinfo)) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "not vectorized: unhandled slp transposed in " ++ "basic block.\n"); ++ } ++ return false; ++ } ++ } ++ bb_vinfo->before_slp = true; ++ + /* Check the SLP opportunities in the basic block, analyze and build SLP + trees. */ + if (!vect_analyze_slp (bb_vinfo, n_stmts)) +@@ -3197,6 +4085,20 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vi + return false; + } + ++ /* Check if the vectype is suitable for SLP transposed. */ ++ if (bb_vinfo->transposed && !check_for_slp_vectype (bb_vinfo)) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "Failed to SLP transposed in the basic block.\n"); ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "not vectorized: vectype is not suitable for " ++ "SLP transposed in basic block.\n"); ++ } ++ return false; ++ } ++ + vect_record_base_alignments (bb_vinfo); + + /* Analyze and verify the alignment of data references and the +@@ -3229,6 +4131,26 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vi + if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ()) + return false; + ++ /* Check if the alignment is suitable for SLP transposed. 
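++     E.g. a load whose supportable alignment scheme is
++     dr_explicit_realign or dr_explicit_realign_optimized makes
++     check_for_dr_alignment () return false, and the transposed
++     analysis of this basic block is rejected.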
*/ ++ if (bb_vinfo->transposed) ++ { ++ for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); i++) ++ { ++ if (!check_for_dr_alignment (instance)) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "Failed to SLP transposed in the basic block.\n"); ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "not vectorized: alignment is not suitable for " ++ "SLP transposed in basic block.\n"); ++ } ++ return false; ++ } ++ } ++ } ++ + if (!vect_slp_analyze_operations (bb_vinfo)) + { + if (dump_enabled_p ()) +@@ -3254,6 +4176,90 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vi + return true; + } + ++static bool ++may_new_transpose_bbvinfo (bb_vec_info bb_vinfo_ori, bool res_ori) ++{ ++ /* If the option turns off or the slp analysis is broken before ++ vect_analyze_slp, we don't try to analyze transposed SLP version. */ ++ if (!flag_tree_slp_transpose_vectorize ++ || !BB_VINFO_BEFORE_SLP (bb_vinfo_ori)) ++ { ++ return false; ++ } ++ else ++ { ++ /* Caculate the cost of bb_vinfo_ori. */ ++ if (unlimited_cost_model (NULL)) ++ { ++ vect_bb_vectorization_profitable_p (bb_vinfo_ori); ++ } ++ ++ /* If bb_vinfo_ori can't be vectorized ++ or vec cost and scalar cost are not much difference, ++ we try to new a bb_vinfo of the transposed version. */ ++ if (!res_ori ++ || BB_VINFO_SCALAR_COST (bb_vinfo_ori) ++ < 4 * (BB_VINFO_VEC_INSIDE_COST (bb_vinfo_ori) ++ + BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo_ori))) ++ { ++ return true; ++ } ++ return false; ++ } ++} ++ ++static bool ++may_chose_transpose_bbvinfo (bb_vec_info bb_vinfo_trans, bool res_trans, ++ bb_vec_info bb_vinfo_ori, bool res_ori) ++{ ++ /* Caculate the cost of bb_vinfo_trans. */ ++ if (unlimited_cost_model (NULL)) ++ { ++ vect_bb_vectorization_profitable_p (bb_vinfo_trans); ++ } ++ int diff_bb_cost = -1; ++ int diff_bb_cost_trans = -1; ++ if (res_ori) ++ { ++ diff_bb_cost = BB_VINFO_SCALAR_COST (bb_vinfo_ori) ++ - BB_VINFO_VEC_INSIDE_COST (bb_vinfo_ori) ++ - BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo_ori); ++ } ++ if (res_trans) ++ { ++ diff_bb_cost_trans = BB_VINFO_SCALAR_COST (bb_vinfo_trans) ++ - BB_VINFO_VEC_INSIDE_COST (bb_vinfo_trans) ++ - BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo_trans); ++ } ++ /* If the cost of original version and transposed version ++ is smaller, bb_vinfo_ori is chosen. */ ++ if (!res_trans ++ || (res_ori && res_trans && diff_bb_cost >= diff_bb_cost_trans) ++ || (res_trans && BB_VINFO_SCALAR_COST (bb_vinfo_trans) ++ < (BB_VINFO_VEC_INSIDE_COST (bb_vinfo_trans) ++ + BB_VINFO_VEC_OUTSIDE_COST (bb_vinfo_trans)))) ++ { ++ init_stmt_info_slp_transpose (bb_vinfo_ori); ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Basic block part vectorized " ++ "using original version.\n"); ++ } ++ return false; ++ } ++ else ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Basic block part vectorized " ++ "using transposed version.\n"); ++ } ++ return true; ++ } ++} ++ + /* Subroutine of vect_slp_bb. Try to vectorize the statements between + REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true + on success. The region has N_STMTS statements and has the datarefs +@@ -3266,6 +4272,8 @@ vect_slp_bb_region (gimple_stmt_iterator + unsigned int n_stmts) + { + bb_vec_info bb_vinfo; ++ bb_vec_info bb_vinfo_ori; ++ bb_vec_info bb_vinfo_trans; + auto_vector_modes vector_modes; + + /* Autodetect first vector size we try. 
*/ +@@ -3280,17 +4288,66 @@ vect_slp_bb_region (gimple_stmt_iterator + { + bool vectorized = false; + bool fatal = false; +- bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared); ++ bool res_bb_vinfo_ori = false; ++ bool res_bb_vinfo_trans = false; ++ bool transpose_bb = false; + ++ /* New a bb_vinfo of the original version. */ ++ bb_vinfo_ori = new _bb_vec_info (region_begin, region_end, &shared); + bool first_time_p = shared.datarefs.is_empty (); +- BB_VINFO_DATAREFS (bb_vinfo) = datarefs; ++ BB_VINFO_DATAREFS (bb_vinfo_ori) = datarefs; + if (first_time_p) +- bb_vinfo->shared->save_datarefs (); ++ { ++ bb_vinfo_ori->shared->save_datarefs (); ++ } ++ else ++ { ++ bb_vinfo_ori->shared->check_datarefs (); ++ } ++ bb_vinfo_ori->vector_mode = next_vector_mode; ++ bb_vinfo_ori->transposed = false; ++ bb_vinfo_ori->before_slp = false; ++ ++ res_bb_vinfo_ori = vect_slp_analyze_bb_1 (bb_vinfo_ori, n_stmts, fatal); ++ ++ if (may_new_transpose_bbvinfo (bb_vinfo_ori, res_bb_vinfo_ori)) ++ { ++ transpose_bb = true; ++ bool fatal_trans = false; ++ bb_vinfo_trans ++ = new _bb_vec_info (region_begin, region_end, &shared); ++ bool first_time_p = shared.datarefs.is_empty (); ++ BB_VINFO_DATAREFS (bb_vinfo_trans) = datarefs; ++ if (first_time_p) ++ { ++ bb_vinfo_trans->shared->save_datarefs (); ++ } ++ else ++ { ++ bb_vinfo_trans->shared->check_datarefs (); ++ } ++ bb_vinfo_trans->vector_mode = next_vector_mode; ++ bb_vinfo_trans->transposed = true; ++ ++ res_bb_vinfo_trans ++ = vect_slp_analyze_bb_1 (bb_vinfo_trans, n_stmts, fatal_trans); ++ if (may_chose_transpose_bbvinfo (bb_vinfo_trans,res_bb_vinfo_trans, ++ bb_vinfo_ori, res_bb_vinfo_ori)) ++ { ++ bb_vinfo = bb_vinfo_trans; ++ fatal = fatal_trans; ++ } ++ else ++ { ++ bb_vinfo = bb_vinfo_ori; ++ } ++ } + else +- bb_vinfo->shared->check_datarefs (); +- bb_vinfo->vector_mode = next_vector_mode; ++ { ++ bb_vinfo = bb_vinfo_ori; ++ } + +- if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal) ++ if ((res_bb_vinfo_ori || res_bb_vinfo_trans) + && dbg_cnt (vect_slp)) + { + if (dump_enabled_p ()) +@@ -3342,7 +4399,12 @@ vect_slp_bb_region (gimple_stmt_iterator + mode_i += 1; + } + +- delete bb_vinfo; ++ bb_vinfo = NULL; ++ delete bb_vinfo_ori; ++ if (transpose_bb) ++ { ++ delete bb_vinfo_trans; ++ } + + if (mode_i < vector_modes.length () + && VECTOR_MODE_P (autodetected_vector_mode) +diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +--- a/gcc/tree-vect-stmts.c 2021-12-02 14:32:40.394348600 +0800 ++++ b/gcc/tree-vect-stmts.c 2021-12-02 20:41:58.256721300 +0800 +@@ -2231,6 +2231,268 @@ vect_get_store_rhs (stmt_vec_info stmt_i + gcc_unreachable (); + } + ++/* Function VECTOR_VECTOR_COMPOSITION_TYPE ++ ++ This function returns a vector type which can be composed with NETLS pieces, ++ whose type is recorded in PTYPE. VTYPE should be a vector type, and has the ++ same vector size as the return vector. It checks target whether supports ++ pieces-size vector mode for construction firstly, if target fails to, check ++ pieces-size scalar mode for construction further. It returns NULL_TREE if ++ fails to find the available composition. ++ ++ For example, for (vtype=V16QI, nelts=4), we can probably get: ++ - V16QI with PTYPE V4QI. ++ - V4SI with PTYPE SI. ++ - NULL_TREE. 
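++   Similarly, for (vtype=V8HI, nelts=2) we can probably get:
++     - V8HI with PTYPE V4HI, if the target can build a V8HI from two
++       V4HI pieces.
++     - V2DI with PTYPE DI, falling back to 64-bit integer pieces.
++     - NULL_TREE, if neither construction is supported.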
*/ ++ ++static tree ++vector_vector_composition_type (tree vtype, poly_uint64 nelts, tree *ptype) ++{ ++ gcc_assert (VECTOR_TYPE_P (vtype)); ++ gcc_assert (known_gt (nelts, 0U)); ++ ++ machine_mode vmode = TYPE_MODE (vtype); ++ if (!VECTOR_MODE_P (vmode)) ++ return NULL_TREE; ++ ++ poly_uint64 vbsize = GET_MODE_BITSIZE (vmode); ++ unsigned int pbsize; ++ if (constant_multiple_p (vbsize, nelts, &pbsize)) ++ { ++ /* First check if vec_init optab supports construction from ++ vector pieces directly. */ ++ scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vtype)); ++ poly_uint64 inelts = pbsize / GET_MODE_BITSIZE (elmode); ++ machine_mode rmode; ++ if (related_vector_mode (vmode, elmode, inelts).exists (&rmode) ++ && (convert_optab_handler (vec_init_optab, vmode, rmode) ++ != CODE_FOR_nothing)) ++ { ++ *ptype = build_vector_type (TREE_TYPE (vtype), inelts); ++ return vtype; ++ } ++ ++ /* Otherwise check if exists an integer type of the same piece size and ++ if vec_init optab supports construction from it directly. */ ++ if (int_mode_for_size (pbsize, 0).exists (&elmode) ++ && related_vector_mode (vmode, elmode, nelts).exists (&rmode) ++ && (convert_optab_handler (vec_init_optab, rmode, elmode) ++ != CODE_FOR_nothing)) ++ { ++ *ptype = build_nonstandard_integer_type (pbsize, 1); ++ return build_vector_type (*ptype, nelts); ++ } ++ } ++ ++ return NULL_TREE; ++} ++ ++/* Find the loop with header containing STMT. */ ++ ++static loop_p ++find_in_all_loops (gimple *stmt) ++{ ++ loop_p loop; ++ FOR_EACH_LOOP (loop, 0) ++ { ++ basic_block bb = loop->header; ++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ++ gsi_next (&gsi)) ++ { ++ if (gsi_stmt (gsi) == stmt) ++ { ++ return loop; ++ } ++ } ++ } ++ return NULL; ++} ++ ++/* Check succeedor BB, BB without load is regarded as empty BB. Ignore empty ++ BB in DFS. */ ++ ++static unsigned ++get_bb_insns (basic_block bb, vec &stmts) ++{ ++ unsigned insns = 0; ++ for (gimple_stmt_iterator gsi = gsi_start_bb (bb); ++ !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gimple *stmt = gsi_stmt (gsi); ++ if (is_gimple_debug (stmt)) ++ { ++ continue; ++ } ++ if (is_gimple_assign (stmt) && gimple_has_mem_ops (stmt) ++ && !gimple_has_volatile_ops (stmt) ++ && TREE_CODE (gimple_assign_rhs1 (stmt)) == MEM_REF) ++ { ++ insns++; ++ stmts.safe_push (stmt); ++ } ++ } ++ return insns; ++} ++ ++static bool ++check_same_base (vec *datarefs, data_reference_p dr) ++{ ++ for (unsigned ui = 0; ui < datarefs->length (); ui++) ++ { ++ tree op1 = TREE_OPERAND (DR_BASE_OBJECT (dr), 0); ++ tree op2 = TREE_OPERAND (DR_BASE_OBJECT ((*datarefs)[ui]), 0); ++ if (TREE_CODE (op1) != TREE_CODE (op2)) ++ { ++ continue; ++ } ++ if (TREE_CODE (op1) == ADDR_EXPR) ++ { ++ op1 = TREE_OPERAND (op1, 0); ++ op2 = TREE_OPERAND (op2, 0); ++ } ++ enum tree_code code = TREE_CODE (op1); ++ switch (code) ++ { ++ case VAR_DECL: ++ if (DECL_NAME (op1) == DECL_NAME (op2) ++ && DR_IS_READ ((*datarefs)[ui])) ++ { ++ return true; ++ } ++ break; ++ case SSA_NAME: ++ if (SSA_NAME_VERSION (op1) == SSA_NAME_VERSION (op2) ++ && DR_IS_READ ((*datarefs)[ui])) ++ { ++ return true; ++ } ++ break; ++ default: ++ break; ++ } ++ } ++ return false; ++} ++ ++/* Iterate all load STMTS, if staisfying same base vectorized stmt, then return, ++ Otherwise, set false to SUCCESS. 
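++   E.g. (hypothetical): for a store to a[i], a candidate vectorized
++   load in some loop header whose data reference has the same base A
++   lets the function return with SUCCESS still true; if no candidate
++   matches, SUCCESS is set to false.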
*/
++
++static void
++check_vec_use (loop_vec_info loop_vinfo, vec<gimple *> &stmts,
++               gimple *stmt, bool &success)
++{
++  stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (stmt);
++  if (stmt_vinfo == NULL)
++    {
++      success = false;
++      return;
++    }
++  if (DR_IS_READ (stmt_vinfo->dr_aux.dr))
++    {
++      success = false;
++      return;
++    }
++  unsigned ui = 0;
++  gimple *candidate = NULL;
++  FOR_EACH_VEC_ELT (stmts, ui, candidate)
++    {
++      if (TREE_CODE (TREE_TYPE (gimple_get_lhs (candidate))) != VECTOR_TYPE)
++        {
++          continue;
++        }
++      loop_p cand_loop = find_in_all_loops (candidate);
++      if (cand_loop == NULL)
++        {
++          continue;
++        }
++      auto_vec<data_reference_p> datarefs;
++      tree res = find_data_references_in_bb
++                   (cand_loop, cand_loop->header, &datarefs);
++      if (res == chrec_dont_know)
++        {
++          success = false;
++          return;
++        }
++      if (check_same_base (&datarefs, stmt_vinfo->dr_aux.dr))
++        {
++          return;
++        }
++    }
++  success = false;
++}
++
++/* Depth-first search from the present BB.  If a successor has load
++   STMTS, stop searching further.  */
++
++static void
++dfs_check_bb (loop_vec_info loop_vinfo, basic_block bb, gimple *stmt,
++              bool &success, vec<basic_block> &visited_bbs)
++{
++  if (bb == cfun->cfg->x_exit_block_ptr)
++    {
++      success = false;
++      return;
++    }
++  if (!success || visited_bbs.contains (bb) || bb == loop_vinfo->loop->latch)
++    {
++      return;
++    }
++  visited_bbs.safe_push (bb);
++  auto_vec<gimple *> stmts;
++  unsigned insns = get_bb_insns (bb, stmts);
++  /* Empty BB.  */
++  if (insns == 0)
++    {
++      edge e;
++      edge_iterator ei;
++      FOR_EACH_EDGE (e, ei, bb->succs)
++        {
++          dfs_check_bb (loop_vinfo, e->dest, stmt, success, visited_bbs);
++          if (!success)
++            {
++              return;
++            }
++        }
++      return;
++    }
++  /* Non-empty BB.  */
++  check_vec_use (loop_vinfo, stmts, stmt, success);
++}
++
++/* For a grouped store, check whether all successors of the present BB
++   contain a vectorized load from the same base as the store.  If so,
++   memory_access_type can be set to VMAT_CONTIGUOUS_PERMUTE instead of
++   VMAT_LOAD_STORE_LANES.  */
++
++static bool
++conti_perm (gimple *stmt, loop_vec_info loop_vinfo)
++{
++  if (gimple_code (stmt) != GIMPLE_ASSIGN)
++    {
++      return false;
++    }
++  stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (stmt);
++  if (DR_IS_READ (stmt_vinfo->dr_aux.dr))
++    {
++      return false;
++    }
++  tree lhs = gimple_get_lhs (stmt);
++  if (TREE_CODE (lhs) != ARRAY_REF)
++    {
++      return false;
++    }
++  basic_block bb = stmt->bb;
++  bool success = true;
++  auto_vec<basic_block> visited_bbs;
++  visited_bbs.safe_push (bb);
++  edge e;
++  edge_iterator ei;
++  FOR_EACH_EDGE (e, ei, bb->succs)
++    {
++      dfs_check_bb (loop_vinfo, e->dest, stmt, success, visited_bbs);
++    }
++  return success;
++}
++
+ /* A subroutine of get_load_store_type, with a subset of the same
+    arguments.  Handle the case where STMT_INFO is part of a grouped load
+    or store.
+@@ -2386,11 +2648,14 @@ get_group_load_store_type (stmt_vec_info
+     }
+ 
+   /* If that fails, try using permuting loads.  */
+-  if (*memory_access_type == VMAT_ELEMENTWISE
+-      && (vls_type == VLS_LOAD
+-	  ? vect_grouped_load_supported (vectype, single_element_p,
+-					 group_size)
+-	  : vect_grouped_store_supported (vectype, group_size)))
++  if ((*memory_access_type == VMAT_ELEMENTWISE
++       || (!loop_vinfo->loop->processing_ldist
++	   && *memory_access_type == VMAT_LOAD_STORE_LANES
++	   && conti_perm (stmt_info->stmt, loop_vinfo)))
++      && (vls_type == VLS_LOAD
++	  ? 
vect_grouped_load_supported (vectype, single_element_p, ++ group_size) ++ : vect_grouped_store_supported (vectype, group_size))) + { + *memory_access_type = VMAT_CONTIGUOUS_PERMUTE; + overrun_p = would_overrun_p; +@@ -6419,6 +6684,146 @@ get_group_alias_ptr_type (stmt_vec_info + return reference_alias_ptr_type (DR_REF (first_dr)); + } + ++/* Function vect_permute_store_chains ++ ++ Call function vect_permute_store_chain (). ++ Given a chain of interleaved stores in DR_CHAIN, generate ++ interleave_high/low stmts to reorder the data correctly for the stores. ++ Return the final references for stores in RESULT_CHAIN. */ ++ ++static void ++vect_permute_store_chains (vec dr_chain, unsigned int num_each, ++ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ++ vec *result_chain, unsigned int group) ++{ ++ unsigned int k = 0; ++ unsigned int t = 0; ++ auto_vec dr_chain_transposed (num_each); ++ auto_vec result_chain_transposed (num_each); ++ for (k = 0; k < group; k++) ++ { ++ for (t = k; t < dr_chain.length (); t = t + group) ++ { ++ dr_chain_transposed.quick_push (dr_chain[t]); ++ } ++ vect_permute_store_chain (dr_chain_transposed, num_each, stmt_info, ++ gsi, &result_chain_transposed); ++ for (t = 0; t < num_each; t++) ++ { ++ result_chain->quick_push (result_chain_transposed[t]); ++ } ++ dr_chain_transposed.create (num_each); ++ result_chain_transposed.create (num_each); ++ } ++} ++ ++/* Function transpose_oprnd_store ++ ++ Calcute the transposed results from VEC_OPRNDS. */ ++ ++static void ++transpose_oprnd_store (vecvec_oprnds, vec *result_chain, ++ unsigned int vec_num, unsigned int const_nunits, ++ unsigned int array_num, stmt_vec_info first_stmt_info, ++ gimple_stmt_iterator *gsi) ++{ ++ unsigned int group_for_transform = 0; ++ unsigned int num_each = 0; ++ ++ /* Transpose back for vec_oprnds. */ ++ /* vec = {vec1, vec2, ...} */ ++ if (array_num < const_nunits ++ && const_nunits % array_num == 0) ++ { ++ vect_transpose_store_chain (vec_oprnds, ++ vec_num, array_num, ++ first_stmt_info, ++ gsi, result_chain); ++ } ++ /* vec1 = {vec_part1}, vec2 = {vec_part2}, ... */ ++ else if (array_num >= const_nunits ++ && array_num % const_nunits == 0) ++ { ++ group_for_transform = array_num / const_nunits; ++ num_each = vec_oprnds.length () / group_for_transform; ++ vect_permute_store_chains (vec_oprnds, ++ num_each, first_stmt_info, ++ gsi, result_chain, ++ group_for_transform); ++ } ++} ++ ++static dr_vec_info* ++get_dr_info (stmt_vec_info stmt_info) ++{ ++ dr_vec_info *dr_info = STMT_VINFO_DR_INFO (stmt_info); ++ if (dr_info->misalignment == DR_MISALIGNMENT_UNINITIALIZED) ++ { ++ SET_DR_MISALIGNMENT (dr_info, DR_MISALIGNMENT_UNKNOWN); ++ } ++ return dr_info; ++} ++ ++static unsigned ++dr_align_vect_store (dr_vec_info* cur_first_dr_info, ++ unsigned HOST_WIDE_INT &align) ++{ ++ unsigned misalign = 0; ++ align = known_alignment (DR_TARGET_ALIGNMENT (cur_first_dr_info)); ++ if (aligned_access_p (cur_first_dr_info)) ++ { ++ ; ++ } ++ else if (DR_MISALIGNMENT (cur_first_dr_info) == -1) ++ { ++ align = dr_alignment (vect_dr_behavior (cur_first_dr_info)); ++ } ++ else ++ { ++ misalign = DR_MISALIGNMENT (cur_first_dr_info); ++ } ++ return misalign; ++} ++ ++static stmt_vec_info ++add_new_stmt_vect_store (tree vectype, tree dataref_ptr, tree dataref_offset, ++ tree ref_type, dr_vec_info *cur_first_dr_info, tree vec_oprnd, ++ gimple_stmt_iterator *gsi, stmt_vec_info stmt_info) ++{ ++ /* Data align. 
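++     Three cases follow, mirroring vectorizable_store: the access is
++     known to be aligned; its misalignment is unknown (-1), in which
++     case the alignment of the data reference behavior is used; or a
++     known misalignment is recorded for the pointer.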
*/ ++ unsigned HOST_WIDE_INT align; ++ unsigned misalign = dr_align_vect_store (cur_first_dr_info, align); ++ ++ if (dataref_offset == NULL_TREE && TREE_CODE (dataref_ptr) == SSA_NAME) ++ { ++ set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, misalign); ++ } ++ ++ /* Get data_ref. */ ++ tree offset = dataref_offset ? dataref_offset : build_int_cst (ref_type, 0); ++ tree data_ref = fold_build2 (MEM_REF, vectype, dataref_ptr, offset); ++ if (aligned_access_p (cur_first_dr_info)) ++ { ++ ; ++ } ++ else if (DR_MISALIGNMENT (cur_first_dr_info) == -1) ++ { ++ TREE_TYPE (data_ref) = build_aligned_type (TREE_TYPE (data_ref), ++ align * BITS_PER_UNIT); ++ } ++ else ++ { ++ tree elem_type = TREE_TYPE (vectype); ++ TREE_TYPE (data_ref) = build_aligned_type (TREE_TYPE (data_ref), ++ TYPE_ALIGN (elem_type)); ++ } ++ /* Add new stmt. */ ++ vect_copy_ref_info (data_ref, DR_REF (cur_first_dr_info->dr)); ++ gassign *new_stmt = gimple_build_assign (data_ref, vec_oprnd); ++ stmt_vec_info new_stmt_info ++ = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); ++ return new_stmt_info; ++} + + /* Function vectorizable_store. + +@@ -7281,6 +7686,15 @@ vectorizable_store (stmt_vec_info stmt_i + else if (STMT_VINFO_GATHER_SCATTER_P (stmt_info)) + vect_get_gather_scatter_ops (loop, stmt_info, &gs_info, + &dataref_ptr, &vec_offset); ++ /* If the stmt_info need to be transposed recovery, dataref_ptr ++ will be caculated later. */ ++ else if (memory_access_type == VMAT_CONTIGUOUS ++ && is_a (vinfo) ++ && STMT_VINFO_GROUPED_ACCESS (stmt_info) ++ && DR_GROUP_SLP_TRANSPOSE (DR_GROUP_FIRST_ELEMENT (stmt_info))) ++ { ++ dataref_ptr = NULL_TREE; ++ } + else + dataref_ptr + = vect_create_data_ref_ptr (first_stmt_info, aggr_type, +@@ -7372,6 +7786,74 @@ vectorizable_store (stmt_vec_info stmt_i + } + else + { ++ /* group_size: the size of group after merging. ++ group_size_b: the size of group before merging, and only ++ group_size_b >= const_nunits is supported. ++ array_num: the number of arrays. ++ const_nunits: TYPE_VECTOR_SUBPARTS (vectype). ++ ncontinues: group_size_b / const_nunits, it means the number of ++ times an array is stored in memory. */ ++ if (slp && is_a (vinfo) ++ && STMT_VINFO_GROUPED_ACCESS (stmt_info) ++ && DR_GROUP_SLP_TRANSPOSE (DR_GROUP_FIRST_ELEMENT (stmt_info))) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "vectorizable_store for slp transpose.\n"); ++ } ++ /* Transpose back for grouped stores. */ ++ if (!vect_transform_back_slp_grouped_stores ++ (bb_vinfo, first_stmt_info)) ++ { ++ return false; ++ } ++ ++ result_chain.create (vec_oprnds.length ()); ++ unsigned int const_nunits = nunits.to_constant (); ++ unsigned int group_size_b = DR_GROUP_SIZE_TRANS (first_stmt_info); ++ unsigned int array_num = group_size / group_size_b; ++ transpose_oprnd_store (vec_oprnds, &result_chain, vec_num, ++ const_nunits, array_num, first_stmt_info, gsi); ++ ++ /* For every store group, not for every vec, because merging ++ and rearranging have changed the data reference access. 
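++	     A hypothetical sizing: group_size = 8 after merging two
++	     arrays, group_size_b = 4 before merging, const_nunits = 4;
++	     then array_num = 2 and ncontinues = 1, i.e. a data
++	     reference pointer is created once per original array and
++	     each array is stored with one vector store per copy.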
*/ ++ gcc_assert (group_size_b >= const_nunits); ++ unsigned int ncontinues = group_size_b / const_nunits; ++ ++ unsigned int k = 0; ++ for (i = 0; i < array_num; i++) ++ { ++ stmt_vec_info first_stmt_b; ++ BB_VINFO_GROUPED_STORES (vinfo).iterate (i, &first_stmt_b); ++ bool simd_lane_access_p ++ = STMT_VINFO_SIMD_LANE_ACCESS_P (first_stmt_b) != 0; ++ tree ref_type = get_group_alias_ptr_type (first_stmt_b); ++ dataref_ptr ++ = vect_create_data_ref_ptr (first_stmt_b, aggr_type, ++ simd_lane_access_p ? loop : NULL, offset, &dummy, gsi, ++ &ptr_incr, simd_lane_access_p, NULL_TREE, bump); ++ dr_vec_info *cur_first_dr_info = get_dr_info (first_stmt_b); ++ for (unsigned int t = 0; t < ncontinues; t++) ++ { ++ vec_oprnd = result_chain[k]; ++ k++; ++ if (t > 0) ++ { ++ /* Bump the vector pointer. */ ++ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, ++ gsi, first_stmt_b, bump); ++ } ++ new_stmt_info = add_new_stmt_vect_store (vectype, ++ dataref_ptr, dataref_offset, ref_type, ++ cur_first_dr_info, vec_oprnd, gsi, first_stmt_b); ++ } ++ } ++ oprnds.release (); ++ result_chain.release (); ++ vec_oprnds.release (); ++ return true; ++ } + new_stmt_info = NULL; + if (grouped_store) + { +@@ -7630,6 +8112,439 @@ hoist_defs_of_uses (stmt_vec_info stmt_i + return true; + } + ++static tree ++calculate_new_type (tree vectype, unsigned int const_nunits, ++ unsigned int group_size_b, unsigned int &nloads, ++ unsigned int &ncontinues, tree &lvectype) ++{ ++ tree ltype = TREE_TYPE (vectype); ++ /* nloads is the number of ARRAYs in a vector. ++ vectemp = {a[], b[], ...} */ ++ if (group_size_b < const_nunits) ++ { ++ tree ptype; ++ tree vtype ++ = vector_vector_composition_type (vectype, ++ const_nunits / group_size_b, ++ &ptype); ++ if (vtype != NULL_TREE) ++ { ++ nloads = const_nunits / group_size_b; ++ lvectype = vtype; ++ ltype = ptype; ++ ncontinues = 1; ++ } ++ } ++ /* ncontinues is the number of vectors from an ARRAY. ++ vectemp1 = {a[0], a[1], ...} ++ ... ++ vectempm = {a[k], a[k+1], ...} */ ++ else ++ { ++ nloads = 1; ++ ltype = vectype; ++ ncontinues = group_size_b / const_nunits; ++ } ++ ltype = build_aligned_type (ltype, TYPE_ALIGN (TREE_TYPE (vectype))); ++ return ltype; ++} ++ ++static vec ++generate_old_load_permutations (slp_tree slp_node, unsigned int group_size) ++{ ++ /* Generate the new load_permutations for the new dr_chain. */ ++ unsigned i = 0; ++ unsigned k = 0; ++ ++ vec old_load_permutation; ++ old_load_permutation.create (group_size); ++ /* Save the old load_permutation. */ ++ if (SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) ++ { ++ FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (slp_node), i, k) ++ { ++ old_load_permutation.safe_push (k); ++ } ++ } ++ else ++ { ++ for (unsigned i = 0; i < group_size; i++) ++ { ++ old_load_permutation.safe_push (i); ++ } ++ } ++ return old_load_permutation; ++} ++ ++static auto_vec ++generate_new_load_permutation_mapping (unsigned slp_node_length, ++ vec &group_el, ++ vec old_load_permutation, ++ unsigned int group_size_b, unsigned &new_group_size) ++{ ++ /* group_num_vec: only store the different group number; ++ group_from: store the group number for every stmt in slp_node. */ ++ auto_vec group_num_vec; ++ auto_vec group_from (slp_node_length); ++ /* Caculate which group are the stmts in the slp_node from. 
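++     A worked (hypothetical) example: with the old load permutation
++     {0, 9, 2, 11} and group_size_b = 4, the stmts come from the
++     groups {0, 2, 0, 2}; group_num_vec = {0, 2}, so n_groups = 2,
++     new_group_size = 8, group_el = {0, 8}, and group_from is remapped
++     to the compact indices {0, 1, 0, 1}.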
*/ ++ unsigned i = 0; ++ unsigned k = 0; ++ FOR_EACH_VEC_ELT (old_load_permutation, i, k) ++ { ++ unsigned int t0 = k / group_size_b; ++ if (!group_num_vec.contains (t0)) ++ { ++ group_num_vec.safe_push (t0); ++ } ++ group_from.safe_push (t0); ++ } ++ group_num_vec.qsort (cmp_for_group_num); ++ /* n_groups: the size of groups. */ ++ unsigned int n_groups = group_num_vec.length (); ++ new_group_size = n_groups * group_size_b; ++ for (i = 0; i < n_groups; i++) ++ { ++ group_el.safe_push (group_num_vec[i] * group_size_b); ++ } ++ /* A new mapping from group_ind_vec to group_from. */ ++ auto_vec group_ind_vec (n_groups); ++ for (k = 0; k < n_groups; k++) ++ { ++ group_ind_vec.safe_push (k); ++ } ++ for (i = 0; i < slp_node_length; i++) ++ { ++ for (k = 0; k < n_groups; k++) ++ { ++ if (group_from[i] == group_num_vec[k]) ++ { ++ group_from[i] = group_ind_vec[k]; ++ break; ++ } ++ } ++ } ++ return group_from; ++} ++ ++static vec ++generate_new_load_permutation (slp_tree slp_node, ++ vec old_load_permutation, ++ bool &this_load_permuted, ++ vec group_from, ++ unsigned int group_size_b) ++{ ++ unsigned slp_node_length = SLP_TREE_SCALAR_STMTS (slp_node).length (); ++ /* Generate the new load permutation from the new mapping. */ ++ vec new_load_permutation; ++ new_load_permutation.create (slp_node_length); ++ unsigned i = 0; ++ unsigned k = 0; ++ FOR_EACH_VEC_ELT (old_load_permutation, i, k) ++ { ++ /* t1 is the new permutation of k in the old permutation. ++ t1 = base_address + offset: ++ base_address = group_from[i] * group_size_b; ++ offset = k % group_size_b. */ ++ unsigned int t1 ++ = group_from[i] * group_size_b + k % group_size_b; ++ new_load_permutation.safe_push (t1); ++ if (t1 != k) ++ { ++ this_load_permuted = true; ++ } ++ } ++ return new_load_permutation; ++} ++ ++static bool ++is_slp_perm (bool slp_perm, bool this_load_permuted, poly_uint64 nunits, ++ unsigned int group_size, stmt_vec_info first_stmt_info) ++{ ++ /* Calculate the unrolling factor based on the smallest type. */ ++ poly_uint64 unrolling_factor ++ = exact_div (common_multiple (nunits, group_size), group_size); ++ if (!slp_perm && !this_load_permuted ++ && (known_eq (unrolling_factor, 1U) ++ || (group_size == DR_GROUP_SIZE (first_stmt_info) ++ && DR_GROUP_GAP (first_stmt_info) == 0))) ++ { ++ slp_perm = false; ++ } ++ else ++ { ++ slp_perm = true; ++ } ++ return slp_perm; ++} ++ ++static vec ++generate_load_permutation (slp_tree slp_node, unsigned &new_group_size, ++ unsigned int group_size, unsigned int group_size_b, ++ bool &this_load_permuted, ++ vec &group_el) ++{ ++ /* Generate the new load_permutations for the new dr_chain. */ ++ vec old_load_permutation ++ = generate_old_load_permutations (slp_node, group_size); ++ ++ /* Caculate which group are the stmts in the slp_node from. */ ++ unsigned slp_node_length = SLP_TREE_SCALAR_STMTS (slp_node).length (); ++ auto_vec group_from ++ = generate_new_load_permutation_mapping (slp_node_length, group_el, ++ old_load_permutation, ++ group_size_b, new_group_size); ++ ++ /* Generate the new load permutation from the new mapping. ++ If this_load_permuted is true, we need execute slp permutation ++ by using new load permutation. 
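++     Continuing the worked example above: t1 = group_from[i]
++     * group_size_b + k % group_size_b turns the old permutation
++     {0, 9, 2, 11} into {0, 5, 2, 7}; since t1 != k for some elements,
++     this_load_permuted becomes true.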
*/ ++ vec new_load_permutation = generate_new_load_permutation ++ (slp_node, old_load_permutation, this_load_permuted, ++ group_from, group_size_b); ++ old_load_permutation.release (); ++ return new_load_permutation; ++} ++ ++static unsigned int ++dr_align_vect_load (dr_vec_info* cur_first_dr_info, ++ unsigned HOST_WIDE_INT &align, ++ enum dr_alignment_support alignment_support_scheme) ++{ ++ unsigned int misalign = 0; ++ ++ align = known_alignment (DR_TARGET_ALIGNMENT (cur_first_dr_info)); ++ if (alignment_support_scheme == dr_aligned) ++ { ++ gcc_assert (aligned_access_p (cur_first_dr_info)); ++ } ++ else if (DR_MISALIGNMENT (cur_first_dr_info) == -1) ++ { ++ align = dr_alignment (vect_dr_behavior (cur_first_dr_info)); ++ } ++ else ++ { ++ misalign = DR_MISALIGNMENT (cur_first_dr_info); ++ } ++ return misalign; ++} ++ ++static stmt_vec_info ++add_new_stmt_vect_load (tree vectype, tree dataref_ptr, tree dataref_offset, ++ tree ref_type, tree ltype, dr_vec_info *cur_first_dr_info, ++ gimple_stmt_iterator *gsi, stmt_vec_info stmt_info, gassign *(&new_stmt)) ++{ ++ /* Data align. */ ++ enum dr_alignment_support alignment_support_scheme ++ = vect_supportable_dr_alignment (cur_first_dr_info, false); ++ unsigned HOST_WIDE_INT align; ++ unsigned int misalign = dr_align_vect_load (cur_first_dr_info, align, ++ alignment_support_scheme); ++ if (dataref_offset == NULL_TREE && TREE_CODE (dataref_ptr) == SSA_NAME) ++ { ++ set_ptr_info_alignment (get_ptr_info (dataref_ptr), align, misalign); ++ } ++ ++ /* Get data_ref. */ ++ tree offset = dataref_offset ? dataref_offset : build_int_cst (ref_type, 0); ++ tree data_ref = fold_build2 (MEM_REF, ltype, dataref_ptr, offset); ++ if (alignment_support_scheme == dr_aligned) ++ { ++ ; ++ } ++ else if (DR_MISALIGNMENT (cur_first_dr_info) == -1) ++ { ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), ++ align * BITS_PER_UNIT); ++ } ++ else ++ { ++ tree elem_type = TREE_TYPE (vectype); ++ TREE_TYPE (data_ref) ++ = build_aligned_type (TREE_TYPE (data_ref), ++ TYPE_ALIGN (elem_type)); ++ } ++ ++ /* Add new stmt. 
*/ ++ vect_copy_ref_info (data_ref, DR_REF (cur_first_dr_info->dr)); ++ new_stmt = gimple_build_assign (make_ssa_name (ltype), data_ref); ++ stmt_vec_info new_stmt_info ++ = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); ++ return new_stmt_info; ++} ++ ++static void ++push_new_stmt_to_dr_chain (bool slp, bool slp_perm, stmt_vec_info new_stmt_info, ++ vec &dr_chain, slp_tree slp_node) ++{ ++ if (slp) ++ { ++ if (slp_perm) ++ { ++ dr_chain.quick_push (gimple_assign_lhs (new_stmt_info->stmt)); ++ } ++ else ++ { ++ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); ++ } ++ } ++} ++ ++static stmt_vec_info ++add_new_stmt_for_nloads_greater_than_one (tree lvectype, ++ tree vectype, vec *v, ++ stmt_vec_info stmt_info, gimple_stmt_iterator *gsi) ++{ ++ tree vec_inv = build_constructor (lvectype, v); ++ tree new_temp = vect_init_vector (stmt_info, vec_inv, lvectype, gsi); ++ vec_info *vinfo = stmt_info->vinfo; ++ stmt_vec_info new_stmt_info = vinfo->lookup_def (new_temp); ++ if (lvectype != vectype) ++ { ++ gassign *new_stmt = gimple_build_assign (make_ssa_name (vectype), ++ VIEW_CONVERT_EXPR, ++ build1 (VIEW_CONVERT_EXPR, ++ vectype, new_temp)); ++ new_stmt_info = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); ++ } ++ return new_stmt_info; ++} ++ ++static stmt_vec_info ++get_first_stmt_info_before_transpose (stmt_vec_info first_stmt_info, ++ unsigned int group_el, unsigned int group_size) ++{ ++ stmt_vec_info first_stmt_info_before = NULL; ++ stmt_vec_info last_stmt_info = NULL; ++ first_stmt_info_before = first_stmt_info; ++ last_stmt_info = first_stmt_info_before; ++ unsigned int count = 0; ++ while (count < group_el && group_el < group_size) ++ { ++ last_stmt_info = DR_GROUP_NEXT_ELEMENT (last_stmt_info); ++ count++; ++ } ++ first_stmt_info_before = last_stmt_info; ++ return first_stmt_info_before; ++} ++ ++ ++/* Function new_vect_stmt_for_nloads. ++ ++ New a VEC_STMT when nloads Arrays are merged into a vector. ++ ++ nloads is the number of ARRAYs in a vector. ++ vectemp = {a[], b[], ...} */ ++ ++ ++static void ++new_vect_stmt_for_nloads (unsigned int ncopies, unsigned int nloads, ++ vec group_el, stmt_vec_info stmt_info, ++ offset_info* offset_info, vectype_info* vectype_info, ++ vect_memory_access_type memory_access_type, bool slp, ++ bool slp_perm, vec& dr_chain, slp_tree slp_node, ++ gimple_stmt_iterator *gsi) ++{ ++ vec *v = NULL; ++ stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); ++ unsigned int group_size = DR_GROUP_SIZE (first_stmt_info); ++ stmt_vec_info first_stmt_info_b = NULL; ++ stmt_vec_info new_stmt_info = NULL; ++ tree dataref_ptr = NULL_TREE; ++ tree dummy; ++ gimple *ptr_incr = NULL; ++ unsigned int n = 0; ++ for (unsigned int i = 0; i < ncopies; i++) ++ { ++ vec_alloc (v, nloads); ++ for (unsigned int t = 0; t < nloads; t++) ++ { ++ first_stmt_info_b = get_first_stmt_info_before_transpose ++ (first_stmt_info, group_el[n++], group_size); ++ dr_vec_info* cur_first_dr_info = get_dr_info (first_stmt_info_b); ++ tree bump = vect_get_data_ptr_increment (cur_first_dr_info, ++ vectype_info->ltype, memory_access_type); ++ bool simd_lane_access_p ++ = STMT_VINFO_SIMD_LANE_ACCESS_P (first_stmt_info_b) != 0; ++ ++ /* Create dataref_ptr which is point to init_address. 
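++	     E.g. (hypothetical): with const_nunits = 8 and
++	     group_size_b = 4 we get nloads = 2 and ncontinues = 1, so
++	     each copy loads one 4-element piece from each of two
++	     arrays and the constructor below builds
++	     vectemp = {a[0..3], b[0..3]}.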
*/ ++ dataref_ptr = vect_create_data_ref_ptr (first_stmt_info_b, ++ vectype_info->ltype, NULL, offset_info->offset, &dummy, gsi, ++ &ptr_incr, simd_lane_access_p, offset_info->byte_offset, bump); ++ ++ gassign *new_stmt = NULL; ++ new_stmt_info = add_new_stmt_vect_load (vectype_info->vectype, ++ dataref_ptr, offset_info->dataref_offset, ++ vectype_info->ref_type, vectype_info->ltype, ++ cur_first_dr_info, gsi, first_stmt_info_b, new_stmt); ++ ++ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, gimple_assign_lhs (new_stmt)); ++ } ++ new_stmt_info = add_new_stmt_for_nloads_greater_than_one ++ (vectype_info->lvectype, vectype_info->vectype, ++ v, first_stmt_info_b, gsi); ++ push_new_stmt_to_dr_chain (slp, slp_perm, new_stmt_info, ++ dr_chain, slp_node); ++ } ++} ++ ++/* Function new_vect_stmt_for_ncontinues. ++ ++ New the VEC_STMTs when an Array is divided into several vectors. ++ ++ ncontinues is the number of vectors from an ARRAY. ++ vectemp1 = {a[0], a[1], ...} ++ ... ++ vectempm = {a[k], a[k+1], ...} */ ++ ++static void ++new_vect_stmt_for_ncontinues (unsigned int ncontinues, vec group_el, ++ stmt_vec_info stmt_info, offset_info* offset_info, ++ vectype_info* vectype_info, gimple_stmt_iterator *gsi, ++ vect_memory_access_type memory_access_type, bool slp, ++ bool slp_perm, vec& dr_chain, slp_tree slp_node) ++{ ++ stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); ++ unsigned int group_size = DR_GROUP_SIZE (first_stmt_info); ++ stmt_vec_info new_stmt_info = NULL; ++ tree dataref_ptr = NULL_TREE; ++ tree dummy; ++ gimple *ptr_incr = NULL; ++ unsigned int n_groups = group_el.length (); ++ for (unsigned int i = 0; i < n_groups; i++) ++ { ++ stmt_vec_info first_stmt_info_b = get_first_stmt_info_before_transpose ++ (first_stmt_info, group_el[i], group_size); ++ dr_vec_info* cur_first_dr_info = get_dr_info (first_stmt_info_b); ++ tree bump = vect_get_data_ptr_increment (cur_first_dr_info, ++ vectype_info->ltype, memory_access_type); ++ bool simd_lane_access_p ++ = STMT_VINFO_SIMD_LANE_ACCESS_P (first_stmt_info_b) != 0; ++ for (unsigned int k = 0; k < ncontinues; k++) ++ { ++ /* Create dataref_ptr which is point to init_address. */ ++ if (k == 0) ++ { ++ dataref_ptr = vect_create_data_ref_ptr (first_stmt_info_b, ++ vectype_info->ltype, NULL, offset_info->offset, ++ &dummy, gsi, &ptr_incr, simd_lane_access_p, ++ offset_info->byte_offset, bump); ++ } ++ else ++ { ++ dataref_ptr = bump_vector_ptr (dataref_ptr, ptr_incr, ++ gsi, first_stmt_info_b, bump); ++ } ++ gassign *new_stmt = NULL; ++ new_stmt_info = add_new_stmt_vect_load (vectype_info->vectype, ++ dataref_ptr, offset_info->dataref_offset, ++ vectype_info->ref_type, vectype_info->ltype, ++ cur_first_dr_info, gsi, first_stmt_info_b, new_stmt); ++ push_new_stmt_to_dr_chain (slp, slp_perm, new_stmt_info, ++ dr_chain, slp_node); ++ } ++ } ++} ++ + /* vectorizable_load. + + Check if STMT_INFO reads a non scalar data-ref (array/pointer/structure) +@@ -8456,6 +9371,9 @@ vectorizable_load (stmt_vec_info stmt_in + tree vec_mask = NULL_TREE; + prev_stmt_info = NULL; + poly_uint64 group_elt = 0; ++ unsigned new_group_size = 0; ++ vec new_load_permutation; ++ + for (j = 0; j < ncopies; j++) + { + stmt_vec_info new_stmt_info = NULL; +@@ -8477,6 +9395,14 @@ vectorizable_load (stmt_vec_info stmt_in + dataref_ptr = unshare_expr (DR_BASE_ADDRESS (first_dr_info->dr)); + dataref_offset = build_int_cst (ref_type, 0); + } ++ /* If the stmt_info need to be transposed recovery, dataref_ptr ++ will be caculated later. 
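++	 (I.e. for a transposed SLP group the pointer is created per
++	 original array inside the transpose branch further down,
++	 mirroring the store side.)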
*/
++      else if (slp && is_a<bb_vec_info> (vinfo)
++	       && STMT_VINFO_GROUPED_ACCESS (stmt_info)
++	       && DR_GROUP_SLP_TRANSPOSE (DR_GROUP_FIRST_ELEMENT (stmt_info)))
++	{
++	  dataref_ptr = NULL_TREE;
++	}
+       else if (diff_first_stmt_info)
+ 	{
+ 	  dataref_ptr
+@@ -8593,6 +9519,51 @@ vectorizable_load (stmt_vec_info stmt_in
+ 	  /* Record that VEC_ARRAY is now dead.  */
+ 	  vect_clobber_variable (stmt_info, gsi, vec_array);
+ 	}
++      else if (slp && is_a<bb_vec_info> (vinfo)
++	       && STMT_VINFO_GROUPED_ACCESS (stmt_info)
++	       && DR_GROUP_SLP_TRANSPOSE (DR_GROUP_FIRST_ELEMENT (stmt_info)))
++	{
++	  if (dump_enabled_p ())
++	    {
++	      dump_printf_loc (MSG_NOTE, vect_location,
++			       "vectorizable_load for slp transpose.\n");
++	    }
++	  unsigned int group_size_b = DR_GROUP_SIZE_TRANS (first_stmt_info);
++	  unsigned int const_nunits = nunits.to_constant ();
++	  unsigned int nloads = const_nunits;
++	  unsigned int ncontinues = group_size_b;
++	  tree lvectype = vectype;
++	  tree ltype = calculate_new_type (vectype, const_nunits,
++					   group_size_b, nloads,
++					   ncontinues, lvectype);
++	  bool this_load_permuted = false;
++	  auto_vec<unsigned int> group_el;
++	  new_load_permutation = generate_load_permutation (slp_node,
++				   new_group_size, group_size, group_size_b,
++				   this_load_permuted, group_el);
++	  slp_perm = is_slp_perm (slp_perm, this_load_permuted, nunits,
++				  group_size, first_stmt_info);
++
++	  /* ncopies: the number of vectors that need to be loaded from
++	     memory.  */
++	  unsigned int ncopies = new_group_size / const_nunits;
++	  offset_info offset_info = {offset, byte_offset, dataref_offset};
++	  vectype_info vectype_info = {vectype, ltype, lvectype, ref_type};
++	  if (slp_perm)
++	    {
++	      dr_chain.create (ncopies);
++	    }
++	  if (nloads > 1 && ncontinues == 1)
++	    {
++	      new_vect_stmt_for_nloads (ncopies, nloads, group_el,
++					stmt_info, &offset_info, &vectype_info,
++					memory_access_type, slp, slp_perm,
++					dr_chain, slp_node, gsi);
++	    }
++	  else
++	    {
++	      new_vect_stmt_for_ncontinues (ncontinues, group_el,
++					    stmt_info, &offset_info,
++					    &vectype_info, gsi,
++					    memory_access_type, slp,
++					    slp_perm, dr_chain, slp_node);
++	    }
++	}
+       else
+ 	{
+ 	  for (i = 0; i < vec_num; i++)
+@@ -8892,9 +9863,37 @@ vectorizable_load (stmt_vec_info stmt_in
+ 	}
+ 
+       if (slp && !slp_perm)
+-	continue;
++	{
++	  continue;
++	}
+ 
+-      if (slp_perm)
++      /* Using the new load permutation to generate vector permute statements
++	 from a list of loads in DR_CHAIN.
*/ ++ if (slp && slp_perm && is_a (vinfo) ++ && STMT_VINFO_GROUPED_ACCESS (stmt_info) ++ && DR_GROUP_SLP_TRANSPOSE (DR_GROUP_FIRST_ELEMENT (stmt_info))) ++ { ++ unsigned n_perms; ++ stmt_vec_info stmt_info_ = SLP_TREE_SCALAR_STMTS (slp_node)[0]; ++ unsigned int old_size = DR_GROUP_SIZE (stmt_info); ++ DR_GROUP_SIZE (stmt_info_) = new_group_size; ++ vec old_load_permutation ++ = SLP_TREE_LOAD_PERMUTATION (slp_node); ++ SLP_TREE_LOAD_PERMUTATION (slp_node) = new_load_permutation; ++ bool perm_load_success = vect_transform_slp_perm_load (slp_node, ++ dr_chain, gsi, vf, ++ slp_node_instance, false, ++ &n_perms); ++ DR_GROUP_SIZE (stmt_info_) = old_size; ++ SLP_TREE_LOAD_PERMUTATION (slp_node) = old_load_permutation; ++ new_load_permutation.release (); ++ if (!perm_load_success) ++ { ++ dr_chain.release (); ++ return false; ++ } ++ } ++ else if (slp_perm) + { + unsigned n_perms; + if (!vect_transform_slp_perm_load (slp_node, dr_chain, gsi, vf, diff --git a/struct-reorder-fields.patch b/struct-reorder-fields.patch new file mode 100644 index 0000000000000000000000000000000000000000..bbb70c9a93f8b3460a7d90946f43f298194ca4dc --- /dev/null +++ b/struct-reorder-fields.patch @@ -0,0 +1,5318 @@ +diff -Nurp a/gcc/common.opt b/gcc/common.opt +--- a/gcc/common.opt 2020-12-29 16:27:25.292000000 +0800 ++++ b/gcc/common.opt 2021-01-06 09:59:52.572000000 +0800 +@@ -1793,6 +1793,10 @@ fipa-matrix-reorg + Common Ignore + Does nothing. Preserved for backward compatibility. + ++fipa-reorder-fields ++Common Report Var(flag_ipa_reorder_fields) Init(0) Optimization ++Perform structure fields reorder optimizations. ++ + fipa-struct-reorg + Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization + Perform structure layout optimizations. +diff -Nurp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +--- a/gcc/doc/invoke.texi 2020-12-29 16:27:25.452000000 +0800 ++++ b/gcc/doc/invoke.texi 2021-01-06 09:59:52.576000000 +0800 +@@ -422,6 +422,7 @@ Objective-C and Objective-C++ Dialects}. + -finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol + -finline-small-functions -fipa-cp -fipa-cp-clone @gol + -fipa-bit-cp -fipa-vrp -fipa-pta -fipa-profile -fipa-pure-const @gol ++-fipa-reorder-fields @gol + -fipa-struct-reorg @gol + -fipa-reference -fipa-reference-addressable @gol + -fipa-stack-alignment -fipa-icf -fira-algorithm=@var{algorithm} @gol +@@ -9330,6 +9331,17 @@ Enabled by default at @option{-O} and hi + Reduce stack alignment on call sites if possible. + Enabled by default. + ++@item -fipa-reorder-fields ++@opindex fipa-reorder-fields ++Perform structure reorganization optimization, that change C-like structures ++layout in order to better utilize spatial locality. This transformation is ++affective for programs containing arrays of structures. Available in two ++compilation modes: profile-based (enabled with @option{-fprofile-generate}) ++or static (which uses built-in heuristics). It works only in whole program ++mode, so it requires @option{-fwhole-program} to be enabled. ++ ++With this flag, the program debug info reflects a new structure layout. 
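++For example:
++
++@smallexample
++gcc -O2 -fwhole-program -fipa-reorder-fields test.c
++@end smallexample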
++ + @item -fipa-struct-reorg + @opindex fipa-struct-reorg + Perform structure reorganization optimization, that change C-like structures +diff -Nurp a/gcc/ipa-struct-reorg/escapes.def b/gcc/ipa-struct-reorg/escapes.def +--- a/gcc/ipa-struct-reorg/escapes.def 2020-12-29 16:27:25.060000000 +0800 ++++ b/gcc/ipa-struct-reorg/escapes.def 2021-02-09 10:51:15.272000000 +0800 +@@ -58,5 +58,8 @@ DEF_ESCAPE (escape_ptr_ptr, "Type is use + DEF_ESCAPE (escape_return, "Type escapes via a return [not handled yet]") + DEF_ESCAPE (escape_separate_instance, "Type escapes via a separate instance") + DEF_ESCAPE (escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt") ++DEF_ESCAPE (escape_via_orig_escape, "Type escapes via a original escape type") ++DEF_ESCAPE (escape_instance_field, "Type escapes via a field of instance") ++DEF_ESCAPE (escape_via_empty_no_orig, "Type escapes via empty and no original") + + #undef DEF_ESCAPE +diff -Nurp a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c 2020-12-29 16:27:25.060000000 +0800 ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c 2021-02-25 10:22:52.464000000 +0800 +@@ -160,25 +160,66 @@ handled_type (tree type) + return false; + } + ++ ++/* Get the number of pointer layers. */ ++ ++int ++get_ptr_layers (tree expr) ++{ ++ int layers = 0; ++ while (POINTER_TYPE_P (expr) || TREE_CODE (expr) == ARRAY_TYPE) ++ { ++ layers++; ++ expr = TREE_TYPE (expr); ++ } ++ return layers; ++} ++ ++/* Comparison pointer layers. */ ++ ++bool ++cmp_ptr_layers (tree a, tree b) ++{ ++ return get_ptr_layers (a) == get_ptr_layers (b); ++} ++ ++/* Return true if the ssa_name comes from the void* parameter. */ ++ ++bool ++is_from_void_ptr_parm (tree ssa_name) ++{ ++ gcc_assert (TREE_CODE (ssa_name) == SSA_NAME); ++ tree var = SSA_NAME_VAR (ssa_name); ++ return (var && TREE_CODE (var) == PARM_DECL ++ && VOID_POINTER_P (TREE_TYPE (ssa_name))); ++} ++ + enum srmode + { + NORMAL = 0, +- COMPLETE_STRUCT_RELAYOUT ++ COMPLETE_STRUCT_RELAYOUT, ++ STRUCT_REORDER_FIELDS + }; + + static bool is_result_of_mult (tree arg, tree *num, tree struct_size); ++bool isptrptr (tree type); ++ ++srmode current_mode; + + } // anon namespace + + namespace struct_reorg { + ++hash_map > fields_to_finish; ++ + /* Constructor of srfunction. */ + + srfunction::srfunction (cgraph_node *n) + : node (n), + old (NULL), + newnode (NULL), +- newf (NULL) ++ newf (NULL), ++ is_safe_func (false) + { + } + +@@ -330,12 +371,13 @@ srtype::add_field_site (srfield *field) + + /* Constructor of DECL. */ + +-srdecl::srdecl (srtype *tp, tree decl, int argnum) ++srdecl::srdecl (srtype *tp, tree decl, int argnum, tree orig_type) + : type (tp), + decl (decl), + func (NULL_TREE), + argumentnum (argnum), +- visited (false) ++ visited (false), ++ orig_type (orig_type) + { + if (TREE_CODE (decl) == SSA_NAME) + func = current_function_decl; +@@ -359,17 +401,25 @@ srfunction::find_decl (tree decl) + /* Record DECL of the TYPE with argument num ARG. */ + + srdecl * +-srfunction::record_decl (srtype *type, tree decl, int arg) ++srfunction::record_decl (srtype *type, tree decl, int arg, tree orig_type) + { +- // Search for the decl to see if it is already there. ++ /* Search for the decl to see if it is already there. */ + srdecl *decl1 = find_decl (decl); + + if (decl1) +- return decl1; ++ { ++ /* Added the orig_type information. 
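++	 (E.g. a decl first recorded through a void * use may later be
++	 seen with a pointer-to-pointer type such as struct s **; the
++	 more precise original type is remembered here for later
++	 rewriting.)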
*/ ++ if (!decl1->orig_type && orig_type && isptrptr (orig_type)) ++ { ++ decl1->orig_type = orig_type; ++ } ++ return decl1; ++ } + + gcc_assert (type); + +- decl1 = new srdecl (type, decl, arg); ++ orig_type = isptrptr (TREE_TYPE (decl)) ? TREE_TYPE (decl) : orig_type; ++ decl1 = new srdecl (type, decl, arg, isptrptr (orig_type)? orig_type : NULL); + decls.safe_push(decl1); + return decl1; + } +@@ -433,31 +483,27 @@ srtype::dump (FILE *f) + print_generic_expr (f, type); + fprintf (f, "(%d) { ", TYPE_UID (type)); + if (escapes != does_not_escape) +- fprintf (f, " escapes = \"%s\"\n", escape_reason ()); +- fprintf (f, " fields = { "); ++ { ++ fprintf (f, "escapes = \"%s\"", escape_reason ()); ++ } ++ fprintf (f, "\nfields = {\n"); + FOR_EACH_VEC_ELT (fields, i, field) + { +- if (i == 0) +- fprintf (f, "\n "); +- else +- fprintf (f, "\n, "); + field->dump (f); + } +- fprintf (f, " }\n "); +- fprintf (f, "\n accesses = {"); ++ fprintf (f, "}\n "); ++ ++ fprintf (f, "\naccesses = {\n"); + FOR_EACH_VEC_ELT (accesses, i, access) + { +- fprintf (f, "\n"); + access->dump (f); + } +- fprintf (f, " }\n "); +- fprintf (f, "\n functions = {"); ++ fprintf (f, "}\n "); ++ ++ fprintf (f, "\nfunctions = {\n"); + FOR_EACH_VEC_ELT (functions, i, fn) +- { +- fprintf (f, " \n"); +- fn->simple_dump (f); +- } +- fprintf (f, "\n }\n"); ++ fn->simple_dump (f); ++ fprintf (f, "}\n"); + fprintf (f, "}\n"); + } + +@@ -467,6 +513,10 @@ void + srtype::simple_dump (FILE *f) + { + print_generic_expr (f, type); ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ fprintf (f, "(%d)", TYPE_UID (type)); ++ } + } + + /* Analyze the type and decide what to be done with it. */ +@@ -504,6 +554,12 @@ srfield::create_new_fields (tree newtype + tree newfields[max_split], + tree newlast[max_split]) + { ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ create_new_reorder_fields (newtype, newfields, newlast); ++ return; ++ } ++ + tree nt[max_split]; + + for (unsigned i = 0; i < max_split; i++) +@@ -512,7 +568,7 @@ srfield::create_new_fields (tree newtype + if (type == NULL) + nt[0] = fieldtype; + else +- memcpy (nt, type->newtype, sizeof(type->newtype)); ++ memcpy (nt, type->newtype, sizeof (type->newtype)); + + for (unsigned i = 0; i < max_split && nt[i] != NULL; i++) + { +@@ -552,6 +608,117 @@ srfield::create_new_fields (tree newtype + + } + ++/* Reorder fields. */ ++ ++void ++srfield::reorder_fields (tree newfields[max_split], tree newlast[max_split], ++ tree &field) ++{ ++ /* Reorder fields in descending. ++ newfields: always stores the first member of the chain ++ and with the largest size. ++ field: indicates the node to be inserted. */ ++ if (newfields[clusternum] == NULL) ++ { ++ newfields[clusternum] = field; ++ newlast[clusternum] = field; ++ } ++ else ++ { ++ tree tmp = newfields[clusternum]; ++ if (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (field))) ++ > tree_to_uhwi (TYPE_SIZE (TREE_TYPE (tmp)))) ++ { ++ DECL_CHAIN (field) = tmp; ++ newfields[clusternum] = field; ++ } ++ else ++ { ++ while (DECL_CHAIN (tmp) ++ && (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (field))) ++ <= tree_to_uhwi ( ++ TYPE_SIZE (TREE_TYPE (DECL_CHAIN (tmp)))))) ++ { ++ tmp = DECL_CHAIN (tmp); ++ } ++ ++ /* now tmp size > field size ++ insert field: tmp -> xx ==> tmp -> field -> xx. */ ++ DECL_CHAIN (field) = DECL_CHAIN (tmp); // field -> xx ++ DECL_CHAIN (tmp) = field; // tmp -> field ++ } ++ } ++} ++ ++/* Create the new reorder fields for this field. 
++ newtype[max_split]: srtype's member variable, ++ newfields[max_split]: created by create_new_type func, ++ newlast[max_split]: created by create_new_type func. */ ++ ++void ++srfield::create_new_reorder_fields (tree newtype[max_split], ++ tree newfields[max_split], ++ tree newlast[max_split]) ++{ ++ /* newtype, corresponding to newtype[max_split] in srtype. */ ++ tree nt = NULL_TREE; ++ if (type == NULL) ++ { ++ /* Common var. */ ++ nt = fieldtype; ++ } ++ else ++ { ++ /* RECORD_TYPE var. */ ++ if (type->has_escaped ()) ++ { ++ nt = type->type; ++ } ++ else ++ { ++ nt = type->newtype[0]; ++ } ++ } ++ tree field = make_node (FIELD_DECL); ++ ++ /* Used for recursive types. ++ fields_to_finish: hase_map in the format of "type: {fieldA, fieldB}", ++ key : indicates the original type, ++ vaule: filed that need to be updated to newtype. */ ++ if (nt == NULL) ++ { ++ nt = make_node (RECORD_TYPE); ++ auto_vec &fields ++ = fields_to_finish.get_or_insert (inner_type (type->type)); ++ fields.safe_push (field); ++ } ++ ++ DECL_NAME (field) = DECL_NAME (fielddecl); ++ if (type == NULL) ++ { ++ /* Common members do not need to reconstruct. ++ Otherwise, int* -> int** or void* -> void**. */ ++ TREE_TYPE (field) = nt; ++ } ++ else ++ { ++ TREE_TYPE (field) ++ = reconstruct_complex_type (TREE_TYPE (fielddecl), nt); ++ } ++ DECL_SOURCE_LOCATION (field) = DECL_SOURCE_LOCATION (fielddecl); ++ SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl)); ++ DECL_USER_ALIGN (field) = DECL_USER_ALIGN (fielddecl); ++ TREE_ADDRESSABLE (field) = TREE_ADDRESSABLE (fielddecl); ++ DECL_NONADDRESSABLE_P (field) = !TREE_ADDRESSABLE (fielddecl); ++ TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (fielddecl); ++ DECL_CONTEXT (field) = newtype[clusternum]; ++ ++ reorder_fields (newfields, newlast, field); ++ ++ /* srfield member variable, which stores the new field decl. */ ++ newfield[0] = field; ++} ++ + /* Create the new TYPE corresponding to THIS type. */ + + bool +@@ -587,7 +754,8 @@ srtype::create_new_type (void) + /* If the fields' types did have a change or + we are not splitting the struct into two clusters, + then just return false and don't change the type. */ +- if (!createnewtype && maxclusters == 0) ++ if (!createnewtype && maxclusters == 0 ++ && current_mode != STRUCT_REORDER_FIELDS) + { + newtype[0] = type; + return false; +@@ -596,6 +764,7 @@ srtype::create_new_type (void) + /* Should have at most max_split clusters. */ + gcc_assert (maxclusters < max_split); + ++ /* Record the first member of the field chain. */ + tree newfields[max_split]; + tree newlast[max_split]; + +@@ -622,7 +791,8 @@ srtype::create_new_type (void) + sprintf(id, "%d", i); + if (tname) + { +- name = concat (tname, ".reorg.", id, NULL); ++ name = concat (tname, current_mode == STRUCT_REORDER_FIELDS ++ ? ".reorder." : ".reorg.", id, NULL); + TYPE_NAME (newtype[i]) = get_identifier (name); + free (name); + } +@@ -634,7 +804,6 @@ srtype::create_new_type (void) + f->create_new_fields (newtype, newfields, newlast); + } + +- + /* No reason to warn about these structs since the warning would + have happened already. 
*/ + int save_warn_padded = warn_padded; +@@ -654,6 +823,7 @@ srtype::create_new_type (void) + for (unsigned i = 0; i < maxclusters; i++) + { + print_generic_expr (dump_file, newtype[i]); ++ fprintf (dump_file, "(%d)", TYPE_UID (newtype[i])); + fprintf (dump_file, "\n"); + } + } +@@ -712,7 +882,12 @@ srfunction::create_new_decls (void) + tree newinner[max_split]; + memset (newinner, 0, sizeof(newinner)); + for (unsigned j = 0; j < max_split && type->newtype[j]; j++) +- newtype1[j] = reconstruct_complex_type (TREE_TYPE (decls[i]->decl), type->newtype[j]); ++ { ++ newtype1[j] = reconstruct_complex_type ( ++ isptrptr (decls[i]->orig_type) ? decls[i]->orig_type ++ : TREE_TYPE (decls[i]->decl), ++ type->newtype[j]); ++ } + if (inner) + { + srdecl *in = find_decl (inner); +@@ -757,7 +932,8 @@ srfunction::create_new_decls (void) + sprintf(id, "%d", j); + if (tname) + { +- name = concat (tname, ".reorg.", id, NULL); ++ name = concat (tname, current_mode == STRUCT_REORDER_FIELDS ++ ? ".reorder." : ".reorg.", id, NULL); + new_name = get_identifier (name); + free (name); + } +@@ -781,7 +957,6 @@ srfunction::create_new_decls (void) + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Created New decls for decl:\n"); +- fprintf (dump_file, "\n"); + decls[i]->dump (dump_file); + fprintf (dump_file, "\n"); + for (unsigned j = 0; j < max_split && decls[i]->newdecl[j]; j++) +@@ -808,7 +983,7 @@ srfield::dump (FILE *f) + fprintf (f, ", offset = " HOST_WIDE_INT_PRINT_DEC, offset); + fprintf (f, ", type = "); + print_generic_expr (f, fieldtype); +- fprintf (f, "\n}\n"); ++ fprintf (f, "}\n"); + } + + +@@ -843,7 +1018,7 @@ sraccess::dump (FILE *f) + fprintf (f, " in function: %s/%d", node->name (), node->order); + fprintf (f, ", stmt:\n"); + print_gimple_stmt (f, stmt, 0); +- fprintf (f, "\n }\n"); ++ fprintf (f, "}\n"); + + } + +@@ -970,8 +1145,7 @@ public: + // Constructors + ipa_struct_reorg(void) + : current_function (NULL), +- done_recording (false), +- current_mode (NORMAL) ++ done_recording (false) + { + } + +@@ -983,11 +1157,12 @@ public: + auto_vec_del functions; + srglobal globals; + srfunction *current_function; ++ hash_set safe_functions; + + bool done_recording; +- srmode current_mode; + + void dump_types (FILE *f); ++ void dump_newtypes (FILE *f); + void dump_types_escaped (FILE *f); + void dump_functions (FILE *f); + void record_accesses (void); +@@ -995,6 +1170,8 @@ public: + bool walk_field_for_cycles (srtype*); + void prune_escaped_types (void); + void propagate_escape (void); ++ void propagate_escape_via_original (void); ++ void propagate_escape_via_empty_with_no_original (void); + void analyze_types (void); + void clear_visited (void); + bool create_new_types (void); +@@ -1005,8 +1182,11 @@ public: + void create_new_args (cgraph_node *new_node); + unsigned rewrite_functions (void); + srdecl *record_var (tree decl, escape_type escapes = does_not_escape, int arg = -1); ++ void record_safe_func_with_void_ptr_parm (void); + srfunction *record_function (cgraph_node *node); + srfunction *find_function (cgraph_node *node); ++ void record_field_type (tree field, srtype *base_srtype); ++ void record_struct_field_types (tree base_type, srtype *base_srtype); + srtype *record_type (tree type); + void process_union (tree type); + srtype *find_type (tree type); +@@ -1017,7 +1197,7 @@ public: + void record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt); + void mark_expr_escape(tree, escape_type, gimple *stmt); + bool handled_allocation_stmt (gimple *stmt); +- tree 
allocate_size (srtype *t, gimple *stmt); ++ tree allocate_size (srtype *t, srdecl *decl, gimple *stmt); + + void mark_decls_in_as_not_needed (tree fn); + +@@ -1029,15 +1209,22 @@ public: + bool rewrite_phi (gphi *); + bool rewrite_expr (tree expr, tree newexpr[max_split], bool ignore_missing_decl = false); + bool rewrite_lhs_rhs (tree lhs, tree rhs, tree newlhs[max_split], tree newrhs[max_split]); +- bool get_type_field (tree expr, tree &base, bool &indirect, srtype *&type, srfield *&field, bool &realpart, bool &imagpart, bool &address, bool should_create = false, bool can_escape = false); ++ bool get_type_field (tree expr, tree &base, bool &indirect, srtype *&type, ++ srfield *&field, bool &realpart, bool &imagpart, ++ bool &address, bool& escape_from_base, ++ bool should_create = false, bool can_escape = false); + bool wholeaccess (tree expr, tree base, tree accesstype, srtype *t); + + void check_alloc_num (gimple *stmt, srtype *type); ++ void check_definition_assign (srdecl *decl, vec &worklist); ++ void check_definition_call (srdecl *decl, vec &worklist); + void check_definition (srdecl *decl, vec&); + void check_uses (srdecl *decl, vec&); + void check_use (srdecl *decl, gimple *stmt, vec&); +- void check_type_and_push (tree newdecl, srtype *type, vec &worklist, gimple *stmt); ++ void check_type_and_push (tree newdecl, srdecl *decl, ++ vec &worklist, gimple *stmt); + void check_other_side (srdecl *decl, tree other, gimple *stmt, vec &worklist); ++ void check_ptr_layers (tree a_expr, tree b_expr, gimple* stmt); + + void find_vars (gimple *stmt); + void find_var (tree expr, gimple *stmt); +@@ -1692,9 +1879,45 @@ ipa_struct_reorg::dump_types (FILE *f) + srtype *type; + FOR_EACH_VEC_ELT (types, i, type) + { ++ fprintf (f, "======= the %dth type: ======\n", i); + type->dump(f); ++ fprintf (f, "\n"); ++ } ++} ++ ++/* Dump all of the created newtypes to file F. */ ++ ++void ++ipa_struct_reorg::dump_newtypes (FILE *f) ++{ ++ unsigned i = 0; ++ srtype *type = NULL; ++ FOR_EACH_VEC_ELT (types, i, type) ++ { ++ if (type->has_escaped ()) ++ { ++ continue; ++ } ++ fprintf (f, "======= the %dth newtype: ======\n", i); ++ fprintf (f, "type : "); ++ print_generic_expr (f, type->newtype[0]); ++ fprintf (f, "(%d) ", TYPE_UID (type->newtype[0])); ++ fprintf (f, "{ "); ++ fprintf (f, "\nfields = {\n"); ++ ++ for (tree field = TYPE_FIELDS (TYPE_MAIN_VARIANT (type->newtype[0])); ++ field; field = DECL_CHAIN (field)) ++ { ++ fprintf (f, "field (%d) ", DECL_UID (field)); ++ fprintf (f, "{"); ++ fprintf (f, "type = "); ++ print_generic_expr (f, TREE_TYPE (field)); ++ fprintf (f, "}\n"); ++ } ++ fprintf (f, "}\n "); ++ ++ fprintf (f, "\n"); + } +- fprintf (f, "\n"); + } + + /* Dump all of the recorded types to file F. */ +@@ -1724,7 +1947,6 @@ ipa_struct_reorg::dump_functions (FILE * + unsigned i; + srfunction *fn; + +- fprintf (f, "\n\n"); + globals.dump (f); + fprintf (f, "\n\n"); + FOR_EACH_VEC_ELT (functions, i, fn) +@@ -1790,6 +2012,10 @@ bool isarraytype (tree type) + + bool isptrptr (tree type) + { ++ if (type == NULL) ++ { ++ return false; ++ } + bool firstptr = false; + while (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE) + { +@@ -1804,6 +2030,645 @@ bool isptrptr (tree type) + return false; + } + ++/* Adding node to map and stack. 
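++ Each node is paired with its pointer-layer count, and re-adding a
++ node with a conflicting count fails the check; e.g. in
++ _257 = MEM[(struct arc_t * *)_17], _257 is added with one layer
++ fewer than _17.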
*/ ++ ++bool ++add_node (tree node, int layers, hash_map &map, ++ auto_vec &stack) ++{ ++ if (TREE_CODE (node) != SSA_NAME) ++ { ++ return false; ++ } ++ if (map.get (node) == NULL) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, " "); ++ fprintf (dump_file, "add node: \t\t"); ++ print_generic_expr (dump_file, node); ++ fprintf (dump_file, ",\t\tptr layers: %d: \n", layers); ++ } ++ map.put (node, layers); ++ stack.safe_push (node); ++ } ++ else if (*map.get (node) != layers) ++ { ++ return false; ++ } ++ return true; ++} ++ ++/* Check the number of pointer layers of the gimple phi in definition. */ ++ ++bool ++check_def_phi (tree def_node, hash_map &ptr_layers) ++{ ++ bool res = true; ++ gimple *def_stmt = SSA_NAME_DEF_STMT (def_node); ++ for (unsigned j = 0; j < gimple_phi_num_args (def_stmt); j++) ++ { ++ tree phi_node = gimple_phi_arg_def (def_stmt, j); ++ if (integer_zerop (phi_node)) ++ { ++ continue; ++ } ++ if (ptr_layers.get (phi_node) == NULL) ++ { ++ return false; ++ } ++ res &= *ptr_layers.get (def_node) == *ptr_layers.get (phi_node); ++ } ++ return res; ++} ++ ++/* Check the number of pointer layers of the gimple assign in definition. */ ++ ++bool ++check_def_assign (tree def_node, hash_map &ptr_layers) ++{ ++ bool res = true; ++ gimple *def_stmt = SSA_NAME_DEF_STMT (def_node); ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (def_stmt); ++ tree_code rhs_code = gimple_assign_rhs_code (def_stmt); ++ tree rhs1 = gimple_assign_rhs1 (def_stmt); ++ tree rhs1_base = TREE_CODE (rhs1) == MEM_REF ? TREE_OPERAND (rhs1, 0) : rhs1; ++ if (ptr_layers.get (rhs1_base) == NULL) ++ { ++ return false; ++ } ++ if (rhs_class == GIMPLE_SINGLE_RHS || rhs_class == GIMPLE_UNARY_RHS) ++ { ++ if (TREE_CODE (rhs1) == SSA_NAME) ++ { ++ res = *ptr_layers.get (def_node) == *ptr_layers.get (rhs1); ++ } ++ else if (TREE_CODE (rhs1) == MEM_REF) ++ { ++ res = *ptr_layers.get (def_node) ++ == *ptr_layers.get (TREE_OPERAND (rhs1, 0)); ++ } ++ else ++ { ++ return false; ++ } ++ } ++ else if (rhs_class == GIMPLE_BINARY_RHS) ++ { ++ if (rhs_code == POINTER_PLUS_EXPR) ++ { ++ res = *ptr_layers.get (def_node) == *ptr_layers.get (rhs1); ++ } ++ else if (rhs_code == BIT_AND_EXPR) ++ { ++ res = *ptr_layers.get (def_node) == *ptr_layers.get (rhs1); ++ } ++ else ++ { ++ return false; ++ } ++ } ++ else ++ { ++ return false; ++ } ++ return res; ++} ++ ++/* Check node definition. */ ++ ++bool ++check_node_def (hash_map &ptr_layers) ++{ ++ bool res = true; ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\n======== check node definition ========\n"); ++ } ++ for (unsigned i = 1; i < num_ssa_names; ++i) ++ { ++ tree name = ssa_name (i); ++ if (name && ptr_layers.get (name) != NULL) ++ { ++ gimple *def_stmt = SSA_NAME_DEF_STMT (name); ++ if (dump_file && (dump_flags & TDF_DETAILS) ++ && gimple_code (def_stmt) != GIMPLE_DEBUG) ++ { ++ print_gimple_stmt (dump_file, def_stmt, 0); ++ } ++ ++ if (gimple_code (def_stmt) == GIMPLE_PHI) ++ { ++ res = check_def_phi (name, ptr_layers); ++ } ++ else if (gimple_code (def_stmt) == GIMPLE_ASSIGN) ++ { ++ res = check_def_assign (name, ptr_layers); ++ } ++ else if (gimple_code (def_stmt) == GIMPLE_NOP) ++ { ++ continue; ++ } ++ else ++ { ++ return false; ++ } ++ } ++ } ++ return res; ++} ++ ++/* Check pointer usage. 
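++ Reached when the current node has exactly one pointer layer; e.g.
++ MEM[(long int *)a_1] = _57 records a_1 with one more layer, while a
++ non-pointer field read such as _283 = _282->flow records nothing.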
*/ ++ ++bool ++check_record_ptr_usage (gimple *use_stmt, tree ¤t_node, ++ hash_map &ptr_layers, ++ auto_vec &ssa_name_stack) ++{ ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (use_stmt); ++ tree rhs1 = gimple_assign_rhs1 (use_stmt); ++ tree lhs = gimple_assign_lhs (use_stmt); ++ if (rhs_class != GIMPLE_SINGLE_RHS ++ || (TREE_CODE (rhs1) != COMPONENT_REF && TREE_CODE (rhs1) != SSA_NAME) ++ || (TREE_CODE (lhs) != MEM_REF && TREE_CODE (lhs) != SSA_NAME)) ++ { ++ return false; ++ } ++ ++ bool res = true; ++ /* MEM[(long int *)a_1] = _57; (record). ++ If lhs is ssa_name, lhs cannot be the current node. ++ _283 = _282->flow; (No record). */ ++ if (TREE_CODE (rhs1) == SSA_NAME) ++ { ++ tree tmp = (rhs1 != current_node) ? rhs1 : lhs; ++ if (TREE_CODE (tmp) == MEM_REF) ++ { ++ res = add_node (TREE_OPERAND (tmp, 0), ++ *ptr_layers.get (current_node) + 1, ++ ptr_layers, ssa_name_stack); ++ } ++ else ++ { ++ res = add_node (tmp, *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ } ++ } ++ else if (TREE_CODE (lhs) == SSA_NAME && TREE_CODE (rhs1) == COMPONENT_REF) ++ { ++ res = !POINTER_TYPE_P (TREE_TYPE (rhs1)); ++ } ++ else ++ { ++ res = false; ++ } ++ return res; ++} ++ ++/* Check and record a single node. */ ++ ++bool ++check_record_single_node (gimple *use_stmt, tree ¤t_node, ++ hash_map &ptr_layers, ++ auto_vec &ssa_name_stack) ++{ ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (use_stmt); ++ tree rhs1 = gimple_assign_rhs1 (use_stmt); ++ tree lhs = gimple_assign_lhs (use_stmt); ++ gcc_assert (rhs_class == GIMPLE_SINGLE_RHS || rhs_class == GIMPLE_UNARY_RHS); ++ ++ if ((TREE_CODE (rhs1) != SSA_NAME && TREE_CODE (rhs1) != MEM_REF) ++ || (TREE_CODE (lhs) != SSA_NAME && TREE_CODE (lhs) != MEM_REF)) ++ { ++ return false; ++ } ++ ++ bool res = true; ++ if (TREE_CODE (lhs) == SSA_NAME && TREE_CODE (rhs1) == MEM_REF) ++ { ++ /* _257 = MEM[(struct arc_t * *)_17]. */ ++ res = add_node (lhs, *ptr_layers.get (current_node) - 1, ++ ptr_layers, ssa_name_stack); ++ } ++ else if (TREE_CODE (lhs) == MEM_REF && TREE_CODE (rhs1) == SSA_NAME) ++ { ++ /* MEM[(long int *)a_1] = _57. */ ++ if (rhs1 == current_node) ++ { ++ res = add_node (TREE_OPERAND (lhs, 0), ++ *ptr_layers.get (current_node) + 1, ++ ptr_layers, ssa_name_stack); ++ } ++ else ++ { ++ res = add_node (rhs1, *ptr_layers.get (current_node) - 1, ++ ptr_layers, ssa_name_stack); ++ } ++ } ++ else if (TREE_CODE (lhs) == SSA_NAME && TREE_CODE (rhs1) == SSA_NAME) ++ { ++ res = add_node (lhs, *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ } ++ else ++ { ++ res = false; ++ } ++ ++ return res; ++} ++ ++/* Check and record multiple nodes. */ ++ ++bool ++check_record_mult_node (gimple *use_stmt, tree ¤t_node, ++ hash_map &ptr_layers, ++ auto_vec &ssa_name_stack) ++{ ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (use_stmt); ++ tree_code rhs_code = gimple_assign_rhs_code (use_stmt); ++ tree rhs1 = gimple_assign_rhs1 (use_stmt); ++ tree lhs = gimple_assign_lhs (use_stmt); ++ tree rhs2 = gimple_assign_rhs2 (use_stmt); ++ gcc_assert (rhs_class == GIMPLE_BINARY_RHS); ++ ++ if ((rhs_code != POINTER_PLUS_EXPR && rhs_code != POINTER_DIFF_EXPR ++ && rhs_code != BIT_AND_EXPR) ++ || (TREE_CODE (lhs) != SSA_NAME && TREE_CODE (rhs1) != SSA_NAME)) ++ { ++ return false; ++ } ++ ++ bool res = true; ++ if (rhs_code == POINTER_PLUS_EXPR) ++ { ++ res = add_node (lhs == current_node ? 
rhs1 : lhs, ++ *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ } ++ else if (rhs_code == POINTER_DIFF_EXPR) ++ { ++ res = add_node (rhs1 != current_node ? rhs1 : rhs2, ++ *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ } ++ else if (rhs_code == BIT_AND_EXPR) ++ { ++ if (TREE_CODE (rhs2) != INTEGER_CST) ++ { ++ return false; ++ } ++ res = add_node (lhs == current_node ? rhs1 : lhs, ++ *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ } ++ return res; ++} ++ ++/* Check whether gimple assign is correctly used and record node. */ ++ ++bool ++check_record_assign (tree ¤t_node, gimple *use_stmt, ++ hash_map &ptr_layers, ++ auto_vec &ssa_name_stack) ++{ ++ gimple_rhs_class rhs_class = gimple_assign_rhs_class (use_stmt); ++ if (*ptr_layers.get (current_node) == 1) ++ { ++ return check_record_ptr_usage (use_stmt, current_node, ++ ptr_layers, ssa_name_stack); ++ } ++ else if (*ptr_layers.get (current_node) > 1) ++ { ++ if (rhs_class != GIMPLE_BINARY_RHS ++ && rhs_class != GIMPLE_UNARY_RHS ++ && rhs_class != GIMPLE_SINGLE_RHS) ++ { ++ return false; ++ } ++ ++ if (rhs_class == GIMPLE_SINGLE_RHS || rhs_class == GIMPLE_UNARY_RHS) ++ { ++ return check_record_single_node (use_stmt, current_node, ++ ptr_layers, ssa_name_stack); ++ } ++ else if (rhs_class == GIMPLE_BINARY_RHS) ++ { ++ return check_record_mult_node (use_stmt, current_node, ++ ptr_layers, ssa_name_stack); ++ } ++ } ++ else ++ return false; ++ ++ return true; ++} ++ ++/* Check whether gimple phi is correctly used and record node. */ ++ ++bool ++check_record_phi (tree ¤t_node, gimple *use_stmt, ++ hash_map &ptr_layers, ++ auto_vec &ssa_name_stack) ++{ ++ bool res = true; ++ res &= add_node (gimple_phi_result (use_stmt), *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ ++ for (unsigned i = 0; i < gimple_phi_num_args (use_stmt); i++) ++ { ++ if (integer_zerop (gimple_phi_arg_def (use_stmt, i))) ++ { ++ continue; ++ } ++ res &= add_node (gimple_phi_arg_def (use_stmt, i), ++ *ptr_layers.get (current_node), ++ ptr_layers, ssa_name_stack); ++ } ++ return res; ++} ++ ++/* Check the use of callee. */ ++ ++bool ++check_callee (cgraph_node *node, gimple *stmt, ++ hash_map &ptr_layers, int input_layers) ++{ ++ /* caller main () ++ { spec_qsort.constprop (_649, _651); } ++ def spec_qsort.constprop (void * a, size_t n) ++ { spec_qsort.constprop (a_1, _139); } */ ++ /* In safe functions, only call itself is allowed. */ ++ if (node->get_edge (stmt)->callee != node) ++ { ++ return false; ++ } ++ tree input_node = gimple_call_arg (stmt, 0); ++ if (ptr_layers.get (input_node) == NULL ++ || *ptr_layers.get (input_node) != input_layers) ++ { ++ return false; ++ } ++ if (SSA_NAME_VAR (input_node) != DECL_ARGUMENTS (node->decl)) ++ { ++ return false; ++ } ++ ++ for (unsigned i = 1; i < gimple_call_num_args (stmt); i++) ++ { ++ if (ptr_layers.get (gimple_call_arg (stmt, i)) != NULL) ++ { ++ return false; ++ } ++ } ++ return true; ++} ++ ++/* Check the usage of input nodes and related nodes. */ ++ ++bool ++check_node_use (cgraph_node *node, tree current_node, ++ hash_map &ptr_layers, ++ auto_vec &ssa_name_stack, ++ int input_layers) ++{ ++ imm_use_iterator imm_iter; ++ gimple *use_stmt = NULL; ++ bool res = true; ++ /* Use FOR_EACH_IMM_USE_STMT as an indirect edge ++ to search for possible related nodes and push to stack. 
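++ E.g. starting from the void* parameter of a candidate such as
++ spec_qsort.constprop, each PHI, assign and self-call either records
++ a related node or fails the check; a GIMPLE_RETURN use always fails.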
*/ ++ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, current_node) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS) ++ && gimple_code (use_stmt) != GIMPLE_DEBUG) ++ { ++ fprintf (dump_file, " "); ++ print_gimple_stmt (dump_file, use_stmt, 0); ++ } ++ /* For other types of gimple, do not record the node. */ ++ if (res) ++ { ++ if (gimple_code (use_stmt) == GIMPLE_PHI) ++ { ++ res = check_record_phi (current_node, use_stmt, ++ ptr_layers, ssa_name_stack); ++ } ++ else if (gimple_code (use_stmt) == GIMPLE_ASSIGN) ++ { ++ res = check_record_assign (current_node, use_stmt, ++ ptr_layers, ssa_name_stack); ++ } ++ else if (gimple_code (use_stmt) == GIMPLE_CALL) ++ { ++ res = check_callee (node, use_stmt, ptr_layers, input_layers); ++ } ++ else if (gimple_code (use_stmt) == GIMPLE_RETURN) ++ { ++ res = false; ++ } ++ } ++ } ++ return res; ++} ++ ++/* Preparing the First Node for DFS. */ ++ ++bool ++set_init_node (cgraph_node *node, cgraph_edge *caller, ++ hash_map &ptr_layers, ++ auto_vec &ssa_name_stack, int &input_layers) ++{ ++ /* set input_layer ++ caller spec_qsort.constprop (_649, _651) ++ |-- Obtains the actual ptr layer ++ from the input node. */ ++ if (caller->call_stmt == NULL ++ || gimple_call_num_args (caller->call_stmt) == 0) ++ { ++ return false; ++ } ++ tree input = gimple_call_arg (caller->call_stmt, 0); ++ if (!(POINTER_TYPE_P (TREE_TYPE (input)) ++ || TREE_CODE (TREE_TYPE (input)) == ARRAY_TYPE) ++ || !handled_type (TREE_TYPE (input))) ++ { ++ return false; ++ } ++ input_layers = get_ptr_layers (TREE_TYPE (input)); ++ ++ /* set initial node ++ def spec_qsort.constprop (void * a, size_t n) ++ |-- Find the initial ssa_name ++ from the parameter node. */ ++ tree parm = DECL_ARGUMENTS (node->decl); ++ for (unsigned j = 1; j < num_ssa_names; ++j) ++ { ++ tree name = ssa_name (j); ++ if (!name || has_zero_uses (name) || virtual_operand_p (name)) ++ { ++ continue; ++ } ++ if (SSA_NAME_VAR (name) == parm ++ && gimple_code (SSA_NAME_DEF_STMT (name)) == GIMPLE_NOP) ++ { ++ if (!add_node (name, input_layers, ptr_layers, ssa_name_stack)) ++ { ++ return false; ++ } ++ } ++ } ++ return !ssa_name_stack.is_empty (); ++} ++ ++/* Check the usage of each call. */ ++ ++bool ++check_each_call (cgraph_node *node, cgraph_edge *caller) ++{ ++ hash_map ptr_layers; ++ auto_vec ssa_name_stack; ++ int input_layers = 0; ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "======== check each call : %s/%u ========\n", ++ node->name (), node->order); ++ } ++ if (!set_init_node (node, caller, ptr_layers, ssa_name_stack, input_layers)) ++ { ++ return false; ++ } ++ int i = 0; ++ while (!ssa_name_stack.is_empty ()) ++ { ++ tree current_node = ssa_name_stack.pop (); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\ncur node %d: \t", i++); ++ print_generic_expr (dump_file, current_node); ++ fprintf (dump_file, ",\t\tptr layers: %d: \n", ++ *ptr_layers.get (current_node)); ++ } ++ if (get_ptr_layers (TREE_TYPE (current_node)) ++ > *ptr_layers.get (current_node)) ++ { ++ return false; ++ } ++ if (!check_node_use (node, current_node, ptr_layers, ssa_name_stack, ++ input_layers)) ++ { ++ return false; ++ } ++ } ++ ++ if (!check_node_def (ptr_layers)) ++ { ++ return false; ++ } ++ return true; ++} ++ ++/* Filter out function: void func (void*, int n), ++ and the function has no static variable, no structure-related variable, ++ and no global variable is used. 
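++ E.g. void spec_qsort (void *a, size_t n) passes this filter, while
++ a function returning a pointer or taking a struct argument does not.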
*/ ++ ++bool ++filter_func (cgraph_node *node) ++{ ++ tree parm = DECL_ARGUMENTS (node->decl); ++ if (!(parm && VOID_POINTER_P (TREE_TYPE (parm)) ++ && VOID_TYPE_P (TREE_TYPE (TREE_TYPE (node->decl))))) ++ { ++ return false; ++ } ++ ++ for (parm = DECL_CHAIN (parm); parm; parm = DECL_CHAIN (parm)) ++ { ++ if (TREE_CODE (TREE_TYPE (parm)) != INTEGER_TYPE) ++ { ++ return false; ++ } ++ } ++ ++ if (DECL_STRUCT_FUNCTION (node->decl)->static_chain_decl) ++ { ++ return false; ++ } ++ ++ tree var = NULL_TREE; ++ unsigned int i = 0; ++ bool res = true; ++ FOR_EACH_LOCAL_DECL (cfun, i, var) ++ { ++ if (TREE_CODE (var) == VAR_DECL && handled_type (TREE_TYPE (var))) ++ { ++ res = false; ++ } ++ } ++ if (!res) ++ { ++ return false; ++ } ++ ++ for (unsigned j = 1; j < num_ssa_names; ++j) ++ { ++ tree name = ssa_name (j); ++ if (!name || has_zero_uses (name) || virtual_operand_p (name)) ++ { ++ continue; ++ } ++ tree var = SSA_NAME_VAR (name); ++ if (var && TREE_CODE (var) == VAR_DECL && is_global_var (var)) ++ { ++ return false; ++ } ++ } ++ return true; ++} ++ ++/* Check whether the function with the void* parameter and uses the input node ++ safely. ++ In these functions only component_ref can be used to dereference the last ++ layer of the input structure pointer. The hack operation pointer offset ++ after type cast cannot be used. ++*/ ++ ++bool ++is_safe_func_with_void_ptr_parm (cgraph_node *node) ++{ ++ if (!filter_func (node)) ++ { ++ return false; ++ } ++ ++ /* Distinguish Recursive Callers ++ normal_callers: main () ++ { spec_qsort.constprop (_649, _651); } ++ definition: spec_qsort.constprop (void * a, size_t n) ++ recursive_callers: { spec_qsort.constprop (a_1, _139); } */ ++ vec callers = node->collect_callers (); ++ auto_vec normal_callers; ++ for (unsigned i = 0; i < callers.length (); i++) ++ { ++ if (callers[i]->caller != node) ++ { ++ normal_callers.safe_push (callers[i]); ++ } ++ } ++ if (normal_callers.length () == 0) ++ { ++ return false; ++ } ++ ++ for (unsigned i = 0; i < normal_callers.length (); i++) ++ { ++ if (!check_each_call (node, normal_callers[i])) ++ { ++ return false; ++ } ++ } ++ return true; ++} ++ + /* Return the escape type which corresponds to if + this is an volatile type, an array type or a pointer + to a pointer type. */ +@@ -1814,12 +2679,83 @@ escape_type escape_type_volatile_array_o + return escape_volatile; + if (isarraytype (type)) + return escape_array; +- if (isptrptr (type)) ++ if (isptrptr (type) && (current_mode != STRUCT_REORDER_FIELDS)) + return escape_ptr_ptr; + return does_not_escape; + } + +-/* Record TYPE if not already recorded. */ ++/* Record field type. */ ++ ++void ++ipa_struct_reorg::record_field_type (tree field, srtype *base_srtype) ++{ ++ tree field_type = TREE_TYPE (field); ++ /* The uid of the type in the structure is different ++ from that outside the structure. */ ++ srtype *field_srtype = record_type (inner_type (field_type)); ++ srfield *field_srfield = base_srtype->find_field (int_byte_position (field)); ++ /* We might have an variable sized type which we don't set the handle. */ ++ if (field_srfield) ++ { ++ field_srfield->type = field_srtype; ++ field_srtype->add_field_site (field_srfield); ++ } ++ if (field_srtype == base_srtype && current_mode != COMPLETE_STRUCT_RELAYOUT ++ && current_mode != STRUCT_REORDER_FIELDS) ++ { ++ base_srtype->mark_escape (escape_rescusive_type, NULL); ++ } ++ /* Types of non-pointer field are difficult to track the correctness ++ of the rewrite when it used by the escaped type. 
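++ E.g. for a nested aggregate, say struct outer { struct inner f; },
++ rewriting inner would also change outer's layout, so inner is
++ marked with escape_instance_field.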
*/ ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && TREE_CODE (field_type) == RECORD_TYPE) ++ { ++ field_srtype->mark_escape (escape_instance_field, NULL); ++ } ++} ++ ++/* Record structure all field types. */ ++ ++void ++ipa_struct_reorg::record_struct_field_types (tree base_type, ++ srtype *base_srtype) ++{ ++ for (tree field = TYPE_FIELDS (base_type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ tree field_type = TREE_TYPE (field); ++ process_union (field_type); ++ if (TREE_CODE (inner_type (field_type)) == UNION_TYPE ++ || TREE_CODE (inner_type (field_type)) == QUAL_UNION_TYPE) ++ { ++ base_srtype->mark_escape (escape_union, NULL); ++ } ++ if (isvolatile_type (field_type)) ++ { ++ base_srtype->mark_escape (escape_volatile, NULL); ++ } ++ escape_type e = escape_type_volatile_array_or_ptrptr (field_type); ++ if (e != does_not_escape) ++ { ++ base_srtype->mark_escape (e, NULL); ++ } ++ /* Types of non-pointer field are difficult to track the correctness ++ of the rewrite when it used by the escaped type. */ ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && TREE_CODE (field_type) == RECORD_TYPE) ++ { ++ base_srtype->mark_escape (escape_instance_field, NULL); ++ } ++ if (handled_type (field_type)) ++ { ++ record_field_type (field, base_srtype); ++ } ++ } ++ } ++} ++ ++/* Record TYPE if not already recorded. */ + + srtype * + ipa_struct_reorg::record_type (tree type) +@@ -1827,7 +2763,7 @@ ipa_struct_reorg::record_type (tree type + unsigned typeuid; + + /* Get the main variant as we are going +- to record that type only. */ ++ to record that type only. */ + type = TYPE_MAIN_VARIANT (type); + typeuid = TYPE_UID (type); + +@@ -1837,7 +2773,7 @@ ipa_struct_reorg::record_type (tree type + if (type1) + return type1; + +- /* If already done recording just return NULL. */ ++ /* If already done recording just return NULL. */ + if (done_recording) + return NULL; + +@@ -1845,46 +2781,14 @@ ipa_struct_reorg::record_type (tree type + fprintf (dump_file, "Recording new type: %u.\n", typeuid); + + type1 = new srtype (type); +- types.safe_push(type1); ++ types.safe_push (type1); + + /* If the type has an user alignment set, +- that means the user most likely already setup the type. */ ++ that means the user most likely already setup the type. */ + if (TYPE_USER_ALIGN (type)) + type1->mark_escape (escape_user_alignment, NULL); + +- for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) +- { +- if (TREE_CODE (field) == FIELD_DECL) +- { +- tree t = TREE_TYPE (field); +- process_union (t); +- if (TREE_CODE (inner_type (t)) == UNION_TYPE +- || TREE_CODE (inner_type (t)) == QUAL_UNION_TYPE) +- { +- type1->mark_escape (escape_union, NULL); +- } +- if (isvolatile_type (t)) +- type1->mark_escape (escape_volatile, NULL); +- escape_type e = escape_type_volatile_array_or_ptrptr (t); +- if (e != does_not_escape) +- type1->mark_escape (e, NULL); +- if (handled_type (t)) +- { +- srtype *t1 = record_type (inner_type (t)); +- srfield *f = type1->find_field (int_byte_position (field)); +- /* We might have an variable sized type which we don't set the handle. */ +- if (f) +- { +- f->type = t1; +- t1->add_field_site (f); +- } +- if (t1 == type1 && current_mode != COMPLETE_STRUCT_RELAYOUT) +- { +- type1->mark_escape (escape_rescusive_type, NULL); +- } +- } +- } +- } ++ record_struct_field_types (type, type1); + + return type1; + } +@@ -1892,7 +2796,8 @@ ipa_struct_reorg::record_type (tree type + /* Mark TYPE as escaping with ESCAPES as the reason. 
*/ + + void +-ipa_struct_reorg::mark_type_as_escape (tree type, escape_type escapes, gimple *stmt) ++ipa_struct_reorg::mark_type_as_escape (tree type, escape_type escapes, ++ gimple *stmt) + { + if (handled_type (type)) + { +@@ -1906,7 +2811,7 @@ ipa_struct_reorg::mark_type_as_escape (t + } + + /* Maybe process the union of type TYPE, such that marking all of the fields' +- types as being escaping. */ ++ types as being escaping. */ + + void + ipa_struct_reorg::process_union (tree type) +@@ -1920,7 +2825,7 @@ ipa_struct_reorg::process_union (tree ty + + type = TYPE_MAIN_VARIANT (type); + +- /* We already processed this type. */ ++ /* We already processed this type. */ + if (unions_recorded.add (type)) + return; + +@@ -1936,7 +2841,7 @@ ipa_struct_reorg::process_union (tree ty + + /* Used by record_var function as a callback to walk_tree. + Mark the type as escaping if it has expressions which +- cannot be converted for global initializations. */ ++ cannot be converted for global initializations. */ + + static tree + record_init_types (tree *tp, int *walk_subtrees, void *data) +@@ -1983,7 +2888,8 @@ ipa_struct_reorg::record_var (tree decl, + + process_union (TREE_TYPE (decl)); + +- /* */ ++ /* Only the structure type RECORD_TYPE is recorded. ++ Therefore, the void* type is filtered out. */ + if (handled_type (TREE_TYPE (decl))) + { + type = record_type (inner_type (TREE_TYPE (decl))); +@@ -1998,7 +2904,7 @@ ipa_struct_reorg::record_var (tree decl, + else + { + gcc_assert (current_function); +- sd = current_function->record_decl (type, decl, arg); ++ sd = current_function->record_decl (type, decl, arg); + } + + /* If the variable has the "used" attribute, then treat the type as escaping. */ +@@ -2020,7 +2926,8 @@ ipa_struct_reorg::record_var (tree decl, + + /* Separate instance is hard to trace in complete struct + relayout optimization. */ +- if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ if ((current_mode == COMPLETE_STRUCT_RELAYOUT ++ || current_mode == STRUCT_REORDER_FIELDS) + && TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE) + { + e = escape_separate_instance; +@@ -2088,8 +2995,10 @@ ipa_struct_reorg::find_var (tree expr, g + srtype *type; + srfield *field; + bool realpart, imagpart, address; ++ bool escape_from_base = false; ++ /* The should_create flag is true, the declaration can be recorded. */ + get_type_field (expr, base, indirect, type, field, +- realpart, imagpart, address, true, true); ++ realpart, imagpart, address, escape_from_base, true, true); + } + + +@@ -2106,27 +3015,68 @@ ipa_struct_reorg::find_vars (gimple *stm + { + tree lhs = gimple_assign_lhs (stmt); + tree rhs = gimple_assign_rhs1 (stmt); ++ + find_var (gimple_assign_lhs (stmt), stmt); ++ /* _2 = MEM[(struct arc_t * *)_1]; ++ records the right value _1 declaration. */ + find_var (gimple_assign_rhs1 (stmt), stmt); +- if (TREE_CODE (lhs) == SSA_NAME ++ ++ /* Add a safe func mechanism. 
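++ In a safe function, an SSA name copied directly from the void*
++ parameter (e.g. a_1 in spec_qsort.constprop) is deliberately not
++ recorded as a struct pointer here.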
*/ ++ bool l_find = true; ++ bool r_find = true; ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ l_find = !(current_function->is_safe_func ++ && TREE_CODE (lhs) == SSA_NAME ++ && is_from_void_ptr_parm (lhs)); ++ r_find = !(current_function->is_safe_func ++ && TREE_CODE (rhs) == SSA_NAME ++ && is_from_void_ptr_parm (rhs)); ++ } ++ ++ if ((TREE_CODE (lhs) == SSA_NAME) + && VOID_POINTER_P (TREE_TYPE (lhs)) +- && handled_type (TREE_TYPE (rhs))) ++ && handled_type (TREE_TYPE (rhs)) && l_find) + { + srtype *t = find_type (inner_type (TREE_TYPE (rhs))); + srdecl *d = find_decl (lhs); + if (!d && t) +- current_function->record_decl (t, lhs, -1); ++ current_function->record_decl (t, lhs, -1, ++ isptrptr (TREE_TYPE (rhs)) ? TREE_TYPE (rhs) : NULL); + } ++ /* void * _1; struct arc * _4; ++ _4 = _1 + _3; _1 = calloc (100, 40). */ + if (TREE_CODE (rhs) == SSA_NAME + && VOID_POINTER_P (TREE_TYPE (rhs)) +- && handled_type (TREE_TYPE (lhs))) ++ && handled_type (TREE_TYPE (lhs)) && r_find) + { + srtype *t = find_type (inner_type (TREE_TYPE (lhs))); + srdecl *d = find_decl (rhs); + if (!d && t) +- current_function->record_decl (t, rhs, -1); ++ current_function->record_decl (t, rhs, -1, ++ isptrptr (TREE_TYPE (lhs)) ? TREE_TYPE (lhs) : NULL); + } + } ++ else if ((current_mode == STRUCT_REORDER_FIELDS) ++ && (gimple_assign_rhs_code (stmt) == LE_EXPR ++ || gimple_assign_rhs_code (stmt) == LT_EXPR ++ || gimple_assign_rhs_code (stmt) == GE_EXPR ++ || gimple_assign_rhs_code (stmt) == GT_EXPR)) ++ { ++ find_var (gimple_assign_lhs (stmt), stmt); ++ find_var (gimple_assign_rhs1 (stmt), stmt); ++ find_var (gimple_assign_rhs2 (stmt), stmt); ++ } ++ /* _23 = _21 - old_arcs_12. */ ++ else if ((current_mode == STRUCT_REORDER_FIELDS) ++ && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR ++ && types_compatible_p ( ++ TYPE_MAIN_VARIANT (TREE_TYPE (gimple_assign_rhs1 (stmt))), ++ TYPE_MAIN_VARIANT (TREE_TYPE (gimple_assign_rhs2 (stmt))))) ++ { ++ find_var (gimple_assign_rhs1 (stmt), stmt); ++ find_var (gimple_assign_rhs2 (stmt), stmt); ++ } + else + { + /* Because we won't handle these stmts in rewrite phase, +@@ -2219,8 +3169,122 @@ ipa_struct_reorg::maybe_record_stmt (cgr + } + } + ++/* Calculate the multiplier. */ ++ ++static bool ++calculate_mult_num (tree arg, tree *num, tree struct_size) ++{ ++ gcc_assert (TREE_CODE (arg) == INTEGER_CST); ++ bool sign = false; ++ HOST_WIDE_INT size = TREE_INT_CST_LOW (arg); ++ if (size < 0) ++ { ++ size = -size; ++ sign = true; ++ } ++ tree arg2 = build_int_cst (TREE_TYPE (arg), size); ++ if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg2, struct_size))) ++ { ++ tree number = size_binop (FLOOR_DIV_EXPR, arg2, struct_size); ++ if (sign) ++ { ++ number = build_int_cst (TREE_TYPE (number), -tree_to_shwi (number)); ++ } ++ *num = number; ++ return true; ++ } ++ return false; ++} ++ ++/* Trace and calculate the multiplier of PLUS_EXPR. */ ++ ++static bool ++trace_calculate_plus (gimple *size_def_stmt, tree *num, tree struct_size) ++{ ++ gcc_assert (gimple_assign_rhs_code (size_def_stmt) == PLUS_EXPR); ++ ++ tree num1 = NULL_TREE; ++ tree num2 = NULL_TREE; ++ tree arg0 = gimple_assign_rhs1 (size_def_stmt); ++ tree arg1 = gimple_assign_rhs2 (size_def_stmt); ++ if (!is_result_of_mult (arg0, &num1, struct_size) || num1 == NULL_TREE) ++ { ++ return false; ++ } ++ if (!is_result_of_mult (arg1, &num2, struct_size) || num2 == NULL_TREE) ++ { ++ return false; ++ } ++ *num = size_binop (PLUS_EXPR, num1, num2); ++ return true; ++} ++ ++/* Trace and calculate the multiplier of MULT_EXPR. 
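++ E.g. with a struct size of 72, _479 = _478 * 72 yields the
++ multiplier _478.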
*/ ++ ++static bool ++trace_calculate_mult (gimple *size_def_stmt, tree *num, tree struct_size) ++{ ++ gcc_assert (gimple_assign_rhs_code (size_def_stmt) == MULT_EXPR); ++ ++ tree arg0 = gimple_assign_rhs1 (size_def_stmt); ++ tree arg1 = gimple_assign_rhs2 (size_def_stmt); ++ tree num1 = NULL_TREE; ++ ++ if (is_result_of_mult (arg0, &num1, struct_size) && num1 != NULL_TREE) ++ { ++ *num = size_binop (MULT_EXPR, arg1, num1); ++ return true; ++ } ++ if (is_result_of_mult (arg1, &num1, struct_size) && num1 != NULL_TREE) ++ { ++ *num = size_binop (MULT_EXPR, arg0, num1); ++ return true; ++ } ++ *num = NULL_TREE; ++ return false; ++} ++ ++/* Trace and calculate the multiplier of NEGATE_EXPR. */ ++ ++static bool ++trace_calculate_negate (gimple *size_def_stmt, tree *num, tree struct_size) ++{ ++ gcc_assert (gimple_assign_rhs_code (size_def_stmt) == NEGATE_EXPR); ++ ++ /* _480 = -_479; _479 = _478 * 72. */ ++ tree num1 = NULL_TREE; ++ tree arg0 = gimple_assign_rhs1 (size_def_stmt); ++ if (!is_result_of_mult (arg0, &num1, struct_size) || num1 == NULL_TREE) ++ { ++ return false; ++ } ++ tree num0 = build_int_cst (TREE_TYPE (num1), -1); ++ *num = size_binop (MULT_EXPR, num0, num1); ++ return true; ++} ++ ++/* Trace and calculate the multiplier of POINTER_DIFF_EXPR. */ ++ ++static bool ++trace_calculate_diff (gimple *size_def_stmt, tree *num) ++{ ++ gcc_assert (gimple_assign_rhs_code (size_def_stmt) == NOP_EXPR); ++ ++ /* _25 = (long unsigned int) _23; _23 = _21 - old_arcs_12. */ ++ tree arg = gimple_assign_rhs1 (size_def_stmt); ++ size_def_stmt = SSA_NAME_DEF_STMT (arg); ++ if (size_def_stmt && is_gimple_assign (size_def_stmt) ++ && gimple_assign_rhs_code (size_def_stmt) == POINTER_DIFF_EXPR) ++ { ++ *num = NULL_TREE; ++ return true; ++ } ++ *num = NULL_TREE; ++ return false; ++} ++ + /* This function checks whether ARG is a result of multiplication +- of some number by STRUCT_SIZE. If yes, the function returns true ++ of some number by STRUCT_SIZE. If yes, the function returns true + and this number is filled into NUM. */ + + static bool +@@ -2231,30 +3295,10 @@ is_result_of_mult (tree arg, tree *num, + || integer_zerop (struct_size)) + return false; + +- /* If we have a integer, just check if it is a multiply of STRUCT_SIZE. */ ++ /* If we have a integer, just check if it is a multiply of STRUCT_SIZE. */ + if (TREE_CODE (arg) == INTEGER_CST) +- { +- bool sign = false; +- HOST_WIDE_INT size = TREE_INT_CST_LOW (arg); +- if (size < 0) +- { +- size = -size; +- sign = true; +- } +- tree arg2 = build_int_cst (TREE_TYPE (arg), size); +- if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg2, struct_size))) +- { +- tree number = size_binop (FLOOR_DIV_EXPR, arg2, struct_size); +- if (sign) +- { +- number = build_int_cst (TREE_TYPE (number), +- -tree_to_shwi (number)); +- } +- *num = number; +- return true; +- } +- return false; +- } ++ return calculate_mult_num (arg, num, struct_size); ++ + gimple *size_def_stmt = SSA_NAME_DEF_STMT (arg); + + /* If the allocation statement was of the form +@@ -2270,43 +3314,28 @@ is_result_of_mult (tree arg, tree *num, + return false; + + // FIXME: this should handle SHIFT also. 
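++ /* E.g. a constant argument of 416 with a 104-byte struct gives
++ NUM = 4; SSA names are traced through the helpers below. */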
+- if (gimple_assign_rhs_code (size_def_stmt) == PLUS_EXPR) ++ tree_code rhs_code = gimple_assign_rhs_code (size_def_stmt); ++ if (rhs_code == PLUS_EXPR) + { +- tree num1, num2; +- tree arg0 = gimple_assign_rhs1 (size_def_stmt); +- tree arg1 = gimple_assign_rhs2 (size_def_stmt); +- if (!is_result_of_mult (arg0, &num1, struct_size)) +- return false; +- if (!is_result_of_mult (arg1, &num2, struct_size)) +- return false; +- *num = size_binop (PLUS_EXPR, num1, num2); +- return true; ++ return trace_calculate_plus (size_def_stmt, num, struct_size); + } +- if (gimple_assign_rhs_code (size_def_stmt) == MULT_EXPR) ++ else if (rhs_code == MULT_EXPR) + { +- tree arg0 = gimple_assign_rhs1 (size_def_stmt); +- tree arg1 = gimple_assign_rhs2 (size_def_stmt); +- tree num1; +- +- if (is_result_of_mult (arg0, &num1, struct_size)) +- { +- *num = size_binop (MULT_EXPR, arg1, num1); +- return true; +- } +- if (is_result_of_mult (arg1, &num1, struct_size)) +- { +- *num = size_binop (MULT_EXPR, arg0, num1); +- return true; +- } +- +- *num = NULL_TREE; +- return false; ++ return trace_calculate_mult (size_def_stmt, num, struct_size); + } +- else if (gimple_assign_rhs_code (size_def_stmt) == SSA_NAME) ++ else if (rhs_code == SSA_NAME) + { + arg = gimple_assign_rhs1 (size_def_stmt); + size_def_stmt = SSA_NAME_DEF_STMT (arg); + } ++ else if (rhs_code == NEGATE_EXPR && current_mode == STRUCT_REORDER_FIELDS) ++ { ++ return trace_calculate_negate (size_def_stmt, num, struct_size); ++ } ++ else if (rhs_code == NOP_EXPR && current_mode == STRUCT_REORDER_FIELDS) ++ { ++ return trace_calculate_diff (size_def_stmt, num); ++ } + else + { + *num = NULL_TREE; +@@ -2323,10 +3352,17 @@ is_result_of_mult (tree arg, tree *num, + bool + ipa_struct_reorg::handled_allocation_stmt (gimple *stmt) + { +- if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ if ((current_mode == STRUCT_REORDER_FIELDS) ++ && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC))) ++ { ++ return true; ++ } ++ if ((current_mode == COMPLETE_STRUCT_RELAYOUT) + && gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) + return true; +- if (current_mode != COMPLETE_STRUCT_RELAYOUT ++ if ((current_mode == NORMAL) + && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) +@@ -2342,7 +3378,7 @@ ipa_struct_reorg::handled_allocation_stm + elements in the array allocated. */ + + tree +-ipa_struct_reorg::allocate_size (srtype *type, gimple *stmt) ++ipa_struct_reorg::allocate_size (srtype *type, srdecl *decl, gimple *stmt) + { + if (!stmt + || gimple_code (stmt) != GIMPLE_CALL +@@ -2362,6 +3398,12 @@ ipa_struct_reorg::allocate_size (srtype + + tree struct_size = TYPE_SIZE_UNIT (type->type); + ++ /* Specify the correct size to relax multi-layer pointer. */ ++ if (TREE_CODE (decl->decl) == SSA_NAME && isptrptr (decl->orig_type)) ++ { ++ struct_size = TYPE_SIZE_UNIT (decl->orig_type); ++ } ++ + tree size = gimple_call_arg (stmt, 0); + + if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) +@@ -2371,13 +3413,17 @@ ipa_struct_reorg::allocate_size (srtype + { + tree arg1; + arg1 = gimple_call_arg (stmt, 1); ++ + /* Check that second argument is a constant equal to the size of structure. */ + if (operand_equal_p (arg1, struct_size, 0)) + return size; + /* ??? Check that first argument is a constant +- equal to the size of structure. */ ++ equal to the size of structure. 
*/ ++ /* If the allocated number is equal to the value of struct_size, ++ the value of arg1 is changed to the allocated number. */ + if (operand_equal_p (size, struct_size, 0)) + return arg1; ++ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "\ncalloc the correct size:\n"); +@@ -2419,17 +3465,38 @@ ipa_struct_reorg::maybe_mark_or_record_o + + if (!d) + { ++ /* MEM[(struct arc *)_1].head = _5; _5 = calloc (100, 104). */ + if (VOID_POINTER_P (TREE_TYPE (side)) + && TREE_CODE (side) == SSA_NAME) +- current_function->record_decl (type, side, -1); ++ { ++ /* The type is other, the declaration is side. */ ++ current_function->record_decl (type, side, -1, ++ find_decl (other) ? find_decl (other)->orig_type : NULL); ++ } + else +- type->mark_escape (escape_cast_another_ptr, stmt); ++ { ++ /* *_21 = &MEM[(void *)&perm + 8B]. */ ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ } + } + else if (type != d->type) + { + type->mark_escape (escape_cast_another_ptr, stmt); + d->type->mark_escape (escape_cast_another_ptr, stmt); + } ++ /* stop_393 = net.stop_nodes; void *stop; ++ Directly mark the structure pointer type assigned ++ to the void* variable as escape. */ ++ else if (current_mode == STRUCT_REORDER_FIELDS ++ && TREE_CODE (side) == SSA_NAME ++ && VOID_POINTER_P (TREE_TYPE (side)) ++ && SSA_NAME_VAR (side) ++ && VOID_POINTER_P (TREE_TYPE (SSA_NAME_VAR (side)))) ++ { ++ mark_type_as_escape (TREE_TYPE (other), escape_cast_void, stmt); ++ } ++ ++ check_ptr_layers (side, other, stmt); + } + + /* Record accesses in an assignment statement STMT. */ +@@ -2455,7 +3522,11 @@ ipa_struct_reorg::maybe_record_assign (c + if (!handled_type (TREE_TYPE (lhs))) + return; + /* Check if rhs2 is a multiplication of the size of the type. */ +- if (is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))))) ++ /* The size adjustment and judgment of multi-layer pointers ++ are added. */ ++ if (is_result_of_mult (rhs2, &num, isptrptr (TREE_TYPE (lhs)) ++ ? TYPE_SIZE_UNIT (TREE_TYPE (lhs)) ++ : TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))))) + { + record_stmt_expr (lhs, node, stmt); + record_stmt_expr (rhs1, node, stmt); +@@ -2493,9 +3564,8 @@ ipa_struct_reorg::maybe_record_assign (c + } + + bool +-check_mem_ref_offset (tree expr) ++check_mem_ref_offset (tree expr, tree *num) + { +- tree num = NULL; + bool ret = false; + + if (TREE_CODE (expr) != MEM_REF) +@@ -2510,13 +3580,18 @@ check_mem_ref_offset (tree expr) + { + tmp = TREE_OPERAND (tmp, 0); + } +- tree size = TYPE_SIZE_UNIT (inner_type (TREE_TYPE (tmp))); +- ret = is_result_of_mult (field_off, &num, size); ++ /* Specify the correct size for the multi-layer pointer. */ ++ tree size = isptrptr (TREE_TYPE (tmp)) ++ ? TYPE_SIZE_UNIT (TREE_TYPE (tmp)) ++ : TYPE_SIZE_UNIT (inner_type (TREE_TYPE (tmp))); ++ ret = is_result_of_mult (field_off, num, size); + return ret; + } + + tree +-get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, bool &realpart, bool &imagpart, tree &accesstype) ++get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, ++ bool &realpart, bool &imagpart, ++ tree &accesstype, tree *num) + { + offset = 0; + realpart = false; +@@ -2539,22 +3614,29 @@ get_ref_base_and_offset (tree &e, HOST_W + { + case COMPONENT_REF: + { ++ /* net.arcs = _98; If expr is the lvalue of stmt, ++ then field type is FIELD_DECL - POINTER_TYPE - RECORD_TYPE. 
*/ + tree field = TREE_OPERAND (expr, 1); + tree field_off = byte_position (field); + if (TREE_CODE (field_off) != INTEGER_CST) + return NULL; + offset += tree_to_shwi (field_off); ++ /* net.arcs = _98; If expr is the lvalue of stmt, ++ then expr type is VAR_DECL - RECORD_TYPE (fetch net) */ + expr = TREE_OPERAND (expr, 0); + accesstype = NULL; + break; + } + case MEM_REF: + { ++ /* _2 = MEM[(struct arc_t * *)_1]; ++ If expr is the right value of stmt,then field_off type is ++ INTEGER_CST - POINTER_TYPE - POINTER_TYPE - RECORD_TYPE. */ + tree field_off = TREE_OPERAND (expr, 1); + gcc_assert (TREE_CODE (field_off) == INTEGER_CST); + /* So we can mark the types as escaping if different. */ + accesstype = TREE_TYPE (field_off); +- if (!check_mem_ref_offset (expr)) ++ if (!check_mem_ref_offset (expr, num)) + { + offset += tree_to_uhwi (field_off); + } +@@ -2595,8 +3677,13 @@ ipa_struct_reorg::wholeaccess (tree expr + } + + bool +-ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, srtype *&type, srfield *&field, bool &realpart, bool &imagpart, bool &address, bool should_create, bool can_escape) ++ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, ++ srtype *&type, srfield *&field, ++ bool &realpart, bool &imagpart, bool &address, ++ bool& escape_from_base, bool should_create, ++ bool can_escape) + { ++ tree num = NULL_TREE; + HOST_WIDE_INT offset; + tree accesstype; + address = false; +@@ -2608,7 +3695,9 @@ ipa_struct_reorg::get_type_field (tree e + mark_as_bit_field = true; + } + +- base = get_ref_base_and_offset (expr, offset, realpart, imagpart, accesstype); ++ /* ref is classified into two types: COMPONENT_REF or MER_REF. */ ++ base = get_ref_base_and_offset (expr, offset, realpart, imagpart, ++ accesstype, &num); + + /* Variable access, unkown type. */ + if (base == NULL) +@@ -2646,6 +3735,8 @@ ipa_struct_reorg::get_type_field (tree e + if (!t) + return false; + } ++ /* If no such decl is finded ++ or orig_type is not added to this decl, then add it. */ + else if (!d && accesstype) + { + if (!should_create) +@@ -2657,15 +3748,54 @@ ipa_struct_reorg::get_type_field (tree e + t = record_type (inner_type (accesstype)); + if (!t || t->has_escaped ()) + return false; +- /* If base is not void* mark the type as escaping. */ +- if (!VOID_POINTER_P (TREE_TYPE (base))) ++ /* If base is not void* mark the type as escaping. ++ release INTEGER_TYPE cast to struct pointer. ++ (If t has escpaed above, then directly returns ++ and doesn't mark escape follow.). */ ++ /* _607 = MEM[(struct arc_t * *)pl_100]. ++ then base pl_100:ssa_name - pointer_type - integer_type. */ ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ bool is_int_ptr = POINTER_TYPE_P (TREE_TYPE (base)) ++ && (TREE_CODE (inner_type (TREE_TYPE (base))) ++ == INTEGER_TYPE); ++ if (!(VOID_POINTER_P (TREE_TYPE (base)) ++ || (current_function->is_safe_func && is_int_ptr))) ++ { ++ gcc_assert (can_escape); ++ t->mark_escape (escape_cast_another_ptr, NULL); ++ return false; ++ } ++ if (TREE_CODE (base) == SSA_NAME ++ && !(current_function->is_safe_func && is_int_ptr)) ++ { ++ /* Add a safe func mechanism. */ ++ if (!(current_function->is_safe_func ++ && is_from_void_ptr_parm (base))) ++ { ++ /* Add auxiliary information of the multi-layer pointer ++ type. */ ++ current_function->record_decl (t, base, -1, ++ isptrptr (accesstype) ? 
accesstype : NULL); ++ } ++ } ++ } ++ else + { +- gcc_assert (can_escape); +- t->mark_escape (escape_cast_another_ptr, NULL); +- return false; ++ if (!VOID_POINTER_P (TREE_TYPE (base))) ++ { ++ gcc_assert (can_escape); ++ t->mark_escape (escape_cast_another_ptr, NULL); ++ return false; ++ } ++ if (TREE_CODE (base) == SSA_NAME) ++ { ++ /* Add auxiliary information of the multi-layer pointer ++ type. */ ++ current_function->record_decl (t, base, -1, ++ isptrptr (accesstype) ? accesstype : NULL); ++ } + } +- if (TREE_CODE (base) == SSA_NAME) +- current_function->record_decl (t, base, -1); + } + else if (!d) + return false; +@@ -2673,7 +3803,10 @@ ipa_struct_reorg::get_type_field (tree e + t = d->type; + + if (t->has_escaped ()) ++ { ++ escape_from_base = true; + return false; ++ } + + if (mark_as_bit_field) + { +@@ -2699,7 +3832,6 @@ ipa_struct_reorg::get_type_field (tree e + print_generic_expr (dump_file, expr); + fprintf (dump_file, "\n"); + print_generic_expr (dump_file, base); +- fprintf (dump_file, "\n"); + } + gcc_assert (can_escape); + t->mark_escape (escape_unkown_field, NULL); +@@ -2713,9 +3845,8 @@ ipa_struct_reorg::get_type_field (tree e + print_generic_expr (dump_file, f->fieldtype); + fprintf (dump_file, "\naccess type = "); + print_generic_expr (dump_file, TREE_TYPE (expr)); +- fprintf (dump_file, "original expr = "); ++ fprintf (dump_file, "\noriginal expr = "); + print_generic_expr (dump_file, expr); +- fprintf (dump_file, "\n"); + } + gcc_assert (can_escape); + t->mark_escape (escape_unkown_field, NULL); +@@ -2737,7 +3868,9 @@ ipa_struct_reorg::mark_expr_escape (tree + srtype *type; + srfield *field; + bool realpart, imagpart, address; +- if (!get_type_field (expr, base, indirect, type, field, realpart, imagpart, address)) ++ bool escape_from_base = false; ++ if (!get_type_field (expr, base, indirect, type, field, ++ realpart, imagpart, address, escape_from_base)) + return; + + type->mark_escape (escapes, stmt); +@@ -2809,6 +3942,7 @@ ipa_struct_reorg::maybe_record_call (cgr + return; + } + ++ /* get func param it's tree_list. */ + argtype = TYPE_ARG_TYPES (gimple_call_fntype (stmt)); + for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) + { +@@ -2816,9 +3950,16 @@ ipa_struct_reorg::maybe_record_call (cgr + if (argtype) + { + tree argtypet = TREE_VALUE (argtype); +- if (!free_or_realloc ++ /* spec_qsort.constprop (_649, _651); ++ Check the callee func, instead of current func. 
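++ E.g. passing _649 to a recorded safe function does not force
++ escape_cast_void on the struct type behind it.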
*/ ++ if (!(free_or_realloc ++ || (current_mode == STRUCT_REORDER_FIELDS ++ && safe_functions.contains ( ++ node->get_edge (stmt)->callee))) + && VOID_POINTER_P (argtypet)) +- mark_type_as_escape (TREE_TYPE (arg), escape_cast_void); ++ { ++ mark_type_as_escape (TREE_TYPE (arg), escape_cast_void, stmt); ++ } + else + record_stmt_expr (arg, node, stmt); + } +@@ -2839,11 +3980,26 @@ ipa_struct_reorg::record_stmt_expr (tree + srtype *type; + srfield *field; + bool realpart, imagpart, address; +- if (!get_type_field (expr, base, indirect, type, field, realpart, imagpart, address)) ++ bool escape_from_base = false; ++ if (!get_type_field (expr, base, indirect, type, field, ++ realpart, imagpart, address, escape_from_base)) + return; + +- if (!opt_for_fn (current_function_decl, flag_ipa_struct_reorg)) +- type->mark_escape (escape_non_optimize, stmt); ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ if (!opt_for_fn (current_function_decl, flag_ipa_reorder_fields)) ++ { ++ type->mark_escape (escape_non_optimize, stmt); ++ } ++ } ++ else ++ { ++ if (!opt_for_fn (current_function_decl, flag_ipa_struct_reorg)) ++ { ++ type->mark_escape (escape_non_optimize, stmt); ++ } ++ } ++ + + /* Record it. */ + type->add_access (new sraccess (stmt, node, type, field)); +@@ -2861,8 +4017,10 @@ ipa_struct_reorg::find_function (cgraph_ + } + + void +-ipa_struct_reorg::check_type_and_push (tree newdecl, srtype *type, vec &worklist, gimple *stmt) ++ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl, ++ vec &worklist, gimple *stmt) + { ++ srtype *type = decl->type; + if (integer_zerop (newdecl)) + return; + +@@ -2874,8 +4032,9 @@ ipa_struct_reorg::check_type_and_push (t + type->mark_escape (escape_cast_another_ptr, stmt); + return; + } +- if (d->type == type) +- return; ++ if (d->type == type ++ && cmp_ptr_layers (TREE_TYPE (newdecl), TREE_TYPE (decl->decl))) ++ return; + + srtype *type1 = d->type; + type->mark_escape (escape_cast_another_ptr, stmt); +@@ -2925,7 +4084,9 @@ ipa_struct_reorg::check_type_and_push (t + /* Only add to the worklist if the decl is a SSA_NAME. */ + if (TREE_CODE (newdecl) == SSA_NAME) + worklist.safe_push (d); +- if (d->type == type) ++ tree a_decl = d->orig_type ? d->orig_type : TREE_TYPE (newdecl); ++ tree b_decl = decl->orig_type ? decl->orig_type : TREE_TYPE (decl->decl); ++ if (d->type == type && cmp_ptr_layers (a_decl, b_decl)) + return; + + srtype *type1 = d->type; +@@ -2967,6 +4128,108 @@ ipa_struct_reorg::check_alloc_num (gimpl + } + } + ++/* Check the definition of gimple assign. */ ++ ++void ++ipa_struct_reorg::check_definition_assign (srdecl *decl, vec &worklist) ++{ ++ tree ssa_name = decl->decl; ++ srtype *type = decl->type; ++ gimple *stmt = SSA_NAME_DEF_STMT (ssa_name); ++ gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN); ++ /* a) if the SSA_NAME is sourced from a pointer plus, record the pointer and ++ check to make sure the addition was a multiple of the size. ++ check the pointer type too. */ ++ tree rhs = gimple_assign_rhs1 (stmt); ++ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree num = NULL_TREE; ++ /* Specify the correct size for the multi-layer pointer. */ ++ if (!is_result_of_mult (rhs2, &num, isptrptr (decl->orig_type) ++ ? 
TYPE_SIZE_UNIT (decl->orig_type) ++ : TYPE_SIZE_UNIT (type->type))) ++ { ++ type->mark_escape (escape_non_multiply_size, stmt); ++ } ++ ++ if (TREE_CODE (rhs) == SSA_NAME) ++ { ++ check_type_and_push (rhs, decl, worklist, stmt); ++ } ++ return; ++ } ++ ++ if (gimple_assign_rhs_code (stmt) == MAX_EXPR ++ || gimple_assign_rhs_code (stmt) == MIN_EXPR) ++ { ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ if (TREE_CODE (rhs) == SSA_NAME) ++ { ++ check_type_and_push (rhs, decl, worklist, stmt); ++ } ++ if (TREE_CODE (rhs2) == SSA_NAME) ++ { ++ check_type_and_push (rhs2, decl, worklist, stmt); ++ } ++ return; ++ } ++ ++ /* Casts between pointers and integer are escaping. */ ++ if (gimple_assign_cast_p (stmt)) ++ { ++ type->mark_escape (escape_cast_int, stmt); ++ return; ++ } ++ ++ /* d) if the name is from a cast/assignment, make sure it is used as ++ that type or void* ++ i) If void* then push the ssa_name into worklist. */ ++ gcc_assert (gimple_assign_single_p (stmt)); ++ check_other_side (decl, rhs, stmt, worklist); ++ check_ptr_layers (decl->decl, rhs, stmt); ++} ++ ++/* Check the definition of gimple call. */ ++ ++void ++ipa_struct_reorg::check_definition_call (srdecl *decl, vec &worklist) ++{ ++ tree ssa_name = decl->decl; ++ srtype *type = decl->type; ++ gimple *stmt = SSA_NAME_DEF_STMT (ssa_name); ++ gcc_assert (gimple_code (stmt) == GIMPLE_CALL); ++ ++ /* For realloc, check the type of the argument. */ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) ++ { ++ check_type_and_push (gimple_call_arg (stmt, 0), decl, worklist, stmt); ++ } ++ ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ if (!handled_allocation_stmt (stmt)) ++ { ++ type->mark_escape (escape_return, stmt); ++ } ++ if (!allocate_size (type, decl, stmt)) ++ { ++ type->mark_escape (escape_non_multiply_size, stmt); ++ } ++ } ++ else ++ { ++ if (!handled_allocation_stmt (stmt) ++ || !allocate_size (type, decl, stmt)) ++ { ++ type->mark_escape (escape_return, stmt); ++ } ++ } ++ ++ check_alloc_num (stmt, type); ++ return; ++} ++ + /* + 2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl) + a) if the SSA_NAME is sourced from a pointer plus, record the pointer and +@@ -2992,9 +4255,16 @@ ipa_struct_reorg::check_definition (srde + if (var + && TREE_CODE (var) == PARM_DECL + && VOID_POINTER_P (TREE_TYPE (ssa_name))) +- type->mark_escape (escape_cast_void, NULL); ++ { ++ type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name)); ++ } + return; + } ++ if (current_mode == STRUCT_REORDER_FIELDS && SSA_NAME_VAR (ssa_name) ++ && VOID_POINTER_P (TREE_TYPE (SSA_NAME_VAR (ssa_name)))) ++ { ++ type->mark_escape (escape_cast_void, SSA_NAME_DEF_STMT (ssa_name)); ++ } + gimple *stmt = SSA_NAME_DEF_STMT (ssa_name); + + /* +@@ -3003,15 +4273,7 @@ ipa_struct_reorg::check_definition (srde + */ + if (gimple_code (stmt) == GIMPLE_CALL) + { +- /* For realloc, check the type of the argument. */ +- if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) +- check_type_and_push (gimple_call_arg (stmt, 0), type, worklist, stmt); +- +- if (!handled_allocation_stmt (stmt) +- || !allocate_size (type, stmt)) +- type->mark_escape (escape_return, stmt); +- check_alloc_num (stmt, type); +- return; ++ check_definition_call (decl, worklist); + } + /* If the SSA_NAME is sourced from an inline-asm, just mark the type as escaping. 
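+
+ For instance (illustrative only):
+
+ struct arc *a = arcs;
+ asm volatile ("" : : "r" (a));
+
+ The asm body is opaque to the pass, so the pointed-to type must
+ conservatively be treated as escaping.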
*/ + if (gimple_code (stmt) == GIMPLE_ASM) +@@ -3025,58 +4287,16 @@ ipa_struct_reorg::check_definition (srde + if (gimple_code (stmt) == GIMPLE_PHI) + { + for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++) +- check_type_and_push (gimple_phi_arg_def (stmt, i), type, worklist, stmt); +- return; +- } +- +- gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN); +- /* +- a) if the SSA_NAME is sourced from a pointer plus, record the pointer and +- check to make sure the addition was a multiple of the size. +- check the pointer type too. +- */ +- +- tree rhs = gimple_assign_rhs1 (stmt); +- if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) +- { +- tree rhs2 = gimple_assign_rhs2 (stmt); +- tree num; +- if (!is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (type->type))) +- type->mark_escape (escape_non_multiply_size, stmt); +- +- if (TREE_CODE (rhs) == SSA_NAME) +- check_type_and_push (rhs, type, worklist, stmt); +- return; +- } +- +- if (gimple_assign_rhs_code (stmt) == MAX_EXPR +- || gimple_assign_rhs_code (stmt) == MIN_EXPR) +- { +- tree rhs2 = gimple_assign_rhs2 (stmt); +- if (TREE_CODE (rhs) == SSA_NAME) + { +- check_type_and_push (rhs, type, worklist, stmt); +- } +- if (TREE_CODE (rhs2) == SSA_NAME) +- { +- check_type_and_push (rhs2, type, worklist, stmt); ++ check_type_and_push (gimple_phi_arg_def (stmt, i), ++ decl, worklist, stmt); + } + return; + } +- +- /* Casts between pointers and integer are escaping. */ +- if (gimple_assign_cast_p (stmt)) ++ if (gimple_code (stmt) == GIMPLE_ASSIGN) + { +- type->mark_escape (escape_cast_int, stmt); +- return; ++ check_definition_assign (decl, worklist); + } +- +- /* +- d) if the name is from a cast/assignment, make sure it is used as that type or void* +- i) If void* then push the ssa_name into worklist +- */ +- gcc_assert (gimple_assign_single_p (stmt)); +- check_other_side (decl, rhs, stmt, worklist); + } + + /* Mark the types used by the inline-asm as escaping. It is unkown what happens inside +@@ -3108,11 +4328,10 @@ ipa_struct_reorg::check_other_side (srde + { + srtype *type = decl->type; + +- if (TREE_CODE (other) == SSA_NAME +- || DECL_P (other) ++ if (TREE_CODE (other) == SSA_NAME || DECL_P (other) + || TREE_CODE (other) == INTEGER_CST) + { +- check_type_and_push (other, type, worklist, stmt); ++ check_type_and_push (other, decl, worklist, stmt); + return; + } + +@@ -3138,8 +4357,29 @@ ipa_struct_reorg::check_other_side (srde + srtype *type1; + srfield *field; + bool realpart, imagpart, address; +- if (!get_type_field (other, base, indirect, type1, field, realpart, imagpart, address)) +- type->mark_escape (escape_cast_another_ptr, stmt); ++ bool escape_from_base = false; ++ if (!get_type_field (other, base, indirect, type1, field, ++ realpart, imagpart, address, escape_from_base)) ++ { ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ /* release INTEGER_TYPE cast to struct pointer. */ ++ bool cast_from_int_ptr = current_function->is_safe_func && base ++ && find_decl (base) == NULL && POINTER_TYPE_P (TREE_TYPE (base)) ++ && (TREE_CODE (inner_type (TREE_TYPE (base))) == INTEGER_TYPE); ++ ++ /* Add a safe func mechanism. */ ++ bool from_void_ptr_parm = current_function->is_safe_func ++ && TREE_CODE (base) == SSA_NAME && is_from_void_ptr_parm (base); ++ ++ /* release type is used by a type which escapes. 
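++
++ A hedged sketch of the released pattern (hypothetical code): inside
++ a function recorded with is_safe_func,
++
++ void set_id (void *p)
++ {
++ struct arc *a = (struct arc *) p;
++ a->id = 1;
++ }
++
++ the cast from the void* parameter would otherwise be marked
++ escape_cast_another_ptr; the conditions below release it.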
*/ ++ if (escape_from_base || cast_from_int_ptr || from_void_ptr_parm) ++ { ++ return; ++ } ++ } ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ } + + return; + } +@@ -3151,6 +4391,71 @@ ipa_struct_reorg::check_other_side (srde + } + + ++/* Get the expr base. */ ++ ++void ++get_base (tree &base, tree expr) ++{ ++ if (TREE_CODE (expr) == MEM_REF) ++ { ++ base = TREE_OPERAND (expr, 0); ++ } ++ else if (TREE_CODE (expr) == COMPONENT_REF) ++ { ++ base = TREE_OPERAND (expr, 0); ++ base = (TREE_CODE (base) == MEM_REF) ? TREE_OPERAND (base, 0) : base; ++ } ++ else if (TREE_CODE (expr) == ADDR_EXPR) ++ { ++ base = TREE_OPERAND (expr, 0); ++ } ++} ++ ++/* Check whether the number of pointer layers of exprs is equal, ++ marking unequals as escape. */ ++ ++void ++ipa_struct_reorg::check_ptr_layers (tree a_expr, tree b_expr, gimple* stmt) ++{ ++ if (current_mode != STRUCT_REORDER_FIELDS || current_function->is_safe_func ++ || !POINTER_TYPE_P (TREE_TYPE (a_expr)) ++ || !POINTER_TYPE_P (TREE_TYPE (b_expr)) ++ || !handled_type (TREE_TYPE (a_expr)) ++ || !handled_type (TREE_TYPE (b_expr))) ++ { ++ return; ++ } ++ ++ tree a_base = a_expr; ++ tree b_base = b_expr; ++ get_base (a_base, a_expr); ++ get_base (b_base, b_expr); ++ ++ srdecl *a = find_decl (a_base); ++ srdecl *b = find_decl (b_base); ++ if (a && b == NULL && TREE_CODE (b_expr) != INTEGER_CST) ++ { ++ a->type->mark_escape (escape_cast_another_ptr, stmt); ++ return; ++ } ++ else if (b && a == NULL && TREE_CODE (a_expr) != INTEGER_CST) ++ { ++ b->type->mark_escape (escape_cast_another_ptr, stmt); ++ return; ++ } ++ else if (a == NULL && b == NULL) ++ { ++ return; ++ } ++ ++ if (cmp_ptr_layers (TREE_TYPE (a_expr), TREE_TYPE (b_expr))) ++ { ++ return; ++ } ++ a->type->mark_escape (escape_cast_another_ptr, stmt); ++ b->type->mark_escape (escape_cast_another_ptr, stmt); ++} ++ + void + ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, vec &worklist) + { +@@ -3165,7 +4470,7 @@ ipa_struct_reorg::check_use (srdecl *dec + check to make sure they are used correctly. 
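+
+ As an example of the layer check defined above (illustrative):
+
+ struct arc *a;
+ struct arc **aa;
+ ...
+ a = (struct arc *) aa;
+
+ cmp_ptr_layers sees one pointer layer against two, so
+ check_ptr_layers marks both sides escape_cast_another_ptr.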
*/ + if (gimple_code (stmt) == GIMPLE_PHI) + { +- check_type_and_push (gimple_phi_result (stmt), type, worklist, stmt); ++ check_type_and_push (gimple_phi_result (stmt), decl, worklist, stmt); + return; + } + +@@ -3181,10 +4486,15 @@ ipa_struct_reorg::check_use (srdecl *dec + tree rhs2 = gimple_cond_rhs (stmt); + tree orhs = rhs1; + enum tree_code code = gimple_cond_code (stmt); +- if (code != EQ_EXPR && code != NE_EXPR +- && (current_mode != COMPLETE_STRUCT_RELAYOUT +- || (code != LT_EXPR && code != LE_EXPR +- && code != GT_EXPR && code != GE_EXPR))) ++ if ((current_mode == NORMAL && (code != EQ_EXPR && code != NE_EXPR)) ++ || (current_mode == COMPLETE_STRUCT_RELAYOUT ++ && (code != EQ_EXPR && code != NE_EXPR ++ && code != LT_EXPR && code != LE_EXPR ++ && code != GT_EXPR && code != GE_EXPR)) ++ || (current_mode == STRUCT_REORDER_FIELDS ++ && (code != EQ_EXPR && code != NE_EXPR ++ && code != LT_EXPR && code != LE_EXPR ++ && code != GT_EXPR && code != GE_EXPR))) + { + mark_expr_escape (rhs1, escape_non_eq, stmt); + mark_expr_escape (rhs2, escape_non_eq, stmt); +@@ -3195,7 +4505,7 @@ ipa_struct_reorg::check_use (srdecl *dec + return; + if (TREE_CODE (orhs) != SSA_NAME) + mark_expr_escape (rhs1, escape_non_eq, stmt); +- check_type_and_push (orhs, type, worklist, stmt); ++ check_type_and_push (orhs, decl, worklist, stmt); + return; + } + +@@ -3215,9 +4525,14 @@ ipa_struct_reorg::check_use (srdecl *dec + tree rhs2 = gimple_assign_rhs2 (stmt); + tree orhs = rhs1; + enum tree_code code = gimple_assign_rhs_code (stmt); +- if (code != EQ_EXPR && code != NE_EXPR +- && (current_mode != COMPLETE_STRUCT_RELAYOUT +- || (code != LT_EXPR && code != LE_EXPR ++ if ((current_mode == NORMAL && (code != EQ_EXPR && code != NE_EXPR)) ++ || (current_mode == COMPLETE_STRUCT_RELAYOUT ++ && (code != EQ_EXPR && code != NE_EXPR ++ && code != LT_EXPR && code != LE_EXPR ++ && code != GT_EXPR && code != GE_EXPR)) ++ || (current_mode == STRUCT_REORDER_FIELDS ++ && (code != EQ_EXPR && code != NE_EXPR ++ && code != LT_EXPR && code != LE_EXPR + && code != GT_EXPR && code != GE_EXPR))) + { + mark_expr_escape (rhs1, escape_non_eq, stmt); +@@ -3229,7 +4544,7 @@ ipa_struct_reorg::check_use (srdecl *dec + return; + if (TREE_CODE (orhs) != SSA_NAME) + mark_expr_escape (rhs1, escape_non_eq, stmt); +- check_type_and_push (orhs, type, worklist, stmt); ++ check_type_and_push (orhs, decl, worklist, stmt); + return; + } + +@@ -3243,6 +4558,7 @@ ipa_struct_reorg::check_use (srdecl *dec + check_other_side (decl, lhs, stmt, worklist); + return; + } ++ check_ptr_layers (lhs, rhs, stmt); + } + + if (is_gimple_assign (stmt) +@@ -3252,10 +4568,26 @@ ipa_struct_reorg::check_use (srdecl *dec + tree lhs = gimple_assign_lhs (stmt); + tree num; + check_other_side (decl, lhs, stmt, worklist); +- if (!is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (type->type))) ++ check_ptr_layers (lhs, decl->decl, stmt); ++ /* Specify the correct size for the multi-layer pointer. */ ++ if (!is_result_of_mult (rhs2, &num, isptrptr (decl->orig_type) ++ ? TYPE_SIZE_UNIT (decl->orig_type) ++ : TYPE_SIZE_UNIT (type->type))) + type->mark_escape (escape_non_multiply_size, stmt); + } + ++ if (is_gimple_assign (stmt) ++ && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR) ++ { ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree other = rhs1 == decl->decl ? 
rhs2 : rhs1; ++ ++ check_other_side (decl, other, stmt, worklist); ++ check_ptr_layers (decl->decl, other, stmt); ++ return; ++ } ++ + } + + /* +@@ -3300,7 +4632,7 @@ ipa_struct_reorg::record_function (cgrap + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\nRecording accesses and types from function: %s/%u\n", +- node->name (), node->order); ++ node->name (), node->order); + + /* Nodes without a body are not interesting. Especially do not + visit clones at this point for now - we get duplicate decls +@@ -3319,17 +4651,51 @@ ipa_struct_reorg::record_function (cgrap + if (DECL_PRESERVE_P (node->decl)) + escapes = escape_marked_as_used; + else if (!node->local.local) +- escapes = escape_visible_function; ++ { ++ if (current_mode != STRUCT_REORDER_FIELDS) ++ { ++ escapes = escape_visible_function; ++ } ++ if (current_mode == STRUCT_REORDER_FIELDS && node->externally_visible) ++ { ++ escapes = escape_visible_function; ++ } ++ } + else if (!node->local.can_change_signature) + escapes = escape_cannot_change_signature; + else if (!tree_versionable_function_p (node->decl)) + escapes = escape_noclonable_function; +- else if (!opt_for_fn (node->decl, flag_ipa_struct_reorg)) +- escapes = escape_non_optimize; ++ ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ if (!opt_for_fn (node->decl, flag_ipa_reorder_fields)) ++ { ++ escapes = escape_non_optimize; ++ } ++ } ++ else if (current_mode == NORMAL || current_mode == COMPLETE_STRUCT_RELAYOUT) ++ { ++ if (!opt_for_fn (node->decl, flag_ipa_struct_reorg)) ++ { ++ escapes = escape_non_optimize; ++ } ++ } + + basic_block bb; + gimple_stmt_iterator si; + ++ /* Add a safe func mechanism. */ ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ current_function->is_safe_func = safe_functions.contains (node); ++ if (dump_file) ++ { ++ fprintf (dump_file, "\nfunction %s/%u: is_safe_func = %d\n", ++ node->name (), node->order, ++ current_function->is_safe_func); ++ } ++ } ++ + /* Record the static chain decl. */ + if (fn->static_chain_decl) + { +@@ -3460,6 +4826,49 @@ ipa_struct_reorg::record_function (cgrap + } + + ++/* For a function that contains the void* parameter and passes the structure ++ pointer, check whether the function uses the input node safely. ++ For these functions, the void* parameter and related ssa_name are not ++ recorded in record_function (), and the input structure type is not escaped. ++*/ ++ ++void ++ipa_struct_reorg::record_safe_func_with_void_ptr_parm () ++{ ++ cgraph_node *node = NULL; ++ FOR_EACH_FUNCTION (node) ++ { ++ if (!node->real_symbol_p ()) ++ { ++ continue; ++ } ++ if (node->definition) ++ { ++ if (!node->has_gimple_body_p () || node->inlined_to) ++ { ++ continue; ++ } ++ node->get_body (); ++ function *fn = DECL_STRUCT_FUNCTION (node->decl); ++ if (!fn) ++ { ++ continue; ++ } ++ push_cfun (fn); ++ if (is_safe_func_with_void_ptr_parm (node)) ++ { ++ safe_functions.add (node); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nfunction %s/%u is safe function.\n", ++ node->name (), node->order); ++ } ++ } ++ pop_cfun (); ++ } ++ } ++} ++ + /* Record all accesses for all types including global variables. */ + + void +@@ -3491,6 +4900,12 @@ ipa_struct_reorg::record_accesses (void) + record_var (var->decl, escapes); + } + ++ /* Add a safe func mechanism. 
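++
++ A function of roughly this shape (hypothetical) is the intended
++ candidate:
++
++ int arc_id (void *p)
++ {
++ return ((struct arc *) p)->id;
++ }
++
++ Its void* parameter only ever carries one structure pointer type
++ and never leaks it, so the type need not escape.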
*/
++ if (current_mode == STRUCT_REORDER_FIELDS)
++ {
++ record_safe_func_with_void_ptr_parm ();
++ }
++
+ FOR_EACH_FUNCTION (cnode)
+ {
+ if (!cnode->real_symbol_p ())
+@@ -3503,11 +4918,14 @@ ipa_struct_reorg::record_accesses (void)
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+- fprintf (dump_file, "all types (before pruning):\n");
++ fprintf (dump_file, "\n");
++ fprintf (dump_file, "==============================================\n\n");
++ fprintf (dump_file, "======== all types (before pruning): ========\n\n");
+ dump_types (dump_file);
+- fprintf (dump_file, "all functions (before pruning):\n");
++ fprintf (dump_file, "======= all functions (before pruning): =======\n");
+ dump_functions (dump_file);
+ }
++ /* If record_var () is called later, new types will not be recorded. */
+ done_recording = true;
+ }
+
+@@ -3531,6 +4949,7 @@ ipa_struct_reorg::walk_field_for_cycles
+ {
+ if (!field->type)
+ ;
++ /* What if there are two fields with the same structure pointer type? */
+ else if (field->type->visited
+ || walk_field_for_cycles (field->type))
+ {
+@@ -3610,22 +5029,99 @@ ipa_struct_reorg::propagate_escape (void
+ } while (changed);
+ }
+
++/* If the original type (the one with members) has escaped, the
++ corresponding struct pointer type (the one with empty members) used
++ in structure fields should also be marked as escaping. */
++
++void
++ipa_struct_reorg::propagate_escape_via_original (void)
++{
++ for (unsigned i = 0; i < types.length (); i++)
++ {
++ for (unsigned j = 0; j < types[i]->fields.length (); j++)
++ {
++ srfield *field = types[i]->fields[j];
++ if (handled_type (field->fieldtype) && field->type)
++ {
++ for (unsigned k = 0; k < types.length (); k++)
++ {
++ const char *type1 = get_type_name (field->type->type);
++ const char *type2 = get_type_name (types[k]->type);
++ if (type1 == NULL || type2 == NULL)
++ {
++ continue;
++ }
++ if (type1 == type2 && types[k]->has_escaped ())
++ {
++ if (!field->type->has_escaped ())
++ {
++ field->type->mark_escape (
++ escape_via_orig_escape, NULL);
++ }
++ break;
++ }
++ }
++ }
++ }
++ }
++}
++
++/* Mark as escaping those types whose field list is empty and for which
++ no original structure type (one with fields) of the same name
++ exists. */
++
++void
++ipa_struct_reorg::propagate_escape_via_empty_with_no_original (void)
++{
++ for (unsigned i = 0; i < types.length (); i++)
++ {
++ if (types[i]->fields.length () == 0)
++ {
++ for (unsigned j = 0; j < types.length (); j++)
++ {
++ if (i != j && types[j]->fields.length ())
++ {
++ const char *type1 = get_type_name (types[i]->type);
++ const char *type2 = get_type_name (types[j]->type);
++ if (type1 != NULL && type2 != NULL && type1 == type2)
++ {
++ break;
++ }
++ }
++ if (j == types.length () - 1)
++ {
++ types[i]->mark_escape (escape_via_empty_no_orig, NULL);
++ }
++ }
++ }
++ }
++}
++
+ /* Prune the escaped types and their decls from what was recorded. */
+
+ void
+ ipa_struct_reorg::prune_escaped_types (void)
+ {
+- if (current_mode != COMPLETE_STRUCT_RELAYOUT)
++ if (current_mode != COMPLETE_STRUCT_RELAYOUT
++ && current_mode != STRUCT_REORDER_FIELDS)
+ {
++ /* Detect recursive types and mark them as escaping. */
+ detect_cycles ();
++ /* If a type contains or is contained by an escaping type,
++ mark it as escaping too.
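++
++ For example (an illustration, not lifted from this patch): once
++ struct node escapes,
++
++ struct arc { struct node *head; };
++ struct list { struct arc *first; };
++
++ struct arc is marked as escaping because it uses node, and then
++ struct list in turn because it uses arc.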
*/ + propagate_escape (); + } ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ propagate_escape_via_original (); ++ propagate_escape_via_empty_with_no_original (); ++ } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "all types (after prop but before pruning):\n"); ++ fprintf (dump_file, "==============================================\n\n"); ++ fprintf (dump_file, "all types (after prop but before pruning): \n\n"); + dump_types (dump_file); +- fprintf (dump_file, "all functions (after prop but before pruning):\n"); ++ fprintf (dump_file, "all functions (after prop but before pruning): \n"); + dump_functions (dump_file); + } + +@@ -3673,13 +5169,15 @@ ipa_struct_reorg::prune_escaped_types (v + /* Prune functions which don't refer to any variables any more. */ + if (function->args.is_empty () + && function->decls.is_empty () +- && function->globals.is_empty ()) ++ && function->globals.is_empty () ++ && current_mode != STRUCT_REORDER_FIELDS) + { + delete function; + functions.ordered_remove (i); + } + else + i++; ++ + } + + /* Prune globals of types that escape, all references to those decls +@@ -3698,24 +5196,33 @@ ipa_struct_reorg::prune_escaped_types (v + + /* Prune types that escape, all references to those types + will have been removed in the above loops. */ +- for (unsigned i = 0; i < types.length (); ) ++ /* The escape type is not deleted in STRUCT_REORDER_FIELDS, ++ Then the type that contains the escaped type fields ++ can find complete information. */ ++ if (current_mode != STRUCT_REORDER_FIELDS) + { +- srtype *type = types[i]; +- if (type->has_escaped ()) ++ for (unsigned i = 0; i < types.length ();) + { +- /* All references to this type should have been removed now. */ +- delete type; +- types.ordered_remove (i); ++ srtype *type = types[i]; ++ if (type->has_escaped ()) ++ { ++ /* All references to this type should have been removed now. */ ++ delete type; ++ types.ordered_remove (i); ++ } ++ else ++ { ++ i++; ++ } + } +- else +- i++; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "all types (after pruning):\n"); ++ fprintf (dump_file, "==============================================\n\n"); ++ fprintf (dump_file, "========= all types (after pruning): =========\n\n"); + dump_types (dump_file); +- fprintf (dump_file, "all functions (after pruning):\n"); ++ fprintf (dump_file, "======== all functions (after pruning): ========\n"); + dump_functions (dump_file); + } + } +@@ -3736,6 +5243,8 @@ ipa_struct_reorg::analyze_types (void) + is not stored in + TYPE_FIELDS (TREE_TYPE (TYPE_FIELDS (typeA))) + Try to restore B's type information. */ ++/* The traversal and recording of struc fields in ++ the record_type function are supplemented. */ + void + ipa_struct_reorg::restore_field_type (void) + { +@@ -3744,7 +5253,7 @@ ipa_struct_reorg::restore_field_type (vo + for (unsigned j = 0; j < types[i]->fields.length (); j++) + { + srfield *field = types[i]->fields[j]; +- if (TREE_CODE (inner_type (field->fieldtype)) == RECORD_TYPE) ++ if (handled_type (field->fieldtype) && field->type) + { + /* If field type has TYPE_FIELDS information, + we do not need to do this. */ +@@ -3754,7 +5263,10 @@ ipa_struct_reorg::restore_field_type (vo + } + for (unsigned k = 0; k < types.length (); k++) + { +- if (i == k) ++ /* In STRUCT_REORDER_FIELDS mode,the recursive fields ++ should find their own struct type containing complete ++ information. 
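++
++ The self-referential field is the classic case (illustrative):
++
++ struct node
++ {
++ struct node *next;
++ };
++
++ Here the field type of next must be resolved against struct node
++ itself, i.e. against the same index (i == k) in the type array.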
*/ ++ if (i == k && current_mode != STRUCT_REORDER_FIELDS) + { + continue; + } +@@ -3785,6 +5297,28 @@ ipa_struct_reorg::create_new_types (void + for (unsigned i = 0; i < types.length (); i++) + newtypes += types[i]->create_new_type (); + ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ auto_vec *fields = fields_to_finish.get (types[i]->type); ++ if (fields) ++ { ++ for (unsigned j = 0; j < fields->length (); j++) ++ { ++ tree field = (*fields)[j]; ++ TREE_TYPE (field) ++ = reconstruct_complex_type (TREE_TYPE (field), ++ types[i]->newtype[0]); ++ } ++ } ++ } ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ layout_type (types[i]->newtype[0]); ++ } ++ } ++ + if (dump_file) + { + if (newtypes) +@@ -3840,7 +5374,8 @@ ipa_struct_reorg::create_new_args (cgrap + print_generic_expr (dump_file, decl); + fprintf (dump_file, "\n"); + } +- adj.arg_prefix = "struct_reorg"; ++ adj.arg_prefix = current_mode == STRUCT_REORDER_FIELDS ++ ? "struct_reorder" : "struct_reorg"; + adj.op = IPA_PARM_OP_NONE; + for (unsigned j = 0; j < max_split && t->newtype[j]; j++) + { +@@ -3883,7 +5418,8 @@ ipa_struct_reorg::create_new_args (cgrap + char *name = NULL; + if (tname) + { +- name = concat (tname, ".reorg.0", NULL); ++ name = concat (tname, current_mode == STRUCT_REORDER_FIELDS ++ ? ".reorder.0" : ".reorg.0", NULL); + new_name = get_identifier (name); + free (name); + } +@@ -3969,9 +5505,10 @@ ipa_struct_reorg::create_new_functions ( + fprintf (dump_file, "\n"); + } + statistics_counter_event (NULL, "Create new function", 1); +- new_node = node->create_version_clone_with_body (vNULL, NULL, +- NULL, false, NULL, NULL, +- "struct_reorg"); ++ new_node = node->create_version_clone_with_body ( ++ vNULL, NULL, NULL, false, NULL, NULL, ++ current_mode == STRUCT_REORDER_FIELDS ++ ? "struct_reorder" : "struct_reorg"); + new_node->make_local (); + f->newnode = new_node; + srfunction *n = record_function (new_node); +@@ -4010,6 +5547,7 @@ ipa_struct_reorg::rewrite_expr (tree exp + srfield *f; + bool realpart, imagpart; + bool address; ++ bool escape_from_base = false; + + tree newbase[max_split]; + memset (newexpr, 0, sizeof(tree[max_split])); +@@ -4027,7 +5565,8 @@ ipa_struct_reorg::rewrite_expr (tree exp + return true; + } + +- if (!get_type_field (expr, base, indirect, t, f, realpart, imagpart, address)) ++ if (!get_type_field (expr, base, indirect, t, f, realpart, imagpart, ++ address, escape_from_base)) + return false; + + /* If the type is not changed, then just return false. */ +@@ -4085,7 +5624,38 @@ ipa_struct_reorg::rewrite_expr (tree exp + if (address) + newbase1 = build_fold_addr_expr (newbase1); + if (indirect) +- newbase1 = build_simple_mem_ref (newbase1); ++ { ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ /* Supports the MEM_REF offset. ++ _1 = MEM[(struct arc *)ap_4 + 72B].flow; ++ Old rewrite:_1 = ap.reorder.0_8->flow; ++ New rewrite:_1 ++ = MEM[(struct arc.reorder.0 *)ap.reorder.0_8 + 64B].flow; ++ */ ++ HOST_WIDE_INT offset_tmp = 0; ++ bool realpart_tmp = false; ++ bool imagpart_tmp = false; ++ tree accesstype_tmp = NULL_TREE; ++ tree num = NULL_TREE; ++ get_ref_base_and_offset (expr, offset_tmp, ++ realpart_tmp, imagpart_tmp, ++ accesstype_tmp, &num); ++ ++ tree ptype = TREE_TYPE (newbase1); ++ /* Specify the correct size for the multi-layer pointer. */ ++ tree size = isptrptr (ptype) ? 
TYPE_SIZE_UNIT (ptype) : ++ TYPE_SIZE_UNIT (inner_type (ptype)); ++ newbase1 = build2 (MEM_REF, TREE_TYPE (ptype), newbase1, ++ build_int_cst (ptype, (num != NULL) ++ ? (tree_to_shwi (num) * tree_to_shwi (size)) ++ : 0)); ++ } ++ else ++ { ++ newbase1 = build_simple_mem_ref (newbase1); ++ } ++ } + newexpr[i] = build3 (COMPONENT_REF, TREE_TYPE (f->newfield[i]), + newbase1, f->newfield[i], NULL_TREE); + if (imagpart) +@@ -4125,8 +5695,12 @@ ipa_struct_reorg::rewrite_assign (gassig + return remove; + } + +- if (gimple_assign_rhs_code (stmt) == EQ_EXPR +- || gimple_assign_rhs_code (stmt) == NE_EXPR) ++ if ((current_mode != STRUCT_REORDER_FIELDS ++ && (gimple_assign_rhs_code (stmt) == EQ_EXPR ++ || gimple_assign_rhs_code (stmt) == NE_EXPR)) ++ || (current_mode == STRUCT_REORDER_FIELDS ++ && (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) ++ == tcc_comparison))) + { + tree rhs1 = gimple_assign_rhs1 (stmt); + tree rhs2 = gimple_assign_rhs2 (stmt); +@@ -4134,6 +5708,12 @@ ipa_struct_reorg::rewrite_assign (gassig + tree newrhs2[max_split]; + tree_code rhs_code = gimple_assign_rhs_code (stmt); + tree_code code = rhs_code == EQ_EXPR ? BIT_AND_EXPR : BIT_IOR_EXPR; ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && rhs_code != EQ_EXPR && rhs_code != NE_EXPR) ++ { ++ code = rhs_code; ++ } ++ + if (!rewrite_lhs_rhs (rhs1, rhs2, newrhs1, newrhs2)) + return false; + tree newexpr = NULL_TREE; +@@ -4165,25 +5745,95 @@ ipa_struct_reorg::rewrite_assign (gassig + + if (!rewrite_lhs_rhs (lhs, rhs1, newlhs, newrhs)) + return false; ++ + tree size = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))); + tree num; + /* Check if rhs2 is a multiplication of the size of the type. */ + if (!is_result_of_mult (rhs2, &num, size)) + internal_error ("the rhs of pointer was not a multiplicate and it slipped through."); + +- num = gimplify_build1 (gsi, NOP_EXPR, sizetype, num); ++ /* Add the judgment of num, support for POINTER_DIFF_EXPR. ++ _26 = _24 + _25; ++ _25 = (long unsigned int) _23; ++ _23 = _21 - old_arcs_12. */ ++ if (current_mode != STRUCT_REORDER_FIELDS ++ || (current_mode == STRUCT_REORDER_FIELDS && (num != NULL))) ++ { ++ num = gimplify_build1 (gsi, NOP_EXPR, sizetype, num); ++ } + for (unsigned i = 0; i < max_split && newlhs[i]; i++) + { + gimple *new_stmt; + +- tree newsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (newlhs[i]))); +- newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, newsize); +- new_stmt = gimple_build_assign (newlhs[i], POINTER_PLUS_EXPR, newrhs[i], newsize); ++ if (num != NULL) ++ { ++ tree newsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (newlhs[i]))); ++ newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, ++ newsize); ++ new_stmt = gimple_build_assign (newlhs[i], POINTER_PLUS_EXPR, ++ newrhs[i], newsize); ++ } ++ else ++ { ++ new_stmt = gimple_build_assign (newlhs[i], POINTER_PLUS_EXPR, ++ newrhs[i], rhs2); ++ } + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + remove = true; + } + return remove; + } ++ ++ /* Support POINTER_DIFF_EXPR rewriting. */ ++ if (current_mode == STRUCT_REORDER_FIELDS ++ && gimple_assign_rhs_code (stmt) == POINTER_DIFF_EXPR) ++ { ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree newrhs1[max_split]; ++ tree newrhs2[max_split]; ++ ++ bool r1 = rewrite_expr (rhs1, newrhs1); ++ bool r2 = rewrite_expr (rhs2, newrhs2); ++ ++ if (r1 != r2) ++ { ++ /* Handle NULL pointer specially. 
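++
++ E.g. (illustrative) for source like
++
++ struct arc *p = ...;
++ long d = p - (struct arc *) 0;
++
++ only one operand is rewritten, so the constant null side is
++ converted with fold_convert to the type of the rewritten operand.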
*/ ++ if (r1 && !r2 && integer_zerop (rhs2)) ++ { ++ r2 = true; ++ for (unsigned i = 0; i < max_split && newrhs1[i]; i++) ++ { ++ newrhs2[i] = fold_convert (TREE_TYPE (newrhs1[i]), rhs2); ++ } ++ } ++ else if (r2 && !r1 && integer_zerop (rhs1)) ++ { ++ r1 = true; ++ for (unsigned i = 0; i < max_split && newrhs2[i]; i++) ++ { ++ newrhs1[i] = fold_convert (TREE_TYPE (newrhs2[i]), rhs1); ++ } ++ } ++ else ++ { ++ return false; ++ } ++ } ++ else if (!r1 && !r2) ++ return false; ++ ++ /* The two operands always have pointer/reference type. */ ++ for (unsigned i = 0; i < max_split && newrhs1[i] && newrhs2[i]; i++) ++ { ++ gimple_assign_set_rhs1 (stmt, newrhs1[i]); ++ gimple_assign_set_rhs2 (stmt, newrhs2[i]); ++ update_stmt (stmt); ++ } ++ remove = false; ++ return remove; ++ } ++ + if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) + { + tree lhs = gimple_assign_lhs (stmt); +@@ -4191,24 +5841,24 @@ ipa_struct_reorg::rewrite_assign (gassig + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "rewriting stamtenet:\n"); ++ fprintf (dump_file, "\nrewriting stamtenet:\n"); + print_gimple_stmt (dump_file, stmt, 0); +- fprintf (dump_file, "\n"); + } + tree newlhs[max_split]; + tree newrhs[max_split]; + if (!rewrite_lhs_rhs (lhs, rhs, newlhs, newrhs)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) +- fprintf (dump_file, "\nDid nothing to statement.\n"); ++ fprintf (dump_file, "Did nothing to statement.\n"); + return false; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) +- fprintf (dump_file, "\nreplaced with:\n"); ++ fprintf (dump_file, "replaced with:\n"); + for (unsigned i = 0; i < max_split && (newlhs[i] || newrhs[i]); i++) + { +- gimple *newstmt = gimple_build_assign (newlhs[i] ? newlhs[i] : lhs, newrhs[i] ? newrhs[i] : rhs); ++ gimple *newstmt = gimple_build_assign (newlhs[i] ? newlhs[i] : lhs, ++ newrhs[i] ? newrhs[i] : rhs); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + print_gimple_stmt (dump_file, newstmt, 0); +@@ -4239,7 +5889,7 @@ ipa_struct_reorg::rewrite_call (gcall *s + if (!decl || !decl->type) + return false; + srtype *type = decl->type; +- tree num = allocate_size (type, stmt); ++ tree num = allocate_size (type, decl, stmt); + gcc_assert (num); + memset (newrhs1, 0, sizeof(newrhs1)); + +@@ -4259,7 +5909,10 @@ ipa_struct_reorg::rewrite_call (gcall *s + /* Go through each new lhs. */ + for (unsigned i = 0; i < max_split && decl->newdecl[i]; i++) + { +- tree newsize = TYPE_SIZE_UNIT (type->type); ++ /* Specify the correct size for the multi-layer pointer. */ ++ tree newsize = isptrptr (decl->orig_type) ++ ? TYPE_SIZE_UNIT (decl->orig_type) ++ : TYPE_SIZE_UNIT (type->newtype[i]); + gimple *g; + /* Every allocation except for calloc needs the size multiplied out. */ + if (!gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) +@@ -4319,6 +5972,25 @@ ipa_struct_reorg::rewrite_call (gcall *s + gcc_assert (node); + srfunction *f = find_function (node); + ++ /* Add a safe func mechanism. */ ++ if (current_mode == STRUCT_REORDER_FIELDS && f && f->is_safe_func) ++ { ++ tree expr = gimple_call_arg (stmt, 0); ++ tree newexpr[max_split]; ++ if (!rewrite_expr (expr, newexpr)) ++ { ++ return false; ++ } ++ ++ if (newexpr[1] == NULL) ++ { ++ gimple_call_set_arg (stmt, 0, newexpr[0]); ++ update_stmt (stmt); ++ return false; ++ } ++ return false; ++ } ++ + /* Did not find the function or had not cloned it return saying don't + change the function call. 
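+
+ E.g. (hypothetical) a call to a function with no gimple body:
+
+ extern void log_arc (struct arc *);
+ ...
+ log_arc (a);
+
+ There is no clone to redirect to, so the call is left alone; in
+ practice such a type would already have escaped anyway.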
*/ + if (!f || !f->newf) +@@ -4403,7 +6075,7 @@ ipa_struct_reorg::rewrite_call (gcall *s + && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME) + SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; + +- gsi_replace (gsi, new_stmt, false); ++ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + + /* We need to defer cleaning EH info on the new statement to + fixup-cfg. We may not have dominator information at this point +@@ -4416,8 +6088,7 @@ ipa_struct_reorg::rewrite_call (gcall *s + add_stmt_to_eh_lp (new_stmt, lp_nr); + } + +- +- return false; ++ return true; + } + + /* Rewrite the conditional statement STMT. Return TRUE if the +@@ -4429,49 +6100,60 @@ ipa_struct_reorg::rewrite_cond (gcond *s + tree_code rhs_code = gimple_cond_code (stmt); + + /* Handle only equals or not equals conditionals. */ +- if (rhs_code != EQ_EXPR +- && rhs_code != NE_EXPR) ++ if ((current_mode != STRUCT_REORDER_FIELDS ++ && (rhs_code != EQ_EXPR && rhs_code != NE_EXPR)) ++ || (current_mode == STRUCT_REORDER_FIELDS ++ && TREE_CODE_CLASS (rhs_code) != tcc_comparison)) + return false; +- tree rhs1 = gimple_cond_lhs (stmt); +- tree rhs2 = gimple_cond_rhs (stmt); ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "COND: Rewriting\n"); ++ fprintf (dump_file, "\nCOND: Rewriting\n"); + print_gimple_stmt (dump_file, stmt, 0); ++ print_generic_expr (dump_file, lhs); + fprintf (dump_file, "\n"); +- print_generic_expr (dump_file, rhs1); +- fprintf (dump_file, "\n"); +- print_generic_expr (dump_file, rhs2); ++ print_generic_expr (dump_file, rhs); + fprintf (dump_file, "\n"); + } + +- tree newrhs1[max_split]; +- tree newrhs2[max_split]; +- tree_code code = rhs_code == EQ_EXPR ? BIT_AND_EXPR : BIT_IOR_EXPR; +- if (!rewrite_lhs_rhs (rhs1, rhs2, newrhs1, newrhs2)) ++ tree newlhs[max_split] = {}; ++ tree newrhs[max_split] = {}; ++ if (!rewrite_lhs_rhs (lhs, rhs, newlhs, newrhs)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) +- fprintf (dump_file, "\nDid nothing to statement.\n"); ++ { ++ fprintf (dump_file, "Did nothing to statement.\n"); ++ } + return false; + } + +- tree newexpr = NULL_TREE; +- for (unsigned i = 0; i < max_split && newrhs1[i]; i++) +- { +- tree expr = gimplify_build2 (gsi, rhs_code, boolean_type_node, newrhs1[i], newrhs2[i]); +- if (!newexpr) +- newexpr = expr; +- else +- newexpr = gimplify_build2 (gsi, code, boolean_type_node, newexpr, expr); +- } +- +- if (newexpr) ++ /* Old rewrite:if (iterator_600 != 0B) ++ -> _1369 = iterator.reorder.0_1249 != 0B; if (_1369 != 1) ++ The logic is incorrect. ++ New rewrite:if (iterator_600 != 0B) ++ -> if (iterator.reorder.0_1249 != 0B);*/ ++ for (unsigned i = 0; i < max_split && (newlhs[i] || newrhs[i]); i++) + { +- gimple_cond_set_lhs (stmt, newexpr); +- gimple_cond_set_rhs (stmt, boolean_true_node); ++ if (newlhs[i]) ++ { ++ gimple_cond_set_lhs (stmt, newlhs[i]); ++ } ++ if (newrhs[i]) ++ { ++ gimple_cond_set_rhs (stmt, newrhs[i]); ++ } + update_stmt (stmt); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "replaced with:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } + } ++ + return false; + } + +@@ -4481,6 +6163,11 @@ ipa_struct_reorg::rewrite_cond (gcond *s + bool + ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *) + { ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ /* Delete debug gimple now. 
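++
++ I.e. in reorder-fields mode a binding such as (gimple dump form,
++ illustrative)
++
++ # DEBUG D#1 => &ap_4->flow
++
++ refers to the old layout and is dropped here rather than being
++ retargeted to the new decls.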
*/ ++ return true; ++ } + bool remove = false; + if (gimple_debug_bind_p (stmt)) + { +@@ -4533,7 +6220,7 @@ ipa_struct_reorg::rewrite_phi (gphi *phi + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "\nrewriting PHI:"); ++ fprintf (dump_file, "\nrewriting PHI:\n"); + print_gimple_stmt (dump_file, phi, 0); + } + +@@ -4544,7 +6231,17 @@ ipa_struct_reorg::rewrite_phi (gphi *phi + { + tree newrhs[max_split]; + phi_arg_d rhs = *gimple_phi_arg (phi, i); +- rewrite_expr (rhs.def, newrhs); ++ ++ /* Handling the NULL phi Node. */ ++ bool r = rewrite_expr (rhs.def, newrhs); ++ if (!r && integer_zerop (rhs.def)) ++ { ++ for (unsigned i = 0; i < max_split && newlhs[i]; i++) ++ { ++ newrhs[i] = fold_convert (TREE_TYPE (newlhs[i]), rhs.def); ++ } ++ } ++ + for (unsigned j = 0; j < max_split && newlhs[j]; j++) + { + SET_PHI_ARG_DEF (newphi[j], i, newrhs[j]); +@@ -4555,7 +6252,7 @@ ipa_struct_reorg::rewrite_phi (gphi *phi + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "\ninto\n:"); ++ fprintf (dump_file, "into:\n"); + for (unsigned i = 0; i < max_split && newlhs[i]; i++) + { + print_gimple_stmt (dump_file, newphi[i], 0); +@@ -4630,12 +6327,58 @@ ipa_struct_reorg::rewrite_functions (voi + /* Create new types, if we did not create any new types, + then don't rewrite any accesses. */ + if (!create_new_types ()) +- return 0; ++ { ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ for (unsigned i = 0; i < functions.length (); i++) ++ { ++ srfunction *f = functions[i]; ++ cgraph_node *node = f->node; ++ push_cfun (DECL_STRUCT_FUNCTION (node->decl)); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nNo rewrite:\n"); ++ dump_function_to_file (current_function_decl, dump_file, ++ dump_flags | TDF_VOPS); ++ } ++ pop_cfun (); ++ } ++ } ++ return 0; ++ } ++ ++ if (current_mode == STRUCT_REORDER_FIELDS && dump_file) ++ { ++ fprintf (dump_file, "=========== all created newtypes: ===========\n\n"); ++ dump_newtypes (dump_file); ++ } + + if (functions.length ()) + { + retval = TODO_remove_functions; + create_new_functions (); ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ prune_escaped_types (); ++ } ++ } ++ ++ if (current_mode == STRUCT_REORDER_FIELDS) ++ { ++ for (unsigned i = 0; i < functions.length (); i++) ++ { ++ srfunction *f = functions[i]; ++ cgraph_node *node = f->node; ++ push_cfun (DECL_STRUCT_FUNCTION (node->decl)); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "==== Before create decls: %dth_%s ====\n\n", ++ i, f->node->name ()); ++ dump_function_to_file (current_function_decl, dump_file, ++ dump_flags | TDF_VOPS); ++ } ++ pop_cfun (); ++ } + } + + create_new_decls (); +@@ -4658,8 +6401,12 @@ ipa_struct_reorg::rewrite_functions (voi + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, "\nBefore rewrite:\n"); +- dump_function_to_file (current_function_decl, dump_file, dump_flags | TDF_VOPS); ++ fprintf (dump_file, "\nBefore rewrite: %dth_%s\n", ++ i, f->node->name ()); ++ dump_function_to_file (current_function_decl, dump_file, ++ dump_flags | TDF_VOPS); ++ fprintf (dump_file, "\n======== Start to rewrite: %dth_%s ========\n", ++ i, f->node->name ()); + } + FOR_EACH_BB_FN (bb, cfun) + { +@@ -4727,10 +6474,12 @@ ipa_struct_reorg::rewrite_functions (voi + + free_dominance_info (CDI_DOMINATORS); + +- if (dump_file && (dump_flags & TDF_DETAILS)) ++ if (dump_file) + { +- fprintf (dump_file, "\nAfter rewrite:\n"); +- dump_function_to_file (current_function_decl, 
dump_file, dump_flags | TDF_VOPS); ++ fprintf (dump_file, "\nAfter rewrite: %dth_%s\n", ++ i, f->node->name ()); ++ dump_function_to_file (current_function_decl, dump_file, ++ dump_flags | TDF_VOPS); + } + + pop_cfun (); +@@ -4784,9 +6533,9 @@ ipa_struct_reorg::execute (enum srmode m + { + unsigned int ret = 0; + +- if (mode == NORMAL) ++ if (mode == NORMAL || mode == STRUCT_REORDER_FIELDS) + { +- current_mode = NORMAL; ++ current_mode = mode; + /* If there is a top-level inline-asm, + the pass immediately returns. */ + if (symtab->first_asm_symbol ()) +@@ -4795,7 +6544,10 @@ ipa_struct_reorg::execute (enum srmode m + } + record_accesses (); + prune_escaped_types (); +- analyze_types (); ++ if (current_mode == NORMAL) ++ { ++ analyze_types (); ++ } + + ret = rewrite_functions (); + } +@@ -4864,6 +6616,47 @@ pass_ipa_struct_reorg::gate (function *) + && flag_lto_partition == LTO_PARTITION_ONE); + } + ++const pass_data pass_data_ipa_reorder_fields = ++{ ++ SIMPLE_IPA_PASS, // type ++ "reorder_fields", // name ++ OPTGROUP_NONE, // optinfo_flags ++ TV_IPA_REORDER_FIELDS, // tv_id ++ 0, // properties_required ++ 0, // properties_provided ++ 0, // properties_destroyed ++ 0, // todo_flags_start ++ 0, // todo_flags_finish ++}; ++ ++class pass_ipa_reorder_fields : public simple_ipa_opt_pass ++{ ++public: ++ pass_ipa_reorder_fields (gcc::context *ctxt) ++ : simple_ipa_opt_pass (pass_data_ipa_reorder_fields, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *); ++ virtual unsigned int execute (function *) ++ { ++ unsigned int ret = 0; ++ ret = ipa_struct_reorg ().execute (STRUCT_REORDER_FIELDS); ++ return ret; ++ } ++ ++}; // class pass_ipa_reorder_fields ++ ++bool ++pass_ipa_reorder_fields::gate (function *) ++{ ++ return (optimize >= 3 ++ && flag_ipa_reorder_fields ++ /* Don't bother doing anything if the program has errors. */ ++ && !seen_error () ++ && flag_lto_partition == LTO_PARTITION_ONE); ++} ++ + } // anon namespace + + simple_ipa_opt_pass * +@@ -4871,3 +6664,9 @@ make_pass_ipa_struct_reorg (gcc::context + { + return new pass_ipa_struct_reorg (ctxt); + } ++ ++simple_ipa_opt_pass * ++make_pass_ipa_reorder_fields (gcc::context *ctxt) ++{ ++ return new pass_ipa_reorder_fields (ctxt); ++} +\ No newline at end of file +diff -Nurp a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h 2020-12-29 16:27:25.096000000 +0800 ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h 2021-02-09 10:51:15.272000000 +0800 +@@ -68,12 +68,14 @@ struct srfunction + auto_vec args; + auto_vec globals; + auto_vec_del decls; +- srdecl *record_decl (srtype *, tree, int arg); ++ srdecl *record_decl (srtype *, tree, int arg, tree orig_type = NULL); + + srfunction *old; + cgraph_node *newnode; + srfunction *newf; + ++ bool is_safe_func; ++ + // Constructors + srfunction (cgraph_node *n); + +@@ -183,6 +185,11 @@ struct srfield + void create_new_fields (tree newtype[max_split], + tree newfields[max_split], + tree newlast[max_split]); ++ void reorder_fields (tree newfields[max_split], tree newlast[max_split], ++ tree &field); ++ void create_new_reorder_fields (tree newtype[max_split], ++ tree newfields[max_split], ++ tree newlast[max_split]); + }; + + struct sraccess +@@ -219,8 +226,11 @@ struct srdecl + + tree newdecl[max_split]; + ++ /* Auxiliary record complete original type information of the void* type. 
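++
++ For example (an assumed scenario, for illustration), given
++
++ void *ptr = calloc (num, sizeof (arc_t *));
++
++ the decl can be recorded with orig_type = arc_t **, so later checks
++ (isptrptr, cmp_ptr_layers, size calculations) can use the real
++ multi-layer pointer type instead of void*.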
*/ ++ tree orig_type; ++ + // Constructors +- srdecl (srtype *type, tree decl, int argumentnum = -1); ++ srdecl (srtype *type, tree decl, int argumentnum = -1, tree orgtype = NULL); + + // Methods + void dump (FILE *file); +diff -Nurp a/gcc/passes.def b/gcc/passes.def +--- a/gcc/passes.def 2020-12-29 16:27:25.504000000 +0800 ++++ b/gcc/passes.def 2021-01-06 09:59:52.580000000 +0800 +@@ -169,6 +169,7 @@ along with GCC; see the file COPYING3. + INSERT_PASSES_AFTER (all_late_ipa_passes) + NEXT_PASS (pass_materialize_all_clones); + NEXT_PASS (pass_ipa_pta); ++ NEXT_PASS (pass_ipa_reorder_fields); + /* FIXME: this should a normal IP pass */ + NEXT_PASS (pass_ipa_struct_reorg); + NEXT_PASS (pass_omp_simd_clone); +diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c b/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c +--- a/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/struct/rf_create_fields_bug.c 2021-02-09 10:51:15.272000000 +0800 +@@ -0,0 +1,82 @@ ++// bugfix: ++// Common members do not need to reconstruct. ++// Otherwise, eg:int* -> int** and void* -> void**. ++/* { dg-do compile } */ ++ ++#include ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t* cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t** org_cost; ++}; ++ ++struct a ++{ ++ int t; ++ int t1; ++}; ++ ++__attribute__((noinline)) int ++f(int i, int j) ++{ ++ struct a *t = NULL; ++ struct a t1 = {i, j}; ++ t = &t1; ++ auto int g(void) __attribute__((noinline)); ++ int g(void) ++ { ++ return t->t + t->t1; ++ } ++ return g(); ++} ++ ++arc_t **ap = NULL; ++const int MAX = 100; ++ ++int ++main() ++{ ++ if (f(1, 2) != 3) ++ { ++ abort (); ++ } ++ ap = (arc_t**) malloc(MAX * sizeof(arc_t*)); ++ (*ap)[0].id = 300; ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c b/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c +--- a/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/struct/rf_create_new_func_bug.c 2021-02-25 10:21:28.888000000 +0800 +@@ -0,0 +1,56 @@ ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++#define MallocOrDie(x) sre_malloc((x)) ++ ++struct gki_elem { ++ char *key; ++ int idx; ++ struct gki_elem *nxt; ++}; ++ ++typedef struct { ++ struct gki_elem **table; ++ ++ int primelevel; ++ int nhash; ++ int nkeys; ++} GKI; ++ ++void ++Die(char *format, ...) 
++{ ++ exit(1); ++} ++ ++void * ++sre_malloc(size_t size) ++{ ++ void *ptr; ++ ++ if ((ptr = malloc (size)) == NULL) ++ { ++ Die("malloc of %ld bytes failed", size); ++ } ++ return ptr; ++} ++ ++ ++__attribute__((noinline)) int ++GKIStoreKey(GKI *hash, char *key) ++{ ++ hash->table[0] = MallocOrDie(sizeof(struct gki_elem)); ++} ++ ++int ++main () ++{ ++ GKI *hash; ++ char *key; ++ GKIStoreKey(hash, key); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */ +\ No newline at end of file +diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c b/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c +--- a/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/struct/rf_DTE_struct_instance_field.c 2021-02-09 10:51:15.272000000 +0800 +@@ -0,0 +1,75 @@ ++// escape_instance_field, "Type escapes via a field of instance". ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++ node_t node; ++} network_t; ++ ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++ network_t* net_add; ++ node_t node; ++}; ++ ++ ++const int MAX = 100; ++ ++/* let it escape_array, "Type is used in an array [not handled yet]". */ ++network_t* net[2]; ++ ++int ++main () ++{ ++ net[0] = (network_t*) calloc (1, sizeof(network_t)); ++ net[0]->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ ++ /* Contains an escape type and has structure instance field. */ ++ net[0]->arcs->node = net[0]->node; ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." "reorder_fields" } } */ +\ No newline at end of file +diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c b/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c +--- a/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/struct/rf_DTE_verify.c 2021-02-09 10:51:15.272000000 +0800 +@@ -0,0 +1,94 @@ ++// Verify in escape_dependent_type_escapes, ++// the multi-layer dereference is rewriting correctly,and the memory access ++// is correct. 
++ ++// release ++// escape_dependent_type_escapes, ++// "Type uses a type which escapes or is used by a type which escapes" ++// avoid escape_cast_another_ptr, "Type escapes a cast to a different pointer" ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++ network_t* net_add; ++}; ++ ++ ++const int MAX = 100; ++ ++/* let it escape_array, "Type is used in an array [not handled yet]". */ ++network_t* net[2]; ++arc_p stop_arcs = NULL; ++ ++int ++main () ++{ ++ net[0] = (network_t*) calloc (1, sizeof(network_t)); ++ net[0]->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ ++ net[0]->arcs->id = 100; ++ ++ for (unsigned i = 0; i < 3; i++) ++ { ++ net[0]->arcs->id = net[0]->arcs->id + 2; ++ stop_arcs->cost = net[0]->arcs->id / 2; ++ stop_arcs->net_add = net[0]; ++ printf("stop_arcs->cost = %ld\n", stop_arcs->cost); ++ net[0]->arcs++; ++ stop_arcs++; ++ } ++ ++ if( net[1] != 0 && stop_arcs != 0) ++ { ++ return -1; ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c b/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c +--- a/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/struct/rf_ele_minus_verify.c 2021-02-09 10:51:15.272000000 +0800 +@@ -0,0 +1,60 @@ ++// verify newarc[cmp-1].flow ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++arc_p ap = NULL; ++ ++int ++main () ++{ ++ ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ printf("%d\n", ap[0].id); ++ for (int i = 1; i < MAX; i++) ++ { ++ ap[i-1].id = 500; ++ } ++ printf("%d\n", ap[0].id); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c b/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c +--- a/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/struct/rf_escape_by_base.c 2021-02-09 10:51:15.272000000 +0800 +@@ -0,0 +1,83 @@ ++// release type is used by a type which 
escapes. ++// avoid escape_cast_another_ptr, "Type escapes a cast to a different pointer" ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++typedef struct network ++{ ++ arc_p arcs; ++ arc_p sorted_arcs; ++ int x; ++ node_p nodes; ++ node_p stop_nodes; ++} network_t; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++const int MAX = 100; ++network_t* net = NULL; ++arc_p stop_arcs = NULL; ++int cnt = 0; ++ ++int ++main () ++{ ++ net = (network_t*) calloc (1, 20); ++ net->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t)); ++ if(!(net->arcs)) ++ { ++ return -1; ++ } ++ ++ for( int i = 0; i < MAX; i++, net->arcs = stop_arcs) ++ { ++ cnt++; ++ } ++ ++ net = (network_t*) calloc (1, 20); ++ if( !(net->arcs) ) ++ { ++ return -1; ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c +--- a/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/struct/rf_int_cast_ptr.c 2021-02-09 10:51:15.272000000 +0800 +@@ -0,0 +1,72 @@ ++// release escape_cast_another_ptr, "Type escapes a cast to a different pointer" ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++typedef int cmp_t(const void *, const void *); ++ ++__attribute__((noinline)) void ++spec_qsort(void *a, cmp_t *cmp) ++{ ++ char *pb = NULL; ++ while (cmp(pb, a)) ++ { ++ pb += 1; ++ } ++} ++ ++static int arc_compare( arc_t **a1, int a2 ) ++{ ++ if( (*a1)->id < a2 ) ++ { ++ return -1; ++ } ++ return 1; ++} ++ ++int ++main() ++{ ++ spec_qsort(NULL, (int (*)(const void *, const void *))arc_compare); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */ +\ No newline at end of file +diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c b/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c +--- a/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/struct/rf_mem_ref_offset.c 2021-02-09 10:51:15.272000000 +0800 +@@ -0,0 +1,58 @@ ++/* Supports the MEM_REF offset. ++ _1 = MEM[(struct arc *)ap_4 + 72B].flow; ++ Old rewrite:_1 = ap.reorder.0_8->flow; ++ New rewrite:_1 = MEM[(struct arc.reorder.0 *)ap.reorder.0_8 + 64B].flow. 
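++
++   The offset is recomputed rather than copied: in this sketch 72B is
++   one element of the original struct arc (72 bytes), while the
++   reordered struct arc.reorder.0 is 64 bytes, so the same element
++   offset becomes 64B, roughly
++
++     new_offset = num * TYPE_SIZE_UNIT (struct arc.reorder.0)
++                = 1 * 64B
++
++   (sizes as assumed in the example above).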
*/ ++/* { dg-do compile } */ ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node *node_p; ++ ++typedef struct arc arc_t; ++typedef struct arc *arc_p; ++ ++struct node ++{ ++ int64_t potential; ++ int orientation; ++ node_p child; ++ node_p pred; ++ node_p sibling; ++ node_p sibling_prev; ++ arc_p basic_arc; ++ arc_p firstout; ++ arc_p firstin; ++ arc_p arc_tmp; ++ int64_t flow; ++ int64_t depth; ++ int number; ++ int time; ++}; ++ ++struct arc ++{ ++ int id; ++ int64_t cost; ++ node_p tail; ++ node_p head; ++ short ident; ++ arc_p nextout; ++ arc_p nextin; ++ int64_t flow; ++ int64_t org_cost; ++}; ++ ++int ++main () ++{ ++ const int MAX = 100; ++ /* A similar scenario can be reproduced only by using local variables. */ ++ arc_p ap = NULL; ++ ap = (arc_p) calloc(MAX, sizeof(arc_t)); ++ printf("%d\n", ap[1].flow); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */ +\ No newline at end of file +diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c +--- a/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/struct/rf_pass_conflict.c 2021-02-09 10:51:15.272000000 +0800 +@@ -0,0 +1,110 @@ ++// 针对 ++/* ++Compile options: /home/hxq/hcc_gcc9.3.0_org_debug/bin/gcc -O3 -g ++-flto -flto-partition=one -fipa-reorder-fields -fipa-struct-reorg ++-v -save-temps -fdump-ipa-all-details test.c -o test ++ ++in COMPLETE_STRUCT_RELAYOUT pass: ++N type: struct node.reorder.0 new = "Type escapes a cast to a different pointer" ++copy$head_26 = test_arc.reorder.0_49->head; ++ ++type : struct arc.reorder.0(1599) { ++fields = { ++field (5382) {type = cost_t} ++field (5383) {type = struct node.reorder.0 *} // but node has escaped. ++field (5384) {type = struct node.reorder.0 *} ++field (5386) {type = struct arc.reorder.0 *} ++field (5387) {type = struct arc.reorder.0 *} ++field (5388) {type = flow_t} ++field (5389) {type = cost_t} ++field (5381) {type = int} ++field (5385) {type = short int} ++} ++ ++// The types of the two types are inconsistent after the rewriting. 
++newarc_2(D)->tail = tail_1(D);
++vs
++struct_reorder.0_61(D)->tail = tail_1(D);
++*/
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct network
++{
++  arc_p arcs;
++  arc_p sorted_arcs;
++  int x;
++  node_p nodes;
++  node_p stop_nodes;
++} network_t;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++__attribute__((noinline)) void
++replace_weaker_arc( arc_t *newarc, node_t *tail, node_t *head)
++{
++  printf("test");
++}
++
++__attribute__((noinline)) int64_t
++switch_arcs(arc_t** deleted_arcs, arc_t* arcnew)
++{
++  int64_t count = 0;
++  arc_t *test_arc, copy;
++
++  if (!test_arc->ident)
++    {
++      copy = *test_arc;
++      count++;
++      *test_arc = arcnew[0];
++      replace_weaker_arc(arcnew, copy.tail, copy.head);
++    }
++  return count;
++}
++
++
++int
++main ()
++{
++  switch_arcs(NULL, NULL);
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
+diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c
+--- a/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr2void_lto.c	2021-02-09 10:51:15.272000000 +0800
+@@ -0,0 +1,87 @@
++// escape_cast_void, "Type escapes a cast to/from void*"
++// stop_393 = net.stop_nodes; void *stop;
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct network
++{
++  arc_p arcs, sorted_arcs;
++  int x;
++  node_p nodes, stop_nodes;
++} network_t;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++const int MAX = 100;
++network_t* net = NULL;
++int cnt = 0;
++
++__attribute__((noinline)) int
++primal_feasible (network_t *net)
++{
++  void* stop;
++  node_t *node;
++
++  node = net->nodes;
++  stop = (void *)net->stop_nodes;
++  for( node++; node < (node_t *)stop; node++ )
++    {
++      printf( "PRIMAL NETWORK SIMPLEX: " );
++    }
++  return 0;
++}
++
++int
++main ()
++{
++  net = (network_t*) calloc (1, 20);
++  net->nodes = calloc (MAX, sizeof (node_t));
++  net->stop_nodes = calloc (MAX, sizeof (node_t));
++  cnt = primal_feasible( net );
++
++  net = (network_t*) calloc (1, 20);
++  if( !(net->arcs) )
++    {
++      return -1;
++    }
++  return cnt;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */
+\ No newline at end of file
+diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c
+--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_diff.c	2021-02-09 10:51:15.272000000 +0800
+@@ -0,0 +1,71 @@
++// support POINTER_DIFF_EXPR & NOP_EXPR to avoid
++// escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt"
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct network
++{
++  arc_p arcs;
++  arc_p sorted_arcs;
++  int x;
++  node_p nodes;
++  node_p stop_nodes;
++} network_t;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++int
++main ()
++{
++  arc_t *old_arcs;
++  node_t *node;
++  node_t *stop;
++  size_t off;
++  network_t* net;
++
++  for( ; node->number < stop->number; node++ )
++    {
++      off = node->basic_arc - old_arcs;
++      node->basic_arc = (arc_t *)(net->arcs + off);
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "reorder_fields" } } */
+\ No newline at end of file
+diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c
+--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_negate_expr.c	2021-02-09 10:51:15.272000000 +0800
+@@ -0,0 +1,55 @@
++// support NEGATE_EXPR rewriting
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++int
++main ()
++{
++  int64_t susp = 0;
++  const int MAX = 100;
++  arc_p ap = (arc_p) calloc(MAX, sizeof(arc_t));
++  ap -= susp;
++  printf("%d\n", ap[1].flow);
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
+diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c
+--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr.c	2021-02-09 10:51:15.272000000 +0800
+@@ -0,0 +1,55 @@
++// release escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]";
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++const int MAX = 100;
++arc_t **ap = NULL;
++
++int
++main ()
++{
++  ap = (arc_t**) malloc(MAX * sizeof(arc_t*));
++  (*ap)[0].id = 300;
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
+diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c
+--- a/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/struct/rf_ptr_ptr_ptr.c	2021-02-09 10:51:15.276000000 +0800
+@@ -0,0 +1,58 @@
++// release escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]"
++
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++const int MAX = 100;
++arc_p **ap;
++
++
++int
++main ()
++{
++  ap = (arc_p**) calloc(MAX, sizeof(arc_p*));
++  (**ap)[0].id = 500;
++
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
+diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c b/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c
+--- a/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/struct/rf_rescusive_type.c	2021-02-09 10:51:15.276000000 +0800
+@@ -0,0 +1,57 @@
++// release escape_rescusive_type, "Recusive type"
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++const int MAX = 100;
++arc_p ap = NULL;
++
++int
++main ()
++{
++  ap = (arc_p) calloc (MAX, sizeof (arc_t));
++  ap[0].id = 100;
++  ap[0].head = (node_p) calloc (MAX, sizeof (node_t));
++
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
+diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c
+--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_assign_more_cmp.c	2021-02-09 10:51:15.276000000 +0800
+@@ -0,0 +1,65 @@
++// support more gimple assign rhs code
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++__attribute__((noinline)) int
++compare(arc_p p1, arc_p p2)
++{
++  return p1 < p2;
++}
++
++int n = 0;
++int m = 0;
++
++int
++main ()
++{
++  scanf ("%d %d", &n, &m);
++  arc_p p = calloc (10, sizeof (struct arc));
++  if (compare (&p[n], &p[m]))
++    {
++      printf ("ss!");
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
+diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c
+--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_bug.c	2021-02-09 10:51:15.276000000 +0800
+@@ -0,0 +1,72 @@
++// rewrite_cond bugfix;
++/*
++if (iterator_600 != 0B)
++old rewrite: _1369 = iterator.reorder.0_1249 != 0B; if (_1369 != 1)
++new rewrite: if (iterator.reorder.0_1249 != 0B)
++*/
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct list_elem
++{
++  arc_t* arc;
++  struct list_elem* next;
++}list_elem;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++int i = 0;
++
++int
++main ()
++{
++  register list_elem *first_list_elem;
++  register list_elem* iterator;
++  iterator = first_list_elem->next;
++  while (iterator)
++    {
++      iterator = iterator->next;
++      i++;
++    }
++
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "reorder_fields" } } */
+\ No newline at end of file
+diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c
+--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_cond_more_cmp.c	2021-02-09 10:51:15.276000000 +0800
+@@ -0,0 +1,58 @@
++// support if (_150 >= _154)
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++int
++main()
++{
++  arc_p **ap = (arc_p**) malloc(1 * sizeof(arc_p*));
++  arc_p **arcs_pointer_sorted = (arc_p**) malloc(1 * sizeof(arc_p*));
++  arcs_pointer_sorted[0] = (arc_p*) calloc (1, sizeof(arc_p));
++
++  if (arcs_pointer_sorted >= ap)
++    {
++      return -1;
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
+diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c b/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c
+--- a/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/struct/rf_rewrite_phi_bug.c	2021-02-09 10:51:15.276000000 +0800
+@@ -0,0 +1,81 @@
++/*
++Exclude the rewriting error caused by
++first_list_elem = (list_elem *)NULL;
++rewriting PHI:first_list_elem_700 = PHI <0B(144), 0B(146)>
++into:
++first_list_elem.reorder.0_55 = PHI <(144), (146)>
++*/
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++typedef struct list_elem
++{
++  arc_t* arc;
++  struct list_elem* next;
++}list_elem;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout, firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail, head;
++  short ident;
++  arc_p nextout, nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++const int MAX = 100;
++
++list_elem* new_list_elem;
++list_elem* first_list_elem;
++
++int
++main ()
++{
++  int i = 0;
++  list_elem *first_list_elem;
++  list_elem *new_list_elem;
++  arc_t *arcout;
++  for( ; i < MAX && arcout->ident == -1; i++);
++
++  first_list_elem = (list_elem *)NULL;
++  for( ; i < MAX; i++)
++    {
++      new_list_elem = (list_elem*) calloc(1, sizeof(list_elem));
++      new_list_elem->next = first_list_elem;
++      first_list_elem = new_list_elem;
++    }
++  if (first_list_elem != 0)
++    {
++      return -1;
++    }
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 3" "reorder_fields" } } */
+\ No newline at end of file
+diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_visible_func.c b/gcc/testsuite/gcc.dg/struct/rf_visible_func.c
+--- a/gcc/testsuite/gcc.dg/struct/rf_visible_func.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/struct/rf_visible_func.c	2021-02-09 10:51:15.276000000 +0800
+@@ -0,0 +1,92 @@
++// release escape_visible_function, "Type escapes via expternally visible function call"
++// compile options: gcc -O3 -fno-inline -fwhole-program
++// -flto-partition=one -fipa-struct-reorg arc_compare.c -fdump-ipa-all -S -v
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct node node_t;
++typedef struct node *node_p;
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct node
++{
++  int64_t potential;
++  int orientation;
++  node_p child;
++  node_p pred;
++  node_p sibling;
++  node_p sibling_prev;
++  arc_p basic_arc;
++  arc_p firstout;
++  arc_p firstin;
++  arc_p arc_tmp;
++  int64_t flow;
++  int64_t depth;
++  int number;
++  int time;
++};
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  node_p tail;
++  node_p head;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++__attribute__((noinline)) static int
++arc_compare( arc_t **a1, arc_t **a2 )
++{
++  if( (*a1)->flow > (*a2)->flow )
++    {
++      return 1;
++    }
++  if( (*a1)->flow < (*a2)->flow )
++    {
++      return -1;
++    }
++  if( (*a1)->id < (*a2)->id )
++    {
++      return -1;
++    }
++
++  return 1;
++}
++
++__attribute__((noinline)) void
++spec_qsort(void *array, int nitems, int size,
++           int (*cmp)(const void*,const void*))
++{
++  for (int i = 0; i < nitems - 1; i++)
++    {
++      if (cmp (array , array))
++        {
++          printf ("CMP 1\n");
++        }
++      else
++        {
++          printf ("CMP 2\n");
++        }
++    }
++}
++
++typedef int cmp_t(const void *, const void *);
++
++int
++main ()
++{
++  void *p = calloc (100, sizeof (arc_t **));
++  spec_qsort (p, 100, 0, (int (*)(const void *, const void *))arc_compare);
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "reorder_fields" } } */
+\ No newline at end of file
+diff -Nurp a/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c b/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c
+--- a/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c	1970-01-01 08:00:00.000000000 +0800
++++ b/gcc/testsuite/gcc.dg/struct/rf_void_ptr_param_func.c	2021-03-03 16:58:00.296000000 +0800
+@@ -0,0 +1,54 @@
++// Add a safe func mechanism.
++// avoid escape_unkown_field, "Type escapes via an unkown field accessed"
++// avoid escape_cast_void, "Type escapes a cast to/from void*" eg: GIMPLE_NOP
++/* { dg-do compile } */
++
++#include <stdio.h>
++#include <stdlib.h>
++
++typedef struct arc arc_t;
++typedef struct arc *arc_p;
++
++struct arc
++{
++  int id;
++  int64_t cost;
++  short ident;
++  arc_p nextout;
++  arc_p nextin;
++  int64_t flow;
++  int64_t org_cost;
++};
++
++void
++__attribute__((noinline)) spec_qsort (void *a, size_t es)
++{
++  char *pa;
++  char *pb;
++  int cmp_result;
++
++  while ((*(arc_t **)a)->id < *((int *)a))
++    {
++      if (cmp_result == 0)
++        {
++          spec_qsort (a, es);
++          pa = (char *)a - es;
++          a += es;
++          *(long *)pb = *(long *)pa;
++        }
++      else
++        {
++          a -= pa - pb;
++        }
++    }
++}
++
++int
++main()
++{
++  arc_p **arcs_pointer_sorted;
++  spec_qsort (arcs_pointer_sorted[0], sizeof (arc_p));
++  return 0;
++}
++
++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "reorder_fields" } } */
+\ No newline at end of file
+diff -Nurp a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp
+--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp	2020-12-29 16:27:25.052000000 +0800
++++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp	2021-02-07 21:27:02.676000000 +0800
+@@ -27,8 +27,25 @@ set STRUCT_REORG_TORTURE_OPTIONS [list \
+ 
+ set-torture-options $STRUCT_REORG_TORTURE_OPTIONS {{}}
+ 
+-gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \
+ 	"" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program"
++# -fipa-struct-reorg
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/wo_prof_*.c]] \
++	"" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_ratio_*.c]] \
++	"" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/w_prof_*.c]] \
++	"" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/struct_reorg*.c]] \
++	"" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/sr_*.c]] \
++	"" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/csr_*.c]] \
++	"" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program"
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/complete_struct_relayout.c]] \
++	"" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program"
++
++# -fipa-reorder-fields
++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/rf*.c]] \
++	"" "-fipa-reorder-fields -fdump-ipa-all -flto-partition=one -fwhole-program"
+ 
+ # All done.
+ torture-finish
+diff -Nurp a/gcc/timevar.def b/gcc/timevar.def
+--- a/gcc/timevar.def	2020-12-29 16:27:25.044000000 +0800
++++ b/gcc/timevar.def	2021-01-06 09:59:52.580000000 +0800
+@@ -77,6 +77,7 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , "
+ DEFTIMEVAR (TV_IPA_INLINING          , "ipa inlining heuristics")
+ DEFTIMEVAR (TV_IPA_FNSPLIT           , "ipa function splitting")
+ DEFTIMEVAR (TV_IPA_COMDATS           , "ipa comdats")
++DEFTIMEVAR (TV_IPA_REORDER_FIELDS    , "ipa struct reorder fields optimization")
+ DEFTIMEVAR (TV_IPA_STRUCT_REORG      , "ipa struct reorg optimization")
+ DEFTIMEVAR (TV_IPA_OPT               , "ipa various optimizations")
+ DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS    , "lto stream inflate")
+diff -Nurp a/gcc/tree-pass.h b/gcc/tree-pass.h
+--- a/gcc/tree-pass.h	2020-12-29 16:27:25.780000000 +0800
++++ b/gcc/tree-pass.h	2021-01-06 09:59:52.580000000 +0800
+@@ -504,6 +504,7 @@ extern ipa_opt_pass_d *make_pass_ipa_dev
+ extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt);
+ extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt);
+ extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt);
++extern simple_ipa_opt_pass *make_pass_ipa_reorder_fields (gcc::context *ctxt);
+ extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt);
+ extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt);
+ extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt);
diff --git a/tree-optimization-92328-fix-value-number-with-bogus-type.patch b/tree-optimization-92328-fix-value-number-with-bogus-type.patch
new file mode 100644
index 0000000000000000000000000000000000000000..a74f0951f2d52bbc374d2da497af82c38affa466
--- /dev/null
+++ b/tree-optimization-92328-fix-value-number-with-bogus-type.patch
@@ -0,0 +1,120 @@
+This backport contains 1 patch from the GCC mainstream tree.
+The commit IDs of these patches are listed below in chronological order.
+ +0001-backport-tree-optimization-92328-fix-value-number-with-bogus-type.patch +01e9f1812c72c940172700971179d7726b7a3050 + +diff --git a/gcc/testsuite/gcc.dg/torture/pr92328.c b/gcc/testsuite/gcc.dg/torture/pr92328.c +new file mode 100644 +index 0000000..7898b9e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr92328.c +@@ -0,0 +1,29 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-ftree-pre -Wno-div-by-zero" } */ ++ ++int nt; ++ ++void ++ja (int os) ++{ ++ int *ku = &os, *id = &os; ++ unsigned int qr = 0; ++ ++ for (;;) ++ { ++ if (os == *ku) ++ { ++ *id = 0; ++ qr += os != *ku; ++ id = &qr; ++ } ++ ++ *id &= qr; ++ ++ if (os != 0) ++ { ++ nt /= 0; ++ ku = &qr; ++ } ++ } ++} +diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c +index c033b19..e3b1973 100644 +--- a/gcc/tree-ssa-sccvn.c ++++ b/gcc/tree-ssa-sccvn.c +@@ -2995,24 +2995,30 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *data_, + || known_eq (ref->size, TYPE_PRECISION (vr->type))) + && multiple_p (ref->size, BITS_PER_UNIT)) + { +- if (known_eq (ref->size, size2)) +- return vn_reference_lookup_or_insert_for_pieces +- (vuse, get_alias_set (lhs), vr->type, vr->operands, +- SSA_VAL (def_rhs)); +- else if (! INTEGRAL_TYPE_P (TREE_TYPE (def_rhs)) +- || type_has_mode_precision_p (TREE_TYPE (def_rhs))) ++ tree val = NULL_TREE; ++ if (! INTEGRAL_TYPE_P (TREE_TYPE (def_rhs)) ++ || type_has_mode_precision_p (TREE_TYPE (def_rhs))) + { + gimple_match_op op (gimple_match_cond::UNCOND, + BIT_FIELD_REF, vr->type, + SSA_VAL (def_rhs), + bitsize_int (ref->size), + bitsize_int (offset - offset2)); +- tree val = vn_nary_build_or_lookup (&op); +- if (val +- && (TREE_CODE (val) != SSA_NAME +- || ! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (val))) +- return data->finish (get_alias_set (lhs), val); ++ val = vn_nary_build_or_lookup (&op); + } ++ else if (known_eq (ref->size, size2)) ++ { ++ gimple_match_op op (gimple_match_cond::UNCOND, ++ VIEW_CONVERT_EXPR, vr->type, ++ SSA_VAL (def_rhs)); ++ val = vn_nary_build_or_lookup (&op); ++ } ++ if (val ++ && (TREE_CODE (val) != SSA_NAME ++ || ! SSA_NAME_OCCURS_IN_ABNORMAL_PHI (val))) ++ return vn_reference_lookup_or_insert_for_pieces ++ (vuse, get_alias_set (lhs), vr->type, ++ vr->operands, val); + } + else if (maxsize.is_constant (&maxsizei) + && offset.is_constant (&offseti) +@@ -5956,7 +5962,6 @@ eliminate_dom_walker::eliminate_stmt (basic_block b, gimple_stmt_iterator *gsi) + && (TREE_CODE (gimple_assign_rhs1 (stmt)) == SSA_NAME + || is_gimple_min_invariant (gimple_assign_rhs1 (stmt)))) + { +- tree val; + tree rhs = gimple_assign_rhs1 (stmt); + vn_reference_t vnresult; + /* ??? gcc.dg/torture/pr91445.c shows that we lookup a boolean +@@ -5997,14 +6002,22 @@ eliminate_dom_walker::eliminate_stmt (basic_block b, gimple_stmt_iterator *gsi) + else + lookup_lhs = NULL_TREE; + } +- val = NULL_TREE; ++ tree val = NULL_TREE; + if (lookup_lhs) + val = vn_reference_lookup (lookup_lhs, gimple_vuse (stmt), + VN_WALKREWRITE, &vnresult, false); + if (TREE_CODE (rhs) == SSA_NAME) + rhs = VN_INFO (rhs)->valnum; + if (val +- && operand_equal_p (val, rhs, 0)) ++ && (operand_equal_p (val, rhs, 0) ++ /* Due to the bitfield lookups above we can get bit ++ interpretations of the same RHS as values here. Those ++ are redundant as well. 
*/ ++ || (TREE_CODE (val) == SSA_NAME ++ && gimple_assign_single_p (SSA_NAME_DEF_STMT (val)) ++ && (val = gimple_assign_rhs1 (SSA_NAME_DEF_STMT (val))) ++ && TREE_CODE (val) == VIEW_CONVERT_EXPR ++ && TREE_OPERAND (val, 0) == rhs))) + { + /* We can only remove the later store if the former aliases + at least all accesses the later one does or if the store