From db8a90ff34c062bc0eb5337ee08833aebf8ef2c4 Mon Sep 17 00:00:00 2001 From: jdkboy Date: Sat, 29 Aug 2020 09:39:46 +0800 Subject: [PATCH] Add several enhancement patches - Add add-checks-to-avoid-spoiling-if-conversion.patch - Add add-option-fallow-store-data-races.patch - Add complete-struct-reorg.patch - Add cse-in-vectorization.patch - Add enable-simd-math.patch - Add fix-ICE-avoid-issueing-loads-in-SM-when-possible.patch - Add fix-ICE-in-compute_live_loop_exits.patch - Add fix-ICE-in-copy_reference_ops_from_ref.patch - Add fix-ICE-in-declare-return-variable.patch - Add fix-ICE-in-exact_div.patch - Add fix-ICE-in-gimple_op.patch - Add fix-ICE-in-model_update_limit_points_in_group.patch - Add fix-ICE-in-reload.patch - Add fix-ICE-in-store_constructor.patch - Add fix-ICE-in-vec.patch - Add fix-ICE-in-vect_create_epilog_for_reduction.patch - Add fix-ICE-in-vect_create_epilog_for_reduction_2.patch - Add fix-ICE-in-vect_create_epilog_for_reduction_3.patch - Add fix-ICE-in-vect_get_vec_def_for_stmt_copy.patch - Add fix-ICE-in-vect_slp_analyze_node_operations.patch - Add fix-ICE-in-vect_stmt_to_vectorize.patch - Add fix-ICE-in-vect_transform_stmt.patch - Add fix-ICE-in-vectorizable_condition.patch - Add fix-ICE-in-verify_ssa.patch - Add fix-ICE-statement-uses-released-SSA-name.patch - Add fix-ICE-when-vectorizing-nested-cycles.patch - Add fix-SSA-update-for-vectorizer-epilogue.patch - Add fix-do-not-build-op.patch - Add fix-load-eliding-in-SM.patch - Add fix-wrong-vectorizer-code.patch - Add generate-csel-for-arrayref.patch - Add ipa-const-prop-self-recursion-bugfix.patch - Add ipa-const-prop.patch - Add ipa-struct-reorg-bugfix.patch - Add ipa-struct-reorg.patch - Add medium-code-mode.patch - Add reduction-chain-slp-option.patch - Add reductions-slp-enhancement.patch - Add 
simplify-removing-subregs.patch - Add tighten-range-for-generating-csel.patch - Add vectorization-enhancement.patch --- ...ecks-to-avoid-spoiling-if-conversion.patch | 86 + add-option-fallow-store-data-races.patch | 298 + ...calculation-optimization-within-loop.patch | 10 +- change-gcc-BASE-VER.patch | 28 +- complete-struct-reorg.patch | 1814 ++ cse-in-vectorization.patch | 68 + delete-incorrect-smw.patch | 6 + enable-simd-math.patch | 34 + ...d-issueing-loads-in-SM-when-possible.patch | 123 + fix-ICE-during-pass-ccp.patch | 6 + fix-ICE-in-compute_live_loop_exits.patch | 76 + fix-ICE-in-copy_reference_ops_from_ref.patch | 70 + fix-ICE-in-declare-return-variable.patch | 31 + fix-ICE-in-exact_div.patch | 54 + fix-ICE-in-gimple_op.patch | 65 + ...n-model_update_limit_points_in_group.patch | 248 + fix-ICE-in-reload.patch | 369 + fix-ICE-in-store_constructor.patch | 356 + fix-ICE-in-vec.patch | 93 + ...-in-vect_create_epilog_for_reduction.patch | 81 + ...n-vect_create_epilog_for_reduction_2.patch | 33 + ...n-vect_create_epilog_for_reduction_3.patch | 87 + ...CE-in-vect_get_vec_def_for_stmt_copy.patch | 54 + ...-in-vect_slp_analyze_node_operations.patch | 381 + fix-ICE-in-vect_stmt_to_vectorize.patch | 41 + fix-ICE-in-vect_transform_stmt.patch | 96 + fix-ICE-in-vectorizable-load.patch | 6 + fix-ICE-in-vectorizable_condition.patch | 18 + fix-ICE-in-verify_ssa.patch | 41 + ...ICE-statement-uses-released-SSA-name.patch | 109 + fix-ICE-when-vectorizing-nested-cycles.patch | 145 + fix-SSA-update-for-vectorizer-epilogue.patch | 47 + fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch | 6 + fix-cost-of-plus.patch | 3 + fix-do-not-build-op.patch | 27 + fix-load-eliding-in-SM.patch | 55 + fix-regno-out-of-range.patch | 6 + fix-wrong-vectorizer-code.patch | 71 + gcc.spec | 170 +- generate-csel-for-arrayref.patch | 218 + generate-csel.patch | 6 + ipa-const-prop-self-recursion-bugfix.patch | 191 + ipa-const-prop.patch | 11040 +++++++++ ipa-struct-reorg-bugfix.patch | 613 + ipa-struct-reorg.patch | 5846 +++++ ivopts-1.patch | 3 + ivopts-2.patch | 3 + loop-finite-bugfix.patch | 6 + loop-finite.patch | 6 + loop-split.patch | 6 + medium-code-mode.patch | 426 + reduction-chain-slp-option.patch | 52 + reductions-slp-enhancement.patch | 59 + remove-array-index-inliner-hint.patch | 6 + simplify-removing-subregs.patch | 117 + ...ug-insns-when-computing-inline-costs.patch | 6 + tighten-range-for-generating-csel.patch | 132 + vectorization-enhancement.patch | 20239 ++++++++++++++++ 58 files changed, 44250 insertions(+), 37 deletions(-) create mode 100644 add-checks-to-avoid-spoiling-if-conversion.patch create mode 100644 add-option-fallow-store-data-races.patch create mode 100644 complete-struct-reorg.patch create mode 100644 cse-in-vectorization.patch create mode 100644 enable-simd-math.patch create mode 100644 fix-ICE-avoid-issueing-loads-in-SM-when-possible.patch create mode 100644 fix-ICE-in-compute_live_loop_exits.patch create mode 100644 fix-ICE-in-copy_reference_ops_from_ref.patch create mode 100644 fix-ICE-in-declare-return-variable.patch create mode 100644 fix-ICE-in-exact_div.patch create mode 100644 fix-ICE-in-gimple_op.patch create mode 100644 fix-ICE-in-model_update_limit_points_in_group.patch create mode 100644 fix-ICE-in-reload.patch create mode 100644 fix-ICE-in-store_constructor.patch create mode 100644 fix-ICE-in-vec.patch create mode 100644 fix-ICE-in-vect_create_epilog_for_reduction.patch create mode 100644 fix-ICE-in-vect_create_epilog_for_reduction_2.patch create mode 100644 
fix-ICE-in-vect_create_epilog_for_reduction_3.patch create mode 100644 fix-ICE-in-vect_get_vec_def_for_stmt_copy.patch create mode 100644 fix-ICE-in-vect_slp_analyze_node_operations.patch create mode 100644 fix-ICE-in-vect_stmt_to_vectorize.patch create mode 100644 fix-ICE-in-vect_transform_stmt.patch create mode 100644 fix-ICE-in-vectorizable_condition.patch create mode 100644 fix-ICE-in-verify_ssa.patch create mode 100644 fix-ICE-statement-uses-released-SSA-name.patch create mode 100644 fix-ICE-when-vectorizing-nested-cycles.patch create mode 100644 fix-SSA-update-for-vectorizer-epilogue.patch create mode 100644 fix-do-not-build-op.patch create mode 100644 fix-load-eliding-in-SM.patch create mode 100644 fix-wrong-vectorizer-code.patch create mode 100644 generate-csel-for-arrayref.patch create mode 100644 ipa-const-prop-self-recursion-bugfix.patch create mode 100644 ipa-const-prop.patch create mode 100644 ipa-struct-reorg-bugfix.patch create mode 100644 ipa-struct-reorg.patch create mode 100644 medium-code-mode.patch create mode 100644 reduction-chain-slp-option.patch create mode 100644 reductions-slp-enhancement.patch create mode 100644 simplify-removing-subregs.patch create mode 100644 tighten-range-for-generating-csel.patch create mode 100644 vectorization-enhancement.patch diff --git a/add-checks-to-avoid-spoiling-if-conversion.patch b/add-checks-to-avoid-spoiling-if-conversion.patch new file mode 100644 index 0000000..34d7505 --- /dev/null +++ b/add-checks-to-avoid-spoiling-if-conversion.patch @@ -0,0 +1,86 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-tree-optimization-95855-Add-checks-to-avoid-spoiling.patch +33d114f570b4a3583421c700396fd5945acebc28 + +diff -uprN a/gcc/gimple-ssa-split-paths.c b/gcc/gimple-ssa-split-paths.c +--- a/gcc/gimple-ssa-split-paths.c ++++ b/gcc/gimple-ssa-split-paths.c +@@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see + #include "gimple-ssa.h" + #include "tree-phinodes.h" + #include "ssa-iterators.h" ++#include "fold-const.h" + + /* Given LATCH, the latch block in a loop, see if the shape of the + path reaching LATCH is suitable for being split by duplication. +@@ -254,6 +255,44 @@ is_feasible_trace (basic_block bb) + } + } + ++ /* Canonicalize the form. */ ++ if (single_pred_p (pred1) && single_pred (pred1) == pred2 ++ && num_stmts_in_pred1 == 0) ++ std::swap (pred1, pred2); ++ ++ /* This is meant to catch another kind of cases that are likely opportunities ++ for if-conversion. After canonicalizing, PRED2 must be an empty block and ++ PRED1 must be the only predecessor of PRED2. Moreover, PRED1 is supposed ++ to end with a cond_stmt which has the same args with the PHI in BB. 
*/ ++ if (single_pred_p (pred2) && single_pred (pred2) == pred1 ++ && num_stmts_in_pred2 == 0) ++ { ++ gimple *cond_stmt = last_stmt (pred1); ++ if (cond_stmt && gimple_code (cond_stmt) == GIMPLE_COND) ++ { ++ tree lhs = gimple_cond_lhs (cond_stmt); ++ tree rhs = gimple_cond_rhs (cond_stmt); ++ ++ gimple_stmt_iterator gsi; ++ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gimple *phi = gsi_stmt (gsi); ++ if ((operand_equal_p (gimple_phi_arg_def (phi, 0), lhs) ++ && operand_equal_p (gimple_phi_arg_def (phi, 1), rhs)) ++ || (operand_equal_p (gimple_phi_arg_def (phi, 0), rhs) ++ && (operand_equal_p (gimple_phi_arg_def (phi, 1), lhs)))) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, ++ "Block %d appears to be optimized to a join " ++ "point for if-convertable half-diamond.\n", ++ bb->index); ++ return false; ++ } ++ } ++ } ++ } ++ + /* If the joiner has no PHIs with useful uses there is zero chance + of CSE/DCE/jump-threading possibilities exposed by duplicating it. */ + bool found_useful_phi = false; +diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c +new file mode 100644 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/split-path-12.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fsplit-paths -fdump-tree-split-paths-details " } */ ++ ++double ++foo(double *d1, double *d2, double *d3, int num, double *ip) ++{ ++ double dmax[3]; ++ ++ for (int i = 0; i < num; i++) { ++ dmax[0] = d1[i] < dmax[0] ? dmax[0] : d1[i]; ++ dmax[1] = d2[i] < dmax[1] ? dmax[1] : d2[i]; ++ dmax[2] = d3[i] < dmax[2] ? dmax[2] : d3[i]; ++ ip[i] = dmax[2]; ++ } ++ ++ return dmax[0] + dmax[1] + dmax[2]; ++} ++ ++/* { dg-final { scan-tree-dump "appears to be optimized to a join point for if-convertable half-diamond" "split-paths" } } */ diff --git a/add-option-fallow-store-data-races.patch b/add-option-fallow-store-data-races.patch new file mode 100644 index 0000000..8ecb581 --- /dev/null +++ b/add-option-fallow-store-data-races.patch @@ -0,0 +1,298 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-re-PR-middle-end-92046-Command-line-options-that-are.patch +e622a32db78300821fc1327637ec6413febc2c66 + +diff -uprN a/gcc/common.opt b/gcc/common.opt +--- a/gcc/common.opt 2020-05-28 16:12:58.815511599 +0800 ++++ b/gcc/common.opt 2020-05-28 15:54:33.797511589 +0800 +@@ -993,6 +993,10 @@ Align the start of loops. + falign-loops= + Common RejectNegative Joined Var(str_align_loops) Optimization + ++fallow-store-data-races ++Common Report Var(flag_store_data_races) Optimization ++Allow the compiler to introduce new data races on stores. ++ + fargument-alias + Common Ignore + Does nothing. Preserved for backward compatibility. +diff -uprN a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +--- a/gcc/doc/invoke.texi 2020-05-28 16:12:56.875511599 +0800 ++++ b/gcc/doc/invoke.texi 2020-05-28 15:54:33.757511589 +0800 +@@ -400,6 +400,7 @@ Objective-C and Objective-C++ Dialects}. 
+ -falign-jumps[=@var{n}[:@var{m}:[@var{n2}[:@var{m2}]]]] @gol + -falign-labels[=@var{n}[:@var{m}:[@var{n2}[:@var{m2}]]]] @gol + -falign-loops[=@var{n}[:@var{m}:[@var{n2}[:@var{m2}]]]] @gol ++-fallow-store-data-races @gol + -fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol + -fauto-inc-dec -fbranch-probabilities @gol + -fbranch-target-load-optimize -fbranch-target-load-optimize2 @gol +@@ -8365,9 +8366,9 @@ designed to reduce code size. + Disregard strict standards compliance. @option{-Ofast} enables all + @option{-O3} optimizations. It also enables optimizations that are not + valid for all standard-compliant programs. +-It turns on @option{-ffast-math} and the Fortran-specific +-@option{-fstack-arrays}, unless @option{-fmax-stack-var-size} is +-specified, and @option{-fno-protect-parens}. ++It turns on @option{-ffast-math}, @option{-fallow-store-data-races} ++and the Fortran-specific @option{-fstack-arrays}, unless ++@option{-fmax-stack-var-size} is specified, and @option{-fno-protect-parens}. + + @item -Og + @opindex Og +@@ -10120,6 +10121,12 @@ The maximum allowed @var{n} option value + + Enabled at levels @option{-O2}, @option{-O3}. + ++@item -fallow-store-data-races ++@opindex fallow-store-data-races ++Allow the compiler to introduce new data races on stores. ++ ++Enabled at level @option{-Ofast}. ++ + @item -funit-at-a-time + @opindex funit-at-a-time + This option is left for compatibility reasons. @option{-funit-at-a-time} +@@ -11902,10 +11909,6 @@ The maximum number of conditional store + if either vectorization (@option{-ftree-vectorize}) or if-conversion + (@option{-ftree-loop-if-convert}) is disabled. + +-@item allow-store-data-races +-Allow optimizers to introduce new data races on stores. +-Set to 1 to allow, otherwise to 0. +- + @item case-values-threshold + The smallest number of different values for which it is best to use a + jump-table instead of a tree of conditional branches. If the value is +diff -uprN a/gcc/opts.c b/gcc/opts.c +--- a/gcc/opts.c 2020-05-28 16:12:58.847511599 +0800 ++++ b/gcc/opts.c 2020-05-28 15:54:35.713511589 +0800 +@@ -560,6 +560,7 @@ static const struct default_options defa + + /* -Ofast adds optimizations to -O3. */ + { OPT_LEVELS_FAST, OPT_ffast_math, NULL, 1 }, ++ { OPT_LEVELS_FAST, OPT_fallow_store_data_races, NULL, 1 }, + + { OPT_LEVELS_NONE, 0, NULL, 0 } + }; +@@ -682,13 +683,6 @@ default_options_optimization (struct gcc + : default_param_value (PARAM_MAX_DSE_ACTIVE_LOCAL_STORES) / 10, + opts->x_param_values, opts_set->x_param_values); + +- /* At -Ofast, allow store motion to introduce potential race conditions. */ +- maybe_set_param_value +- (PARAM_ALLOW_STORE_DATA_RACES, +- opts->x_optimize_fast ? 1 +- : default_param_value (PARAM_ALLOW_STORE_DATA_RACES), +- opts->x_param_values, opts_set->x_param_values); +- + if (opts->x_optimize_size) + /* We want to crossjump as much as possible. */ + maybe_set_param_value (PARAM_MIN_CROSSJUMP_INSNS, 1, +diff -uprN a/gcc/params.def b/gcc/params.def +--- a/gcc/params.def 2020-05-28 16:12:58.831511599 +0800 ++++ b/gcc/params.def 2020-05-28 15:54:35.725511589 +0800 +@@ -1199,12 +1199,6 @@ DEFPARAM (PARAM_CASE_VALUES_THRESHOLD, + "if 0, use the default for the machine.", + 0, 0, 0) + +-/* Data race flags for C++0x memory model compliance. */ +-DEFPARAM (PARAM_ALLOW_STORE_DATA_RACES, +- "allow-store-data-races", +- "Allow new data races on stores to be introduced.", +- 0, 0, 1) +- + /* Reassociation width to be used by tree reassoc optimization. 
*/ + DEFPARAM (PARAM_TREE_REASSOC_WIDTH, + "tree-reassoc-width", +diff -uprN a/gcc/params.h b/gcc/params.h +--- a/gcc/params.h 2020-05-28 16:12:58.843511599 +0800 ++++ b/gcc/params.h 2020-05-28 15:54:35.725511589 +0800 +@@ -228,8 +228,6 @@ extern void init_param_values (int *para + PARAM_VALUE (PARAM_MAX_STORES_TO_SINK) + #define ALLOW_LOAD_DATA_RACES \ + PARAM_VALUE (PARAM_ALLOW_LOAD_DATA_RACES) +-#define ALLOW_STORE_DATA_RACES \ +- PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES) + #define ALLOW_PACKED_LOAD_DATA_RACES \ + PARAM_VALUE (PARAM_ALLOW_PACKED_LOAD_DATA_RACES) + #define ALLOW_PACKED_STORE_DATA_RACES \ +diff -uprN a/gcc/testsuite/c-c++-common/cxxbitfields-3.c b/gcc/testsuite/c-c++-common/cxxbitfields-3.c +--- a/gcc/testsuite/c-c++-common/cxxbitfields-3.c 2020-05-28 16:12:56.959511599 +0800 ++++ b/gcc/testsuite/c-c++-common/cxxbitfields-3.c 2020-05-28 15:54:33.853511589 +0800 +@@ -1,5 +1,5 @@ + /* { dg-do compile { target i?86-*-* x86_64-*-* } } */ +-/* { dg-options "-O2 --param allow-store-data-races=0" } */ ++/* { dg-options "-O2 -fno-allow-store-data-races" } */ + + /* Make sure we don't narrow down to a QI or HI to store into VAR.J, + but instead use an SI. */ +diff -uprN a/gcc/testsuite/c-c++-common/cxxbitfields-6.c b/gcc/testsuite/c-c++-common/cxxbitfields-6.c +--- a/gcc/testsuite/c-c++-common/cxxbitfields-6.c 2020-05-28 16:12:56.935511599 +0800 ++++ b/gcc/testsuite/c-c++-common/cxxbitfields-6.c 2020-05-28 15:54:33.845511589 +0800 +@@ -1,6 +1,6 @@ + /* PR middle-end/50141 */ + /* { dg-do compile } */ +-/* { dg-options "-O2 --param allow-store-data-races=0" } */ ++/* { dg-options "-O2 -fno-allow-store-data-races" } */ + + struct S + { +diff -uprN a/gcc/testsuite/c-c++-common/simulate-thread/bitfields-1.c b/gcc/testsuite/c-c++-common/simulate-thread/bitfields-1.c +--- a/gcc/testsuite/c-c++-common/simulate-thread/bitfields-1.c 2020-05-28 16:12:56.939511599 +0800 ++++ b/gcc/testsuite/c-c++-common/simulate-thread/bitfields-1.c 2020-05-28 15:54:33.821511589 +0800 +@@ -1,5 +1,5 @@ + /* { dg-do link } */ +-/* { dg-options "--param allow-store-data-races=0" } */ ++/* { dg-options "-fno-allow-store-data-races" } */ + /* { dg-final { simulate-thread } } */ + + #include +diff -uprN a/gcc/testsuite/c-c++-common/simulate-thread/bitfields-2.c b/gcc/testsuite/c-c++-common/simulate-thread/bitfields-2.c +--- a/gcc/testsuite/c-c++-common/simulate-thread/bitfields-2.c 2020-05-28 16:12:56.939511599 +0800 ++++ b/gcc/testsuite/c-c++-common/simulate-thread/bitfields-2.c 2020-05-28 15:54:33.821511589 +0800 +@@ -1,5 +1,5 @@ + /* { dg-do link { target { ! 
int16 } } } */ +-/* { dg-options "--param allow-store-data-races=0" } */ ++/* { dg-options "-fno-allow-store-data-races" } */ + /* { dg-final { simulate-thread } } */ + + #include +diff -uprN a/gcc/testsuite/c-c++-common/simulate-thread/bitfields-3.c b/gcc/testsuite/c-c++-common/simulate-thread/bitfields-3.c +--- a/gcc/testsuite/c-c++-common/simulate-thread/bitfields-3.c 2020-05-28 16:12:56.939511599 +0800 ++++ b/gcc/testsuite/c-c++-common/simulate-thread/bitfields-3.c 2020-05-28 15:54:33.821511589 +0800 +@@ -1,5 +1,5 @@ + /* { dg-do link } */ +-/* { dg-options "--param allow-store-data-races=0" } */ ++/* { dg-options "-fno-allow-store-data-races" } */ + /* { dg-final { simulate-thread } } */ + + #include +diff -uprN a/gcc/testsuite/c-c++-common/simulate-thread/bitfields-4.c b/gcc/testsuite/c-c++-common/simulate-thread/bitfields-4.c +--- a/gcc/testsuite/c-c++-common/simulate-thread/bitfields-4.c 2020-05-28 16:12:56.939511599 +0800 ++++ b/gcc/testsuite/c-c++-common/simulate-thread/bitfields-4.c 2020-05-28 15:54:33.821511589 +0800 +@@ -1,5 +1,5 @@ + /* { dg-do link } */ +-/* { dg-options "--param allow-store-data-races=0" } */ ++/* { dg-options "-fno-allow-store-data-races" } */ + /* { dg-final { simulate-thread } } */ + + #include +diff -uprN a/gcc/testsuite/gcc.dg/lto/pr52097_0.c b/gcc/testsuite/gcc.dg/lto/pr52097_0.c +--- a/gcc/testsuite/gcc.dg/lto/pr52097_0.c 2020-05-28 16:12:57.803511599 +0800 ++++ b/gcc/testsuite/gcc.dg/lto/pr52097_0.c 2020-05-28 15:54:34.777511589 +0800 +@@ -1,5 +1,5 @@ + /* { dg-lto-do link } */ +-/* { dg-lto-options { { -O -flto -fexceptions -fnon-call-exceptions --param allow-store-data-races=0 } } } */ ++/* { dg-lto-options { { -O -flto -fexceptions -fnon-call-exceptions -fno-allow-store-data-races } } } */ + /* { dg-require-effective-target exceptions } */ + + typedef struct { unsigned int e0 : 16; } s1; +diff -uprN a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-2.c b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-2.c +--- a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-2.c 2020-05-28 16:12:57.815511599 +0800 ++++ b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-2.c 2020-05-28 15:54:34.781511589 +0800 +@@ -1,5 +1,5 @@ + /* { dg-do link } */ +-/* { dg-options "--param allow-store-data-races=0 -O2" } */ ++/* { dg-options "-fno-allow-store-data-races -O2" } */ + /* { dg-final { simulate-thread } } */ + + #include +diff -uprN a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-3.c b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-3.c +--- a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-3.c 2020-05-28 16:12:57.815511599 +0800 ++++ b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-3.c 2020-05-28 15:54:34.781511589 +0800 +@@ -1,5 +1,5 @@ + /* { dg-do link } */ +-/* { dg-options "--param allow-store-data-races=0 -O2" } */ ++/* { dg-options "-fno-allow-store-data-races -O2" } */ + /* { dg-final { simulate-thread } } */ + + #include +diff -uprN a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-4.c b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-4.c +--- a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-4.c 2020-05-28 16:12:57.815511599 +0800 ++++ b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store-4.c 2020-05-28 15:54:34.781511589 +0800 +@@ -1,5 +1,5 @@ + /* { dg-do link } */ +-/* { dg-options "--param allow-store-data-races=0" } */ ++/* { dg-options "-fno-allow-store-data-races" } */ + /* { dg-final { simulate-thread } } */ + + #include +diff -uprN 
a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store.c b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store.c +--- a/gcc/testsuite/gcc.dg/simulate-thread/speculative-store.c 2020-05-28 16:12:57.815511599 +0800 ++++ b/gcc/testsuite/gcc.dg/simulate-thread/speculative-store.c 2020-05-28 15:54:34.781511589 +0800 +@@ -1,12 +1,12 @@ + /* { dg-do link } */ +-/* { dg-options "--param allow-store-data-races=0" } */ ++/* { dg-options "-fno-allow-store-data-races" } */ + /* { dg-final { simulate-thread } } */ + + #include + #include "simulate-thread.h" + + /* This file tests that speculative store movement out of a loop doesn't +- happen. This is disallowed when --param allow-store-data-races is 0. */ ++ happen. This is disallowed when -fno-allow-store-data-races. */ + + int global = 100; + +diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/20050314-1.c b/gcc/testsuite/gcc.dg/tree-ssa/20050314-1.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/20050314-1.c 2020-05-28 16:12:58.027511599 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/20050314-1.c 2020-05-28 15:54:34.997511589 +0800 +@@ -1,5 +1,5 @@ + /* { dg-do compile } */ +-/* { dg-options "-O1 -fdump-tree-lim2-details --param allow-store-data-races=1" } */ ++/* { dg-options "-O1 -fdump-tree-lim2-details -fallow-store-data-races" } */ + + float a[100]; + +diff -uprN a/gcc/testsuite/g++.dg/simulate-thread/bitfields-2.C b/gcc/testsuite/g++.dg/simulate-thread/bitfields-2.C +--- a/gcc/testsuite/g++.dg/simulate-thread/bitfields-2.C 2020-05-28 16:12:57.015511599 +0800 ++++ b/gcc/testsuite/g++.dg/simulate-thread/bitfields-2.C 2020-05-28 15:54:33.885511589 +0800 +@@ -1,5 +1,5 @@ + /* { dg-do link } */ +-/* { dg-options "--param allow-store-data-races=0" } */ ++/* { dg-options "-fno-allow-store-data-races" } */ + /* { dg-final { simulate-thread } } */ + + /* Test that setting does not touch either or . +diff -uprN a/gcc/testsuite/g++.dg/simulate-thread/bitfields.C b/gcc/testsuite/g++.dg/simulate-thread/bitfields.C +--- a/gcc/testsuite/g++.dg/simulate-thread/bitfields.C 2020-05-28 16:12:57.015511599 +0800 ++++ b/gcc/testsuite/g++.dg/simulate-thread/bitfields.C 2020-05-28 15:54:33.885511589 +0800 +@@ -1,5 +1,5 @@ + /* { dg-do link } */ +-/* { dg-options "--param allow-store-data-races=0" } */ ++/* { dg-options "-fno-allow-store-data-races" } */ + /* { dg-final { simulate-thread } } */ + + /* Test that setting does not touch either or . +diff -uprN a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c +--- a/gcc/tree-if-conv.c 2020-05-28 16:12:58.831511599 +0800 ++++ b/gcc/tree-if-conv.c 2020-05-28 15:54:35.641511589 +0800 +@@ -913,10 +913,10 @@ ifcvt_memrefs_wont_trap (gimple *stmt, v + to unconditionally. */ + if (base_master_dr + && DR_BASE_W_UNCONDITIONALLY (*base_master_dr)) +- return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES); ++ return flag_store_data_races; + /* or the base is known to be not readonly. */ + else if (base_object_writable (DR_REF (a))) +- return PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES); ++ return flag_store_data_races; + } + + return false; +diff -uprN a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c +--- a/gcc/tree-ssa-loop-im.c 2020-05-28 16:12:58.779511599 +0800 ++++ b/gcc/tree-ssa-loop-im.c 2020-05-28 15:54:35.729511589 +0800 +@@ -2088,7 +2088,7 @@ execute_sm (struct loop *loop, vec + for_each_index (&ref->mem.ref, force_move_till, &fmt_data); + + if (bb_in_transaction (loop_preheader_edge (loop)->src) +- || (! PARAM_VALUE (PARAM_ALLOW_STORE_DATA_RACES) ++ || (! flag_store_data_races + && ! 
ref_always_accessed_p (loop, ref, true))) + multi_threaded_model_p = true; + diff --git a/address-calculation-optimization-within-loop.patch b/address-calculation-optimization-within-loop.patch index be36f80..9bfa0b6 100644 --- a/address-calculation-optimization-within-loop.patch +++ b/address-calculation-optimization-within-loop.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-widening_mul-restrict-ops-to-be-defined-in-the-same-.patch: +d21dff5b4fee51ae432143065bededfc763dc344 + diff -Nurp a/gcc/testsuite/gcc.dg/pr94269.c b/gcc/testsuite/gcc.dg/pr94269.c --- a/gcc/testsuite/gcc.dg/pr94269.c 1970-01-01 08:00:00.000000000 +0800 +++ b/gcc/testsuite/gcc.dg/pr94269.c 2020-04-17 17:04:50.608000000 +0800 @@ -56,8 +62,8 @@ diff -Nurp a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c &type2, &mult_rhs2)) return false; diff -Nurp a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c ---- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c 2020-03-31 09:51:36.000000000 +0800 -+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c 2020-04-29 10:55:44.937471475 +0800 +--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c 2020-03-31 09:51:36.000000000 +0800 ++++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c 2020-04-29 10:55:44.937471475 +0800 @@ -17,7 +17,6 @@ f (TYPE *x, TYPE *y, unsigned short n, l /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */ /* Should multiply by (VF-1)*4 rather than (257-1)*4. */ diff --git a/change-gcc-BASE-VER.patch b/change-gcc-BASE-VER.patch index 92ae397..79dd167 100644 --- a/change-gcc-BASE-VER.patch +++ b/change-gcc-BASE-VER.patch @@ -1,17 +1,19 @@ -diff -uprN a/gcc/BASE-VER b/gcc/BASE-VER ---- a/gcc/BASE-VER 2020-03-31 09:51:52.000000000 +0800 -+++ b/gcc/BASE-VER 2020-05-14 16:45:36.416688565 +0800 +diff -Nurp a/gcc/BASE-VER b/gcc/BASE-VER +--- a/gcc/BASE-VER 2020-08-19 10:47:14.100000000 +0800 ++++ b/gcc/BASE-VER 2020-08-19 10:32:30.380000000 +0800 @@ -1 +1 @@ -9.3.0 +9.3.1 -diff -uprN a/gcc/ChangeLog b/gcc/ChangeLog ---- a/gcc/ChangeLog 2020-03-31 09:51:30.000000000 +0800 -+++ b/gcc/ChangeLog 2020-05-14 16:45:36.420688565 +0800 -@@ -1,3 +1,7 @@ -+2020-03-12 openEuler -+ -+ * BASE-VER: Set to 9.3.1. -+ - 2020-03-12 Release Manager +diff -Nurp a/gcc/Makefile.in b/gcc/Makefile.in +--- a/gcc/Makefile.in 2020-08-19 10:32:45.528000000 +0800 ++++ b/gcc/Makefile.in 2020-08-19 10:34:24.968000000 +0800 +@@ -885,8 +885,7 @@ PATCHLEVEL_c := \ + # significant - do not remove it. + BASEVER_s := "\"$(BASEVER_c)\"" + DEVPHASE_s := "\"$(if $(DEVPHASE_c), ($(DEVPHASE_c)))\"" +-DATESTAMP_s := \ +- "\"$(if $(DEVPHASE_c)$(filter-out 0,$(PATCHLEVEL_c)), $(DATESTAMP_c))\"" ++DATESTAMP_s := "\"\"" + PKGVERSION_s:= "\"@PKGVERSION@\"" + BUGURL_s := "\"@REPORT_BUGS_TO@\"" - * GCC 9.3.0 released. 
diff --git a/complete-struct-reorg.patch b/complete-struct-reorg.patch new file mode 100644 index 0000000..60c8cf2 --- /dev/null +++ b/complete-struct-reorg.patch @@ -0,0 +1,1814 @@ +diff -Nurp a/gcc/ipa-struct-reorg/escapes.def b/gcc/ipa-struct-reorg/escapes.def +--- a/gcc/ipa-struct-reorg/escapes.def 2020-07-18 05:11:11.548000000 -0400 ++++ b/gcc/ipa-struct-reorg/escapes.def 2020-07-18 05:16:25.928000000 -0400 +@@ -56,5 +56,7 @@ DEF_ESCAPE (escape_non_optimize, "Type u + DEF_ESCAPE (escape_array, "Type is used in an array [not handled yet]") + DEF_ESCAPE (escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]") + DEF_ESCAPE (escape_return, "Type escapes via a return [not handled yet]") ++DEF_ESCAPE (escape_separate_instance, "Type escapes via a separate instance") ++DEF_ESCAPE (escape_unhandled_rewrite, "Type escapes via a unhandled rewrite stmt") + + #undef DEF_ESCAPE +diff -Nurp a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c 2020-07-18 05:11:17.664000000 -0400 ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c 2020-07-20 09:08:08.912000000 -0400 +@@ -95,6 +95,7 @@ along with GCC; see the file COPYING3. + #include "ipa-struct-reorg.h" + #include "tree-eh.h" + #include "bitmap.h" ++#include "cfgloop.h" + #include "ipa-param-manipulation.h" + #include "tree-ssa-live.h" /* For remove_unused_locals. */ + +@@ -103,6 +104,7 @@ along with GCC; see the file COPYING3. + namespace { + + using namespace struct_reorg; ++using namespace struct_relayout; + + /* Return true iff TYPE is stdarg va_list type. */ + +@@ -152,6 +154,14 @@ handled_type (tree type) + return false; + } + ++enum srmode ++{ ++ NORMAL = 0, ++ COMPLETE_STRUCT_RELAYOUT ++}; ++ ++static bool is_result_of_mult (tree arg, tree *num, tree struct_size); ++ + } // anon namespace + + namespace struct_reorg { +@@ -241,7 +251,8 @@ srtype::srtype (tree type) + : type (type), + chain_type (false), + escapes (does_not_escape), +- visited (false) ++ visited (false), ++ has_alloc_array (0) + { + for (int i = 0; i < max_split; i++) + newtype[i] = NULL_TREE; +@@ -441,13 +452,6 @@ srtype::dump (FILE *f) + fn->simple_dump (f); + } + fprintf (f, "\n }\n"); +- fprintf (f, "\n field_sites = {"); +- FOR_EACH_VEC_ELT (field_sites, i, field) +- { +- fprintf (f, " \n"); +- field->simple_dump (f); +- } +- fprintf (f, "\n }\n"); + fprintf (f, "}\n"); + } + +@@ -798,12 +802,6 @@ srfield::dump (FILE *f) + fprintf (f, ", offset = " HOST_WIDE_INT_PRINT_DEC, offset); + fprintf (f, ", type = "); + print_generic_expr (f, fieldtype); +- if (type) +- { +- fprintf (f, "( srtype = "); +- type->simple_dump (f); +- fprintf (f, ")"); +- } + fprintf (f, "\n}\n"); + } + +@@ -813,7 +811,10 @@ srfield::dump (FILE *f) + void + srfield::simple_dump (FILE *f) + { +- fprintf (f, "field (%d)", DECL_UID (fielddecl)); ++ if (fielddecl) ++ { ++ fprintf (f, "field (%d)", DECL_UID (fielddecl)); ++ } + } + + /* Dump out the access structure to FILE. */ +@@ -857,21 +858,113 @@ srdecl::dump (FILE *file) + + } // namespace struct_reorg + ++namespace struct_relayout { ++ ++/* Complete Structure Relayout Optimization. ++ It reorganizes all structure members, and puts same member together. ++ struct s { ++ long a; ++ int b; ++ struct s* c; ++ }; ++ Array looks like ++ abcabcabcabc... ++ will be transformed to ++ aaaa...bbbb...cccc... 
++*/ ++ ++#define GPTR_SIZE(i) \ ++ TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (gptr[i]))) ++ ++unsigned transformed = 0; ++ ++unsigned ++csrtype::calculate_field_num (tree field_offset) ++{ ++ HOST_WIDE_INT off = int_byte_position (field_offset); ++ unsigned i = 1; ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (off == int_byte_position (field)) ++ { ++ return i; ++ } ++ i++; ++ } ++ return 0; ++} ++ ++void ++csrtype::init_type_info (void) ++{ ++ if (!type) ++ { ++ return; ++ } ++ new_size = old_size = tree_to_uhwi (TYPE_SIZE_UNIT (type)); ++ ++ /* Close enough to pad to improve performance. */ ++ if (old_size > 48 && old_size < 64) ++ { ++ new_size = 64; ++ } ++ if (old_size > 96 && old_size < 128) ++ { ++ new_size = 128; ++ } ++ ++ /* For performance reasons, only allow structure size ++ that is a power of 2 and not too big. */ ++ if (new_size != 1 && new_size != 2 ++ && new_size != 4 && new_size != 8 ++ && new_size != 16 && new_size != 32 ++ && new_size != 64 && new_size != 128) ++ { ++ new_size = 0; ++ field_count = 0; ++ return; ++ } ++ ++ unsigned i = 0; ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ i++; ++ } ++ field_count = i; ++ ++ struct_size = build_int_cstu (TREE_TYPE (TYPE_SIZE_UNIT (type)), ++ new_size); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Type: "); ++ print_generic_expr (dump_file, type); ++ fprintf (dump_file, " has %d members.\n", field_count); ++ fprintf (dump_file, "Modify struct size from %ld to %ld.\n", ++ old_size, new_size); ++ } ++} ++ ++} // namespace struct_relayout ++ + namespace { + ++/* Structure definition for ipa_struct_reorg and ipa_struct_relayout. */ ++ + struct ipa_struct_reorg + { ++public: + // Constructors + ipa_struct_reorg(void) + : current_function (NULL), +- done_recording(false) ++ done_recording (false), ++ current_mode (NORMAL) + { + } + +- // public methods +- unsigned execute(void); ++ unsigned execute (enum srmode mode); + void mark_type_as_escape (tree type, escape_type, gimple *stmt = NULL); +-private: ++ + // fields + auto_vec_del types; + auto_vec_del functions; +@@ -879,8 +972,8 @@ private: + srfunction *current_function; + + bool done_recording; ++ srmode current_mode; + +- // private methods + void dump_types (FILE *f); + void dump_types_escaped (FILE *f); + void dump_functions (FILE *f); +@@ -910,6 +1003,7 @@ private: + void maybe_record_allocation_site (cgraph_node *, gimple *); + void record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt); + void mark_expr_escape(tree, escape_type, gimple *stmt); ++ bool handled_allocation_stmt (gimple *stmt); + tree allocate_size (srtype *t, gimple *stmt); + + void mark_decls_in_as_not_needed (tree fn); +@@ -925,6 +1019,7 @@ private: + bool get_type_field (tree expr, tree &base, bool &indirect, srtype *&type, srfield *&field, bool &realpart, bool &imagpart, bool &address, bool should_create = false, bool can_escape = false); + bool wholeaccess (tree expr, tree base, tree accesstype, srtype *t); + ++ void check_alloc_num (gimple *stmt, srtype *type); + void check_definition (srdecl *decl, vec&); + void check_uses (srdecl *decl, vec&); + void check_use (srdecl *decl, gimple *stmt, vec&); +@@ -937,8 +1032,631 @@ private: + + bool has_rewritten_type (srfunction*); + void maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt); ++ unsigned execute_struct_relayout (void); ++}; ++ ++struct ipa_struct_relayout ++{ ++public: ++ // fields ++ tree gptr[max_relayout_split + 
1]; ++ csrtype ctype; ++ ipa_struct_reorg* sr; ++ cgraph_node* current_node; ++ ++ // Constructors ++ ipa_struct_relayout (tree type, ipa_struct_reorg* sr_) ++ { ++ ctype.type = type; ++ sr = sr_; ++ current_node = NULL; ++ for (int i = 0; i < max_relayout_split + 1; i++) ++ { ++ gptr[i] = NULL; ++ } ++ } ++ ++ // Methods ++ tree create_new_vars (tree type, const char *name); ++ void create_global_ptrs (void); ++ unsigned int rewrite (void); ++ void rewrite_stmt_in_function (void); ++ bool rewrite_debug (gimple *stmt, gimple_stmt_iterator *gsi); ++ bool rewrite_stmt (gimple *stmt, gimple_stmt_iterator *gsi); ++ bool handled_allocation_stmt (gcall *stmt); ++ void init_global_ptrs (gcall *stmt, gimple_stmt_iterator *gsi); ++ bool check_call_uses (gcall *stmt); ++ bool rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi); ++ tree create_ssa (tree node, gimple_stmt_iterator *gsi); ++ bool is_candidate (tree xhs); ++ tree rewrite_address (tree xhs, gimple_stmt_iterator *gsi); ++ tree rewrite_offset (tree offset, HOST_WIDE_INT num); ++ bool rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi); ++ bool maybe_rewrite_cst (tree cst, gimple_stmt_iterator *gsi, ++ HOST_WIDE_INT ×); ++ unsigned int execute (void); + }; + ++} // anon namespace ++ ++namespace { ++ ++/* Methods for ipa_struct_relayout. */ ++ ++static void ++set_var_attributes (tree var) ++{ ++ if (!var) ++ { ++ return; ++ } ++ gcc_assert (TREE_CODE (var) == VAR_DECL); ++ ++ DECL_ARTIFICIAL (var) = 1; ++ DECL_EXTERNAL (var) = 0; ++ TREE_STATIC (var) = 1; ++ TREE_PUBLIC (var) = 0; ++ TREE_USED (var) = 1; ++ DECL_CONTEXT (var) = NULL; ++ TREE_THIS_VOLATILE (var) = 0; ++ TREE_ADDRESSABLE (var) = 0; ++ TREE_READONLY (var) = 0; ++ if (is_global_var (var)) ++ { ++ set_decl_tls_model (var, TLS_MODEL_NONE); ++ } ++} ++ ++tree ++ipa_struct_relayout::create_new_vars (tree type, const char *name) ++{ ++ gcc_assert (type); ++ tree new_type = build_pointer_type (type); ++ ++ tree new_name = NULL; ++ if (name) ++ { ++ new_name = get_identifier (name); ++ } ++ ++ tree new_var = build_decl (UNKNOWN_LOCATION, VAR_DECL, new_name, new_type); ++ ++ /* set new_var's attributes. */ ++ set_var_attributes (new_var); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Created new var: "); ++ print_generic_expr (dump_file, new_var); ++ fprintf (dump_file, "\n"); ++ } ++ return new_var; ++} ++ ++void ++ipa_struct_relayout::create_global_ptrs (void) ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Create global gptrs: {\n"); ++ } ++ ++ const char *type_name = get_type_name (ctype.type); ++ char *gptr0_name = concat (type_name, "_gptr0", NULL); ++ tree var_gptr0 = create_new_vars (ctype.type, gptr0_name); ++ gptr[0] = var_gptr0; ++ varpool_node::add (var_gptr0); ++ ++ unsigned i = 1; ++ for (tree field = TYPE_FIELDS (ctype.type); field; ++ field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ tree type = TREE_TYPE (field); ++ ++ char *name = NULL; ++ char id[10] = {0}; ++ sprintf (id, "%d", i); ++ const char *decl_name = IDENTIFIER_POINTER (DECL_NAME (field)); ++ ++ name = concat (type_name, "_", decl_name, "_gptr", id, NULL); ++ ++ tree var = create_new_vars (type, name); ++ ++ gptr[i] = var; ++ varpool_node::add (var); ++ i++; ++ } ++ } ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nTotally create %d gptrs. 
}\n\n", i); ++ } ++ gcc_assert (ctype.field_count == i - 1); ++} ++ ++void ++ipa_struct_relayout::rewrite_stmt_in_function (void) ++{ ++ gcc_assert (cfun); ++ ++ basic_block bb = NULL; ++ gimple_stmt_iterator si; ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (si = gsi_start_bb (bb); !gsi_end_p (si);) ++ { ++ gimple *stmt = gsi_stmt (si); ++ if (rewrite_stmt (stmt, &si)) ++ { ++ gsi_remove (&si, true); ++ } ++ else ++ { ++ gsi_next (&si); ++ } ++ } ++ } ++ ++ /* Debug statements need to happen after all other statements ++ have changed. */ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (si = gsi_start_bb (bb); !gsi_end_p (si);) ++ { ++ gimple *stmt = gsi_stmt (si); ++ if (gimple_code (stmt) == GIMPLE_DEBUG ++ && rewrite_debug (stmt, &si)) ++ { ++ gsi_remove (&si, true); ++ } ++ else ++ { ++ gsi_next (&si); ++ } ++ } ++ } ++} ++ ++unsigned int ++ipa_struct_relayout::rewrite (void) ++{ ++ cgraph_node *cnode = NULL; ++ function *fn = NULL; ++ FOR_EACH_FUNCTION (cnode) ++ { ++ if (!cnode->real_symbol_p () || !cnode->has_gimple_body_p ()) ++ { ++ continue; ++ } ++ if (cnode->definition) ++ { ++ fn = DECL_STRUCT_FUNCTION (cnode->decl); ++ current_node = cnode; ++ push_cfun (fn); ++ ++ rewrite_stmt_in_function (); ++ ++ update_ssa (TODO_update_ssa_only_virtuals); ++ ++ if (flag_tree_pta) ++ { ++ compute_may_aliases (); ++ } ++ ++ remove_unused_locals (); ++ ++ cgraph_edge::rebuild_edges (); ++ ++ free_dominance_info (CDI_DOMINATORS); ++ ++ pop_cfun (); ++ current_node = NULL; ++ } ++ } ++ return TODO_verify_all; ++} ++ ++bool ++ipa_struct_relayout::rewrite_debug (gimple *stmt, gimple_stmt_iterator *gsi) ++{ ++ /* TODO: For future implement. */ ++ return true; ++} ++ ++bool ++ipa_struct_relayout::rewrite_stmt (gimple *stmt, gimple_stmt_iterator *gsi) ++{ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_ASSIGN: ++ return rewrite_assign (as_a (stmt), gsi); ++ case GIMPLE_CALL: ++ return rewrite_call (as_a (stmt), gsi); ++ default: ++ break; ++ } ++ return false; ++} ++ ++bool ++ipa_struct_relayout::handled_allocation_stmt (gcall *stmt) ++{ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) ++ { ++ return true; ++ } ++ return false; ++} ++ ++void ++ipa_struct_relayout::init_global_ptrs (gcall *stmt, gimple_stmt_iterator *gsi) ++{ ++ gcc_assert (handled_allocation_stmt (stmt)); ++ ++ tree lhs = gimple_call_lhs (stmt); ++ ++ /* Case that gimple is at the end of bb. */ ++ if (gsi_one_before_end_p (*gsi)) ++ { ++ gassign* gptr0 = gimple_build_assign (gptr[0], lhs); ++ gsi_insert_after (gsi, gptr0, GSI_SAME_STMT); ++ } ++ gsi_next (gsi); ++ ++ /* Emit gimple gptr0 = _X and gptr1 = _X. */ ++ gassign* gptr0 = gimple_build_assign (gptr[0], lhs); ++ gsi_insert_before (gsi, gptr0, GSI_SAME_STMT); ++ gassign* gptr1 = gimple_build_assign (gptr[1], lhs); ++ gsi_insert_before (gsi, gptr1, GSI_SAME_STMT); ++ ++ /* Emit gimple gptr_[i] = gptr_[i-1] + _Y[gap]. */ ++ for (unsigned i = 2; i <= ctype.field_count; i++) ++ { ++ gimple *new_stmt = NULL; ++ tree gptr_i_prev_ssa = create_ssa (gptr[i-1], gsi); ++ tree gptr_i_ssa = make_ssa_name (TREE_TYPE (gptr[i-1])); ++ ++ /* Emit gimple _Y[gap] = N * sizeof (member). 
*/ ++ tree member_gap = gimplify_build2 (gsi, MULT_EXPR, ++ long_unsigned_type_node, ++ gimple_call_arg (stmt, 0), ++ GPTR_SIZE (i-1)); ++ ++ new_stmt = gimple_build_assign (gptr_i_ssa, POINTER_PLUS_EXPR, ++ gptr_i_prev_ssa, member_gap); ++ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); ++ ++ gassign *gptr_i = gimple_build_assign (gptr[i], gptr_i_ssa); ++ gsi_insert_before (gsi, gptr_i, GSI_SAME_STMT); ++ } ++ gsi_prev (gsi); ++} ++ ++bool ++ipa_struct_relayout::check_call_uses (gcall *stmt) ++{ ++ gcc_assert (current_node); ++ srfunction *fn = sr->find_function (current_node); ++ tree lhs = gimple_call_lhs (stmt); ++ ++ if (fn == NULL) ++ { ++ return false; ++ } ++ ++ srdecl *d = fn->find_decl (lhs); ++ if (d == NULL) ++ { ++ return false; ++ } ++ if (types_compatible_p (d->type->type, ctype.type)) ++ { ++ return true; ++ } ++ ++ return false; ++} ++ ++bool ++ipa_struct_relayout::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) ++{ ++ if (handled_allocation_stmt (stmt)) ++ { ++ /* Rewrite stmt _X = calloc (N, sizeof (struct)). */ ++ tree size = gimple_call_arg (stmt, 1); ++ if (TREE_CODE (size) != INTEGER_CST) ++ { ++ return false; ++ } ++ if (tree_to_uhwi (size) != ctype.old_size) ++ { ++ return false; ++ } ++ if (!check_call_uses (stmt)) ++ { ++ return false; ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Rewrite allocation call:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "to\n"); ++ } ++ ++ /* Modify sizeof (struct). */ ++ gimple_call_set_arg (stmt, 1, ctype.struct_size); ++ update_stmt (stmt); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ ++ init_global_ptrs (stmt, gsi); ++ } ++ return false; ++} ++ ++tree ++ipa_struct_relayout::create_ssa (tree node, gimple_stmt_iterator *gsi) ++{ ++ tree node_ssa = make_ssa_name (TREE_TYPE (node)); ++ gassign *stmt = gimple_build_assign (node_ssa, node); ++ gsi_insert_before (gsi, stmt, GSI_SAME_STMT); ++ return node_ssa; ++} ++ ++bool ++ipa_struct_relayout::is_candidate (tree xhs) ++{ ++ if (TREE_CODE (xhs) != COMPONENT_REF) ++ { ++ return false; ++ } ++ tree mem = TREE_OPERAND (xhs, 0); ++ if (TREE_CODE (mem) == MEM_REF) ++ { ++ tree type = TREE_TYPE (mem); ++ if (types_compatible_p (type, ctype.type)) ++ { ++ return true; ++ } ++ } ++ return false; ++} ++ ++tree ++ipa_struct_relayout::rewrite_address (tree xhs, gimple_stmt_iterator *gsi) ++{ ++ tree mem_ref = TREE_OPERAND (xhs, 0); ++ tree pointer = TREE_OPERAND (mem_ref, 0); ++ tree pointer_offset = TREE_OPERAND (mem_ref, 1); ++ tree field = TREE_OPERAND (xhs, 1); ++ ++ tree pointer_ssa = fold_convert (long_unsigned_type_node, pointer); ++ tree gptr0_ssa = fold_convert (long_unsigned_type_node, gptr[0]); ++ ++ /* Emit gimple _X1 = ptr - gptr0. */ ++ tree step1 = gimplify_build2 (gsi, MINUS_EXPR, long_unsigned_type_node, ++ pointer_ssa, gptr0_ssa); ++ ++ /* Emit gimple _X2 = _X1 / sizeof (struct). */ ++ tree step2 = gimplify_build2 (gsi, TRUNC_DIV_EXPR, long_unsigned_type_node, ++ step1, ctype.struct_size); ++ ++ unsigned field_num = ctype.calculate_field_num (field); ++ gcc_assert (field_num > 0 && field_num <= ctype.field_count); ++ ++ /* Emit gimple _X3 = _X2 * sizeof (member). */ ++ tree step3 = gimplify_build2 (gsi, MULT_EXPR, long_unsigned_type_node, ++ step2, GPTR_SIZE (field_num)); ++ ++ /* Emit gimple _X4 = gptr[I]. 
*/ ++ tree gptr_field_ssa = create_ssa (gptr[field_num], gsi); ++ tree new_address = make_ssa_name (TREE_TYPE (gptr[field_num])); ++ gassign *new_stmt = gimple_build_assign (new_address, POINTER_PLUS_EXPR, ++ gptr_field_ssa, step3); ++ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); ++ ++ /* MEM_REF with nonzero offset like ++ MEM[ptr + sizeof (struct)] = 0B ++ should be transformed to ++ MEM[gptr + sizeof (member)] = 0B ++ */ ++ HOST_WIDE_INT size ++ = tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (new_address)))); ++ tree new_size = rewrite_offset (pointer_offset, size); ++ if (new_size) ++ { ++ TREE_OPERAND (mem_ref, 1) = new_size; ++ } ++ ++ /* Update mem_ref pointer. */ ++ TREE_OPERAND (mem_ref, 0) = new_address; ++ ++ /* Update mem_ref TREE_TYPE. */ ++ TREE_TYPE (mem_ref) = TREE_TYPE (TREE_TYPE (new_address)); ++ ++ return mem_ref; ++} ++ ++tree ++ipa_struct_relayout::rewrite_offset (tree offset, HOST_WIDE_INT num) ++{ ++ if (TREE_CODE (offset) == INTEGER_CST) ++ { ++ bool sign = false; ++ HOST_WIDE_INT off = TREE_INT_CST_LOW (offset); ++ if (off == 0) ++ { ++ return NULL; ++ } ++ if (off < 0) ++ { ++ off = -off; ++ sign = true; ++ } ++ if (off % ctype.old_size == 0) ++ { ++ HOST_WIDE_INT times = off / ctype.old_size; ++ times = sign ? -times : times; ++ return build_int_cst (TREE_TYPE (offset), num * times); ++ } ++ } ++ return NULL; ++} ++ ++#define REWRITE_ASSIGN_TREE_IN_STMT(node) \ ++do \ ++{ \ ++ tree node = gimple_assign_##node (stmt); \ ++ if (node && is_candidate (node)) \ ++ { \ ++ tree mem_ref = rewrite_address (node, gsi); \ ++ gimple_assign_set_##node (stmt, mem_ref); \ ++ update_stmt (stmt); \ ++ } \ ++} while (0) ++ ++/* COMPONENT_REF = exp => MEM_REF = exp ++ / \ / \ ++ MEM_REF field gptr offset ++ / \ ++ pointer offset ++*/ ++bool ++ipa_struct_relayout::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) ++{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Maybe rewrite assign:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "to\n"); ++ } ++ ++ switch (gimple_num_ops (stmt)) ++ { ++ case 4: REWRITE_ASSIGN_TREE_IN_STMT (rhs3); // FALLTHRU ++ case 3: ++ { ++ REWRITE_ASSIGN_TREE_IN_STMT (rhs2); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ if (rhs2 && TREE_CODE (rhs2) == INTEGER_CST) ++ { ++ /* Handle pointer++ and pointer-- or ++ factor is euqal to struct size. 
*/ ++ HOST_WIDE_INT times = 1; ++ if (maybe_rewrite_cst (rhs2, gsi, times)) ++ { ++ tree tmp = build_int_cst ( ++ TREE_TYPE (TYPE_SIZE_UNIT (ctype.type)), ++ ctype.new_size * times); ++ gimple_assign_set_rhs2 (stmt, tmp); ++ update_stmt (stmt); ++ } ++ } ++ } // FALLTHRU ++ case 2: REWRITE_ASSIGN_TREE_IN_STMT (rhs1); // FALLTHRU ++ case 1: REWRITE_ASSIGN_TREE_IN_STMT (lhs); // FALLTHRU ++ case 0: break; ++ default: gcc_unreachable (); ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ return false; ++} ++ ++bool ++ipa_struct_relayout::maybe_rewrite_cst (tree cst, gimple_stmt_iterator *gsi, ++ HOST_WIDE_INT ×) ++{ ++ bool ret = false; ++ gcc_assert (TREE_CODE (cst) == INTEGER_CST); ++ ++ gimple *stmt = gsi_stmt (*gsi); ++ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ if (types_compatible_p (inner_type (TREE_TYPE (rhs1)), ctype.type) ++ || types_compatible_p (inner_type (TREE_TYPE (lhs)), ctype.type)) ++ { ++ tree num = NULL; ++ if (is_result_of_mult (cst, &num, TYPE_SIZE_UNIT (ctype.type))) ++ { ++ times = TREE_INT_CST_LOW (num); ++ return true; ++ } ++ } ++ } ++ ++ if (gimple_assign_rhs_code (stmt) == MULT_EXPR) ++ { ++ if (gsi_one_before_end_p (*gsi)) ++ { ++ return false; ++ } ++ gsi_next (gsi); ++ gimple *stmt2 = gsi_stmt (*gsi); ++ ++ if (gimple_code (stmt2) == GIMPLE_ASSIGN ++ && gimple_assign_rhs_code (stmt2) == POINTER_PLUS_EXPR) ++ { ++ tree lhs = gimple_assign_lhs (stmt2); ++ tree rhs1 = gimple_assign_rhs1 (stmt2); ++ if (types_compatible_p (inner_type (TREE_TYPE (rhs1)), ctype.type) ++ || types_compatible_p (inner_type (TREE_TYPE (lhs)), ctype.type)) ++ { ++ tree num = NULL; ++ if (is_result_of_mult (cst, &num, TYPE_SIZE_UNIT (ctype.type))) ++ { ++ times = TREE_INT_CST_LOW (num); ++ ret = true; ++ } ++ } ++ } ++ gsi_prev (gsi); ++ return ret; ++ } ++ return false; ++} ++ ++unsigned int ++ipa_struct_relayout::execute (void) ++{ ++ ctype.init_type_info (); ++ if (ctype.field_count < min_relayout_split ++ || ctype.field_count > max_relayout_split) ++ { ++ return 0; ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Complete Struct Relayout Type: "); ++ print_generic_expr (dump_file, ctype.type); ++ fprintf (dump_file, "\n"); ++ } ++ transformed++; ++ ++ create_global_ptrs (); ++ return rewrite (); ++} ++ ++} // anon namespace ++ ++namespace { ++ ++/* Methods for ipa_struct_reorg. */ ++ + /* Dump all of the recorded types to file F. */ + + void +@@ -1134,8 +1852,10 @@ ipa_struct_reorg::record_type (tree type + f->type = t1; + t1->add_field_site (f); + } +- if (t1 == type1) +- type1->mark_escape (escape_rescusive_type, NULL); ++ if (t1 == type1 && current_mode != COMPLETE_STRUCT_RELAYOUT) ++ { ++ type1->mark_escape (escape_rescusive_type, NULL); ++ } + } + } + } +@@ -1272,6 +1992,14 @@ ipa_struct_reorg::record_var (tree decl, + else + e = escape_type_volatile_array_or_ptrptr (TREE_TYPE (decl)); + ++ /* Separate instance is hard to trace in complete struct ++ relayout optimization. 
*/ ++ if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ && TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE) ++ { ++ e = escape_separate_instance; ++ } ++ + if (e != does_not_escape) + type->mark_escape (e, NULL); + } +@@ -1347,7 +2075,8 @@ ipa_struct_reorg::find_vars (gimple *stm + { + case GIMPLE_ASSIGN: + if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS +- || gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ || gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR ++ || gimple_assign_rhs_code (stmt) == NOP_EXPR) + { + tree lhs = gimple_assign_lhs (stmt); + tree rhs = gimple_assign_rhs1 (stmt); +@@ -1372,6 +2101,32 @@ ipa_struct_reorg::find_vars (gimple *stm + current_function->record_decl (t, rhs, -1); + } + } ++ else ++ { ++ /* Because we won't handle these stmts in rewrite phase, ++ just mark these types as escaped. */ ++ switch (gimple_num_ops (stmt)) ++ { ++ case 4: mark_type_as_escape ( ++ TREE_TYPE (gimple_assign_rhs3 (stmt)), ++ escape_unhandled_rewrite, stmt); ++ // FALLTHRU ++ case 3: mark_type_as_escape ( ++ TREE_TYPE (gimple_assign_rhs2 (stmt)), ++ escape_unhandled_rewrite, stmt); ++ // FALLTHRU ++ case 2: mark_type_as_escape ( ++ TREE_TYPE (gimple_assign_rhs1 (stmt)), ++ escape_unhandled_rewrite, stmt); ++ // FALLTHRU ++ case 1: mark_type_as_escape ( ++ TREE_TYPE (gimple_assign_lhs (stmt)), ++ escape_unhandled_rewrite, stmt); ++ // FALLTHRU ++ case 0: break; ++ default: gcc_unreachable (); ++ } ++ } + break; + + case GIMPLE_CALL: +@@ -1453,9 +2208,23 @@ is_result_of_mult (tree arg, tree *num, + /* If we have a integer, just check if it is a multiply of STRUCT_SIZE. */ + if (TREE_CODE (arg) == INTEGER_CST) + { +- if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg, struct_size))) ++ bool sign = false; ++ HOST_WIDE_INT size = TREE_INT_CST_LOW (arg); ++ if (size < 0) + { +- *num = size_binop (FLOOR_DIV_EXPR, arg, struct_size); ++ size = -size; ++ sign = true; ++ } ++ tree arg2 = build_int_cst (TREE_TYPE (arg), size); ++ if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg2, struct_size))) ++ { ++ tree number = size_binop (FLOOR_DIV_EXPR, arg2, struct_size); ++ if (sign) ++ { ++ number = build_int_cst (TREE_TYPE (number), ++ -tree_to_shwi (number)); ++ } ++ *num = number; + return true; + } + return false; +@@ -1525,15 +2294,19 @@ is_result_of_mult (tree arg, tree *num, + + /* Return TRUE if STMT is an allocation statement that is handled. */ + +-static bool +-handled_allocation_stmt (gimple *stmt) ++bool ++ipa_struct_reorg::handled_allocation_stmt (gimple *stmt) + { +- if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC) +- || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA) +- || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN)) ++ if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ && gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) ++ return true; ++ if (current_mode != COMPLETE_STRUCT_RELAYOUT ++ && (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN))) + return true; + return false; + } +@@ -1575,7 +2348,8 @@ ipa_struct_reorg::allocate_size (srtype + /* Check that second argument is a constant equal to the size of structure. 
*/ + if (operand_equal_p (arg1, struct_size, 0)) + return size; +- /* Check that first argument is a constant equal to the size of structure. */ ++ /* ??? Check that first argument is a constant ++ equal to the size of structure. */ + if (operand_equal_p (size, struct_size, 0)) + return arg1; + if (dump_file && (dump_flags & TDF_DETAILS)) +@@ -1692,6 +2466,29 @@ ipa_struct_reorg::maybe_record_assign (c + } + } + ++bool ++check_mem_ref_offset (tree expr) ++{ ++ tree num = NULL; ++ bool ret = false; ++ ++ if (TREE_CODE (expr) != MEM_REF) ++ { ++ return false; ++ } ++ ++ /* Try to find the structure size. */ ++ tree field_off = TREE_OPERAND (expr, 1); ++ tree tmp = TREE_OPERAND (expr, 0); ++ if (TREE_CODE (tmp) == ADDR_EXPR) ++ { ++ tmp = TREE_OPERAND (tmp, 0); ++ } ++ tree size = TYPE_SIZE_UNIT (inner_type (TREE_TYPE (tmp))); ++ ret = is_result_of_mult (field_off, &num, size); ++ return ret; ++} ++ + tree + get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, bool &realpart, bool &imagpart, tree &accesstype) + { +@@ -1731,7 +2528,10 @@ get_ref_base_and_offset (tree &e, HOST_W + gcc_assert (TREE_CODE (field_off) == INTEGER_CST); + /* So we can mark the types as escaping if different. */ + accesstype = TREE_TYPE (field_off); +- offset += tree_to_uhwi (field_off); ++ if (!check_mem_ref_offset (expr)) ++ { ++ offset += tree_to_uhwi (field_off); ++ } + return TREE_OPERAND (expr, 0); + } + default: +@@ -2108,6 +2908,39 @@ ipa_struct_reorg::check_type_and_push (t + + } + ++void ++ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) ++{ ++ if (current_mode == COMPLETE_STRUCT_RELAYOUT ++ && handled_allocation_stmt (stmt)) ++ { ++ tree arg0 = gimple_call_arg (stmt, 0); ++ basic_block bb = gimple_bb (stmt); ++ cgraph_node *node = current_function->node; ++ if (integer_onep (arg0)) ++ { ++ /* Actually NOT an array, but may ruin other array. */ ++ type->has_alloc_array = -1; ++ } ++ else if (bb->loop_father != NULL ++ && loop_outer (bb->loop_father) != NULL) ++ { ++ /* The allocation is in a loop. */ ++ type->has_alloc_array = -2; ++ } ++ else if (node->callers != NULL) ++ { ++ type->has_alloc_array = -3; ++ } ++ else ++ { ++ type->has_alloc_array = type->has_alloc_array < 0 ++ ? type->has_alloc_array ++ : type->has_alloc_array + 1; ++ } ++ } ++} ++ + /* + 2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl) + a) if the SSA_NAME is sourced from a pointer plus, record the pointer and +@@ -2151,6 +2984,7 @@ ipa_struct_reorg::check_definition (srde + if (!handled_allocation_stmt (stmt) + || !allocate_size (type, stmt)) + type->mark_escape (escape_return, stmt); ++ check_alloc_num (stmt, type); + return; + } + /* If the SSA_NAME is sourced from an inline-asm, just mark the type as escaping. */ +@@ -2189,6 +3023,21 @@ ipa_struct_reorg::check_definition (srde + return; + } + ++ if (gimple_assign_rhs_code (stmt) == MAX_EXPR ++ || gimple_assign_rhs_code (stmt) == MIN_EXPR) ++ { ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ if (TREE_CODE (rhs) == SSA_NAME) ++ { ++ check_type_and_push (rhs, type, worklist, stmt); ++ } ++ if (TREE_CODE (rhs2) == SSA_NAME) ++ { ++ check_type_and_push (rhs2, type, worklist, stmt); ++ } ++ return; ++ } ++ + /* Casts between pointers and integer are escaping. */ + if (gimple_assign_cast_p (stmt)) + { +@@ -2251,6 +3100,13 @@ ipa_struct_reorg::check_other_side (srde + srtype *t1 = find_type (inner_type (t)); + if (t1 == type) + { ++ /* In Complete Struct Relayout opti, if lhs type is the same ++ as rhs type, we could return without any harm. 
*/ ++ if (current_mode == COMPLETE_STRUCT_RELAYOUT) ++ { ++ return; ++ } ++ + tree base; + bool indirect; + srtype *type1; +@@ -2298,8 +3154,11 @@ ipa_struct_reorg::check_use (srdecl *dec + tree rhs1 = gimple_cond_lhs (stmt); + tree rhs2 = gimple_cond_rhs (stmt); + tree orhs = rhs1; +- if (gimple_cond_code (stmt) != EQ_EXPR +- && gimple_cond_code (stmt) != NE_EXPR) ++ enum tree_code code = gimple_cond_code (stmt); ++ if (code != EQ_EXPR && code != NE_EXPR ++ && (current_mode != COMPLETE_STRUCT_RELAYOUT ++ || (code != LT_EXPR && code != LE_EXPR ++ && code != GT_EXPR && code != GE_EXPR))) + { + mark_expr_escape (rhs1, escape_non_eq, stmt); + mark_expr_escape (rhs2, escape_non_eq, stmt); +@@ -2329,8 +3188,11 @@ ipa_struct_reorg::check_use (srdecl *dec + tree rhs1 = gimple_assign_rhs1 (stmt); + tree rhs2 = gimple_assign_rhs2 (stmt); + tree orhs = rhs1; +- if (gimple_assign_rhs_code (stmt) != EQ_EXPR +- && gimple_assign_rhs_code (stmt) != NE_EXPR) ++ enum tree_code code = gimple_assign_rhs_code (stmt); ++ if (code != EQ_EXPR && code != NE_EXPR ++ && (current_mode != COMPLETE_STRUCT_RELAYOUT ++ || (code != LT_EXPR && code != LE_EXPR ++ && code != GT_EXPR && code != GE_EXPR))) + { + mark_expr_escape (rhs1, escape_non_eq, stmt); + mark_expr_escape (rhs2, escape_non_eq, stmt); +@@ -2727,8 +3589,11 @@ ipa_struct_reorg::propagate_escape (void + void + ipa_struct_reorg::prune_escaped_types (void) + { +- detect_cycles (); +- propagate_escape (); ++ if (current_mode != COMPLETE_STRUCT_RELAYOUT) ++ { ++ detect_cycles (); ++ propagate_escape (); ++ } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { +@@ -3850,16 +4715,82 @@ ipa_struct_reorg::rewrite_functions (voi + } + + unsigned int +-ipa_struct_reorg::execute (void) ++ipa_struct_reorg::execute_struct_relayout (void) + { +- /* FIXME: If there is a top-level inline-asm, the pass immediately returns. */ +- if (symtab->first_asm_symbol ()) +- return 0; +- record_accesses (); +- prune_escaped_types (); +- analyze_types (); ++ unsigned retval = 0; ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ tree type = types[i]->type; ++ if (TYPE_FIELDS (type) == NULL) ++ { ++ continue; ++ } ++ if (types[i]->has_alloc_array != 1) ++ { ++ continue; ++ } ++ if (types[i]->chain_type) ++ { ++ continue; ++ } ++ retval |= ipa_struct_relayout (type, this).execute (); ++ } ++ ++ if (dump_file) ++ { ++ if (transformed) ++ { ++ fprintf (dump_file, "\nNumber of structures to transform in " ++ "Complete Structure Relayout is %d\n", transformed); ++ } ++ else ++ { ++ fprintf (dump_file, "\nNo structures to transform in " ++ "Complete Structure Relayout.\n"); ++ } ++ } + +- return rewrite_functions (); ++ return retval; ++} ++ ++unsigned int ++ipa_struct_reorg::execute (enum srmode mode) ++{ ++ unsigned int ret = 0; ++ ++ if (mode == NORMAL) ++ { ++ current_mode = NORMAL; ++ /* FIXME: If there is a top-level inline-asm, ++ the pass immediately returns. 
*/ ++ if (symtab->first_asm_symbol ()) ++ { ++ return 0; ++ } ++ record_accesses (); ++ prune_escaped_types (); ++ analyze_types (); ++ ++ ret = rewrite_functions (); ++ } ++ else if (mode == COMPLETE_STRUCT_RELAYOUT) ++ { ++ if (dump_file) ++ { ++ fprintf (dump_file, "\n\nTry Complete Struct Relayout:\n"); ++ } ++ current_mode = COMPLETE_STRUCT_RELAYOUT; ++ if (symtab->first_asm_symbol ()) ++ { ++ return 0; ++ } ++ record_accesses (); ++ prune_escaped_types (); ++ ++ ret = execute_struct_relayout (); ++ } ++ ++ return ret; + } + + const pass_data pass_data_ipa_struct_reorg = +@@ -3884,17 +4815,27 @@ public: + + /* opt_pass methods: */ + virtual bool gate (function *); +- virtual unsigned int execute (function *) { return ipa_struct_reorg ().execute(); } ++ virtual unsigned int execute (function *) ++ { ++ unsigned int ret = 0; ++ ret = ipa_struct_reorg ().execute (NORMAL); ++ if (!ret) ++ { ++ ret = ipa_struct_reorg ().execute (COMPLETE_STRUCT_RELAYOUT); ++ } ++ return ret; ++ } + + }; // class pass_ipa_struct_reorg + + bool + pass_ipa_struct_reorg::gate (function *) + { +- return (optimize ++ return (optimize >= 3 + && flag_ipa_struct_reorg + /* Don't bother doing anything if the program has errors. */ +- && !seen_error ()); ++ && !seen_error () ++ && flag_lto_partition == LTO_PARTITION_ONE); + } + + } // anon namespace +diff -Nurp a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h 2020-07-18 05:11:11.548000000 -0400 ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h 2020-07-18 05:16:25.928000000 -0400 +@@ -121,6 +121,7 @@ public: + + tree newtype[max_split]; + bool visited; ++ int has_alloc_array; + + // Constructors + srtype(tree type); +@@ -232,4 +233,34 @@ struct srdecl + + } // namespace struct_reorg + ++ ++namespace struct_relayout { ++ ++const int min_relayout_split = 8; ++const int max_relayout_split = 16; ++ ++struct csrtype ++{ ++ tree type; ++ unsigned HOST_WIDE_INT old_size; ++ unsigned HOST_WIDE_INT new_size; ++ unsigned field_count; ++ tree struct_size; ++ ++ // Constructors ++ csrtype () ++ : type (NULL), ++ old_size (0), ++ new_size (0), ++ field_count (0), ++ struct_size (NULL) ++ {} ++ ++ // Methods ++ unsigned calculate_field_num (tree field_offset); ++ void init_type_info (void); ++}; ++ ++} // namespace struct_relayout ++ + #endif +diff -Nurp a/gcc/testsuite/gcc.dg/struct/complete_struct_relayout.c b/gcc/testsuite/gcc.dg/struct/complete_struct_relayout.c +--- a/gcc/testsuite/gcc.dg/struct/complete_struct_relayout.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/complete_struct_relayout.c 2020-07-18 05:16:25.928000000 -0400 +@@ -0,0 +1,60 @@ ++// { dg-do run } ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 10000; ++node_p n; ++ ++int ++main () ++{ ++ n = (node_p) calloc (MAX, sizeof (node_t)); ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ n[i].a = 100; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ if (n[i].a != 100) ++ { ++ abort (); ++ } ++ } ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ n[i].l = n[i].a; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ if (n[i].l != 100) ++ { ++ abort (); ++ } ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform in Complete Structure Relayout is 1" 
"struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c b/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c +--- a/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/csr_allocation-1.c 2020-07-18 05:16:25.928000000 -0400 +@@ -0,0 +1,46 @@ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 1; ++node_p n; ++ ++int ++main () ++{ ++ n = (node_p) calloc (MAX, sizeof (node_t)); ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ n[i].a = 100; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ if (n[i].a != 100) ++ { ++ abort (); ++ } ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c b/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c +--- a/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/csr_allocation-2.c 2020-07-18 05:16:25.928000000 -0400 +@@ -0,0 +1,59 @@ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 10; ++node_p n; ++node_p m; ++ ++int main() ++{ ++ int i; ++ for (i = 0; i < MAX / 5; i++) ++ { ++ n = (node_p) calloc(MAX, sizeof(node_t)); ++ if (i == 0) ++ { ++ m = n; ++ } ++ } ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ n[i].a = 100; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ m[i].a = 50; ++ } ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ if (n[i].a != 100) ++ { ++ abort (); ++ } ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c b/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c +--- a/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/csr_allocation-3.c 2020-07-18 05:16:25.928000000 -0400 +@@ -0,0 +1,77 @@ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 10; ++node_p n; ++node_p m; ++ ++void test (int, int) __attribute__((noinline)); ++ ++void ++test (int num, int flag) ++{ ++ if (num <= 0) ++ { ++ return; ++ } ++ n = (node_p) calloc (num, sizeof (node_t)); ++ if (flag) ++ { ++ m = n; ++ } ++ return; ++} ++ ++int ++main () ++{ ++ test (MAX, 1); ++ test (MAX, 0); ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ n[i].a = 100; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ m[i].a = 50; ++ } ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ if (n[i].a != 100) ++ { ++ abort (); ++ } ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ if (m[i].a != 50) ++ { ++ abort (); ++ } ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform in Complete Structure Relayout." 
"struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/csr_cast_int.c b/gcc/testsuite/gcc.dg/struct/csr_cast_int.c +--- a/gcc/testsuite/gcc.dg/struct/csr_cast_int.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/csr_cast_int.c 2020-07-18 05:16:25.928000000 -0400 +@@ -0,0 +1,52 @@ ++// { dg-do run } ++ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 100; ++node_p n; ++unsigned long y; ++ ++int ++main () ++{ ++ n = (node_p) calloc (MAX, sizeof (node_t)); ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ n[i].b = 50; ++ } ++ ++ node_p x = &n[5]; ++ y = (unsigned long) x; ++ y += 8; ++ ++ if (*((unsigned long*) y) != 50) ++ { ++ abort (); ++ } ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "struct node has escaped: \"Type escapes a cast from/to intergral type\"" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c b/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c +--- a/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/csr_separate_instance.c 2020-07-18 05:16:25.928000000 -0400 +@@ -0,0 +1,48 @@ ++#include ++#include ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++ node_p c; ++ node_p d; ++ long e; ++ long f; ++ long g; ++ long h; ++ long i; ++ long j; ++ long k; ++ long l; ++ int m; ++ int n; ++}; ++ ++const int MAX = 10000; ++node_p n; ++node_t t; ++ ++int ++main () ++{ ++ n = (node_p) calloc (MAX, sizeof (node_t)); ++ t.a = 100; ++ ++ for (int i = 0; i < MAX; i++) ++ { ++ n[i].a = t.a; ++ } ++ for (int i = 0; i < MAX; i++) ++ { ++ if (n[i].a != 100) ++ { ++ abort (); ++ } ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "struct node has escaped: \"Type escapes via a separate instance\"" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c b/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c +--- a/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/sr_address_of_field.c 2020-07-18 05:16:25.928000000 -0400 +@@ -0,0 +1,37 @@ ++/* { dg-do run } */ ++ ++static struct S { ++ int *p1; ++ int *p2; ++} s; ++ ++typedef __UINTPTR_TYPE__ uintptr_t; ++ ++int ++foo () ++{ ++ int i = 1; ++ int j = 2; ++ struct S s; ++ int **p; ++ s.p1 = &i; ++ s.p2 = &j; ++ p = &s.p1; ++ uintptr_t pi = (uintptr_t) p; ++ pi = pi + sizeof (int *); ++ p = (int **)pi; ++ **p = 3; ++ return j; ++} ++ ++int ++main () ++{ ++ if (foo () != 3) ++ { ++ __builtin_abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "struct S has escaped: \"Type escapes via taking the address of field\"" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c b/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c +--- a/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/sr_maxmin_expr.c 2020-07-18 05:16:25.928000000 -0400 +@@ -0,0 +1,25 @@ ++// { dg-do compile } ++ ++#include ++ ++struct S { ++ unsigned long a; ++ unsigned long b; ++}; ++ ++struct S* s; ++struct S* t = (struct S*) 1000; ++ ++int ++main () ++{ ++ s = (struct S*) calloc (1000, sizeof (struct S)); ++ s = s > t ? 
s : t; ++ if (s == 0) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c b/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c +--- a/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/sr_pointer_minus.c 2020-07-18 05:16:25.928000000 -0400 +@@ -0,0 +1,33 @@ ++// { dg-do compile } ++ ++#include ++ ++typedef struct node node_t; ++typedef struct node* node_p; ++ ++struct node { ++ unsigned long a; ++ unsigned long b; ++}; ++ ++int max; ++int x; ++ ++node_p n; ++node_p z; ++ ++int ++main () ++{ ++ n = (node_p) calloc (max, sizeof (node_t)); ++ ++ node_p xp = &n[x]; ++ ++ if (xp - z == 10) ++ { ++ abort (); ++ } ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "struct node has escaped: \"Type escapes via a unhandled rewrite stmt\"" "struct_reorg" } } */ diff --git a/cse-in-vectorization.patch b/cse-in-vectorization.patch new file mode 100644 index 0000000..ac8e7a2 --- /dev/null +++ b/cse-in-vectorization.patch @@ -0,0 +1,68 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-vect-CSE-for-bump-and-offset-in-strided-load-store-o.patch +4a31a8add56d49867c187d90b3a89e97634543c2 + +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/pr95199.c b/gcc/testsuite/gcc.target/aarch64/sve/pr95199.c +new file mode 100644 +index 00000000000..adcd5124a7c +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/pr95199.c +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -march=armv8.2-a+sve -fdump-tree-vect" } */ ++ ++void ++foo (double *a, double *b, double m, int inc_x, int inc_y) ++{ ++ int ix = 0, iy = 0; ++ for (int i = 0; i < 1000; ++i) ++ { ++ a[ix] += m * b[iy]; ++ ix += inc_x; ++ iy += inc_y; ++ } ++ return ; ++} ++ ++/* { dg-final { scan-tree-dump-times "VEC_SERIES_EXPR" 2 "vect" } } */ +diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +index 4a0a907fcb4..c9174395fca 100644 +--- a/gcc/tree-vect-stmts.c ++++ b/gcc/tree-vect-stmts.c +@@ -2846,16 +2846,12 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, + tree *dataref_bump, tree *vec_offset) + { + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); +- struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree vectype = STMT_VINFO_VECTYPE (stmt_info); +- gimple_seq stmts; + + tree bump = size_binop (MULT_EXPR, + fold_convert (sizetype, DR_STEP (dr)), + size_int (TYPE_VECTOR_SUBPARTS (vectype))); +- *dataref_bump = force_gimple_operand (bump, &stmts, true, NULL_TREE); +- if (stmts) +- gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); ++ *dataref_bump = cse_and_gimplify_to_preheader (loop_vinfo, bump); + + /* The offset given in GS_INFO can have pointer type, so use the element + type of the vector instead. */ +@@ -2866,13 +2862,11 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, + tree step = size_binop (EXACT_DIV_EXPR, DR_STEP (dr), + ssize_int (gs_info->scale)); + step = fold_convert (offset_type, step); +- step = force_gimple_operand (step, &stmts, true, NULL_TREE); + + /* Create {0, X, X*2, X*3, ...}. 
*/ +- *vec_offset = gimple_build (&stmts, VEC_SERIES_EXPR, offset_vectype, +- build_zero_cst (offset_type), step); +- if (stmts) +- gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); ++ tree offset = fold_build2 (VEC_SERIES_EXPR, offset_vectype, ++ build_zero_cst (offset_type), step); ++ *vec_offset = cse_and_gimplify_to_preheader (loop_vinfo, offset); + } + + /* Return the amount that should be added to a vector pointer to move diff --git a/delete-incorrect-smw.patch b/delete-incorrect-smw.patch index 087166f..44ed526 100644 --- a/delete-incorrect-smw.patch +++ b/delete-incorrect-smw.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-re-PR-middle-end-91195-incorrect-may-be-used-uniniti.patch +06e8db10cd80d88fb3a6afedf2c35da6c1fa6d85 + diff -uprN a/gcc/testsuite/gcc.dg/pr91195.c b/gcc/testsuite/gcc.dg/pr91195.c new file mode 100644 --- /dev/null diff --git a/enable-simd-math.patch b/enable-simd-math.patch new file mode 100644 index 0000000..46f7d3d --- /dev/null +++ b/enable-simd-math.patch @@ -0,0 +1,34 @@ +diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +--- a/gcc/config/aarch64/aarch64.c 2020-07-06 17:20:30.368000000 +0800 ++++ b/gcc/config/aarch64/aarch64.c 2020-07-06 20:02:39.480000000 +0800 +@@ -18860,8 +18860,12 @@ aarch64_simd_clone_compute_vecsize_and_s + elt_bits = GET_MODE_BITSIZE (SCALAR_TYPE_MODE (base_type)); + if (clonei->simdlen == 0) + { +- count = 2; +- vec_bits = (num == 0 ? 64 : 128); ++ /* Currently mathlib or sleef hasn't provide function for V2SF mode ++ simdclone of single precision functions. (e.g._ZCVnN2v_expf) ++ Therefore this mode is disabled by default to avoid link error. ++ Use -msimdmath-64 option to enable this mode. */ ++ count = flag_simdmath_64 ? 2 : 1; ++ vec_bits = ((num == 0 && flag_simdmath_64) ? 64 : 128); + clonei->simdlen = vec_bits / elt_bits; + } + else +diff -Nurp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt +--- a/gcc/config/aarch64/aarch64.opt 2020-07-06 17:20:30.364000000 +0800 ++++ b/gcc/config/aarch64/aarch64.opt 2020-07-06 20:02:39.480000000 +0800 +@@ -186,6 +186,12 @@ precision of square root results to abou + single precision and to 32 bits for double precision. + If enabled, it implies -mlow-precision-recip-sqrt. + ++msimdmath-64 ++Target Var(flag_simdmath_64) Optimization ++Allow compiler to generate V2SF 64 bits simdclone of math functions, ++which is not currently supported in mathlib or sleef. ++Therefore this option is disabled by default. ++ + mlow-precision-div + Target Var(flag_mlow_precision_div) Optimization + Enable the division approximation. Enabling this reduces diff --git a/fix-ICE-avoid-issueing-loads-in-SM-when-possible.patch b/fix-ICE-avoid-issueing-loads-in-SM-when-possible.patch new file mode 100644 index 0000000..01a33e3 --- /dev/null +++ b/fix-ICE-avoid-issueing-loads-in-SM-when-possible.patch @@ -0,0 +1,123 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-tree-optimization-39612-avoid-issueing-loads-in-SM-w.patch +f9e1ea10e657af9fb02fafecf1a600740fd34409 + +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c b/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr39612.c 2020-08-17 11:14:08.000000000 +0800 +@@ -0,0 +1,21 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fdump-tree-lim2-details -Wuninitialized" } */ ++ ++void foo(int *); ++void f2(int dst[3], int R) ++{ ++ int i, inter[2]; ++ ++ for (i = 1; i < R; i++) { ++ if (i & 8) ++ { ++ inter[0] = 1; ++ inter[1] = 1; ++ } ++ } ++ ++ foo(inter); ++} ++ ++/* { dg-final { scan-tree-dump-times "Executing store motion" 2 "lim2" } } */ ++/* { dg-final { scan-tree-dump-not " = inter\\\[\[0-1\]\\\];" "lim2" } } */ +diff -Nurp a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c +--- a/gcc/tree-ssa-loop-im.c 2020-08-17 11:13:58.436000000 +0800 ++++ b/gcc/tree-ssa-loop-im.c 2020-08-17 11:14:08.000000000 +0800 +@@ -127,6 +127,8 @@ struct im_mem_ref + + bitmap stored; /* The set of loops in that this memory location + is stored to. */ ++ bitmap loaded; /* The set of loops in that this memory location ++ is loaded from. */ + vec accesses_in_loop; + /* The locations of the accesses. Vector + indexed by the loop number. */ +@@ -1394,6 +1396,7 @@ mem_ref_alloc (ao_ref *mem, unsigned has + ref->ref_decomposed = false; + ref->hash = hash; + ref->stored = NULL; ++ ref->loaded = NULL; + bitmap_initialize (&ref->indep_loop, &lim_bitmap_obstack); + bitmap_initialize (&ref->dep_loop, &lim_bitmap_obstack); + ref->accesses_in_loop.create (1); +@@ -1434,6 +1437,27 @@ mark_ref_stored (im_mem_ref *ref, struct + loop = loop_outer (loop); + } + ++/* Set the LOOP bit in REF loaded bitmap and allocate that if ++ necessary. Return whether a bit was changed. */ ++ ++static bool ++set_ref_loaded_in_loop (im_mem_ref *ref, class loop *loop) ++{ ++ if (!ref->loaded) ++ ref->loaded = BITMAP_ALLOC (&lim_bitmap_obstack); ++ return bitmap_set_bit (ref->loaded, loop->num); ++} ++ ++/* Marks reference REF as loaded in LOOP. */ ++ ++static void ++mark_ref_loaded (im_mem_ref *ref, class loop *loop) ++{ ++ while (loop != current_loops->tree_root ++ && set_ref_loaded_in_loop (ref, loop)) ++ loop = loop_outer (loop); ++} ++ + /* Gathers memory references in statement STMT in LOOP, storing the + information about them in the memory_accesses structure. Marks + the vops accessed through unrecognized statements there as +@@ -1569,6 +1593,8 @@ gather_mem_refs_stmt (struct loop *loop, + bitmap_set_bit (&memory_accesses.refs_stored_in_loop[loop->num], ref->id); + mark_ref_stored (ref, loop); + } ++ else ++ mark_ref_loaded (ref, loop); + init_lim_data (stmt)->ref = ref->id; + return; + } +@@ -1956,6 +1982,8 @@ execute_sm_if_changed (edge ex, tree mem + gsi = gsi_start_bb (then_bb); + /* Insert actual store. */ + stmt = gimple_build_assign (unshare_expr (mem), tmp_var); ++ /* Make sure to not warn about maybe-uninit uses of tmp_var here. */ ++ gimple_set_no_warning (stmt, true); + gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING); + + edge e1 = single_succ_edge (new_bb); +@@ -2102,14 +2130,17 @@ execute_sm (struct loop *loop, vec + by move_computations after all dependencies. */ + gsi = gsi_for_stmt (first_mem_ref_loc (loop, ref)->stmt); + +- /* FIXME/TODO: For the multi-threaded variant, we could avoid this +- load altogether, since the store is predicated by a flag. 
We +- could, do the load only if it was originally in the loop. */ +- load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); +- lim_data = init_lim_data (load); +- lim_data->max_loop = loop; +- lim_data->tgt_loop = loop; +- gsi_insert_before (&gsi, load, GSI_SAME_STMT); ++ /* Avoid doing a load if there was no load of the ref in the loop. ++ Esp. when the ref is not always stored we cannot optimize it ++ away later. */ ++ if (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)) ++ { ++ load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); ++ lim_data = init_lim_data (load); ++ lim_data->max_loop = loop; ++ lim_data->tgt_loop = loop; ++ gsi_insert_before (&gsi, load, GSI_SAME_STMT); ++ } + + if (multi_threaded_model_p) + { diff --git a/fix-ICE-during-pass-ccp.patch b/fix-ICE-during-pass-ccp.patch index 67d332d..15cb1df 100644 --- a/fix-ICE-during-pass-ccp.patch +++ b/fix-ICE-during-pass-ccp.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-PR-tree-optimization-94574-aarch64-ICE-during-GIMPLE.patch +f65cecabc32fe12b024253502af953e657e1a878 + diff -uprN a/gcc/testsuite/gcc.dg/pr94574.c b/gcc/testsuite/gcc.dg/pr94574.c --- a/gcc/testsuite/gcc.dg/pr94574.c 1970-01-01 00:00:00.000000000 +0000 +++ b/gcc/testsuite/gcc.dg/pr94574.c 2020-04-15 21:08:48.972000000 +0000 diff --git a/fix-ICE-in-compute_live_loop_exits.patch b/fix-ICE-in-compute_live_loop_exits.patch new file mode 100644 index 0000000..013ec83 --- /dev/null +++ b/fix-ICE-in-compute_live_loop_exits.patch @@ -0,0 +1,76 @@ +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92085-1.c 2020-07-09 11:05:23.136000000 +0800 +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1 -fexceptions -fnon-call-exceptions -ftree-loop-vectorize -fno-tree-sink --param dse-max-alias-queries-per-store=2 -w" } */ ++ ++void ++di (int y9, int qw) ++{ ++ if ((int) &y9 != 0) ++ { ++ int py; ++ int **fq = &py; ++ ++ while (qw < 1) ++ { ++ if ((0 < (**fq ? 
**fq : (**fq = 1))) / (**fq = y9)) ++ ; ++ ++ ++qw; ++ } ++ } ++} +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr92085-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92085-2.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr92085-2.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92085-2.c 2020-07-09 11:05:23.136000000 +0800 +@@ -0,0 +1,29 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O1 -ftree-loop-vectorize -fno-tree-dce -fno-tree-sink -w" } */ ++ ++int a8; ++ ++void ++c1 (int oz, int dk, int ub) ++{ ++ int *hd = 0; ++ long int *th = &dk; ++ ++ while (ub < 1) ++ { ++ oz || dk; ++ ++ub; ++ } ++ ++ while (oz < 2) ++ { ++ long int *lq = &oz; ++ ++ (*hd < (*lq = *th)) < oz; ++ ++ if (oz == 0) ++ *th = a8 = oz; ++ ++ *lq = 0; ++ } ++} +diff -Nurp a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c +--- a/gcc/tree-if-conv.c 2020-07-09 11:04:58.832000000 +0800 ++++ b/gcc/tree-if-conv.c 2020-07-09 11:05:23.136000000 +0800 +@@ -2984,10 +2984,11 @@ ifcvt_local_dce (class loop *loop) + ao_ref write; + ao_ref_init (&write, lhs); + +- if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef) +- == DSE_STORE_DEAD) +- delete_dead_or_redundant_assignment (&gsi, "dead"); +- gsi_next (&gsi); ++ if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef) ++ == DSE_STORE_DEAD) ++ delete_dead_or_redundant_assignment (&gsi, "dead"); ++ else ++ gsi_next (&gsi); + continue; + } + diff --git a/fix-ICE-in-copy_reference_ops_from_ref.patch b/fix-ICE-in-copy_reference_ops_from_ref.patch new file mode 100644 index 0000000..52c660d --- /dev/null +++ b/fix-ICE-in-copy_reference_ops_from_ref.patch @@ -0,0 +1,70 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-tree-ssa-sccvn.c-copy_reference_ops_from_ref-Adjust-.patch +2f215d2176608467aeee73b245beedfc60836b71 + +diff -Nurp gcc-9.3.0_org/gcc/tree-ssa-sccvn.c gcc-9.3.0/gcc/tree-ssa-sccvn.c +--- gcc-9.3.0_org/gcc/tree-ssa-sccvn.c 2020-08-18 15:31:39.308000000 +0800 ++++ gcc-9.3.0/gcc/tree-ssa-sccvn.c 2020-08-18 15:32:03.456000000 +0800 +@@ -797,39 +797,6 @@ vn_reference_eq (const_vn_reference_t co + static void + copy_reference_ops_from_ref (tree ref, vec *result) + { +- if (TREE_CODE (ref) == TARGET_MEM_REF) +- { +- vn_reference_op_s temp; +- +- result->reserve (3); +- +- memset (&temp, 0, sizeof (temp)); +- temp.type = TREE_TYPE (ref); +- temp.opcode = TREE_CODE (ref); +- temp.op0 = TMR_INDEX (ref); +- temp.op1 = TMR_STEP (ref); +- temp.op2 = TMR_OFFSET (ref); +- temp.off = -1; +- temp.clique = MR_DEPENDENCE_CLIQUE (ref); +- temp.base = MR_DEPENDENCE_BASE (ref); +- result->quick_push (temp); +- +- memset (&temp, 0, sizeof (temp)); +- temp.type = NULL_TREE; +- temp.opcode = ERROR_MARK; +- temp.op0 = TMR_INDEX2 (ref); +- temp.off = -1; +- result->quick_push (temp); +- +- memset (&temp, 0, sizeof (temp)); +- temp.type = NULL_TREE; +- temp.opcode = TREE_CODE (TMR_BASE (ref)); +- temp.op0 = TMR_BASE (ref); +- temp.off = -1; +- result->quick_push (temp); +- return; +- } +- + /* For non-calls, store the information that makes up the address. */ + tree orig = ref; + while (ref) +@@ -859,6 +826,20 @@ copy_reference_ops_from_ref (tree ref, v + temp.base = MR_DEPENDENCE_BASE (ref); + temp.reverse = REF_REVERSE_STORAGE_ORDER (ref); + break; ++ case TARGET_MEM_REF: ++ /* The base address gets its own vn_reference_op_s structure. 
*/ ++ temp.op0 = TMR_INDEX (ref); ++ temp.op1 = TMR_STEP (ref); ++ temp.op2 = TMR_OFFSET (ref); ++ temp.clique = MR_DEPENDENCE_CLIQUE (ref); ++ temp.base = MR_DEPENDENCE_BASE (ref); ++ result->safe_push (temp); ++ memset (&temp, 0, sizeof (temp)); ++ temp.type = NULL_TREE; ++ temp.opcode = ERROR_MARK; ++ temp.op0 = TMR_INDEX2 (ref); ++ temp.off = -1; ++ break; + case BIT_FIELD_REF: + /* Record bits, position and storage order. */ + temp.op0 = TREE_OPERAND (ref, 1); diff --git a/fix-ICE-in-declare-return-variable.patch b/fix-ICE-in-declare-return-variable.patch new file mode 100644 index 0000000..4faa0cb --- /dev/null +++ b/fix-ICE-in-declare-return-variable.patch @@ -0,0 +1,31 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-re-PR-ipa-92409-r277920-causes-ICE-in-gcc.dg-cast-fu.patch +e7399b548c866ee2e408e0855b3be794c056fb1d + +diff -uprN a/gcc/tree-inline.c b/gcc/tree-inline.c +--- a/gcc/tree-inline.c ++++ b/gcc/tree-inline.c +@@ -3593,7 +3593,9 @@ declare_return_variable (copy_body_data *id, tree return_slot, tree modify_dest, + vs. the call expression. */ + if (modify_dest) + caller_type = TREE_TYPE (modify_dest); +- else ++ else if (return_slot) ++ caller_type = TREE_TYPE (return_slot); ++ else /* No LHS on the call. */ + caller_type = TREE_TYPE (TREE_TYPE (callee)); + + /* We don't need to do anything for functions that don't return anything. */ +@@ -3634,6 +3636,10 @@ declare_return_variable (copy_body_data *id, tree return_slot, tree modify_dest, + && !DECL_GIMPLE_REG_P (result) + && DECL_P (var)) + DECL_GIMPLE_REG_P (var) = 0; ++ ++ if (!useless_type_conversion_p (callee_type, caller_type)) ++ var = build1 (VIEW_CONVERT_EXPR, callee_type, var); ++ + use = NULL; + goto done; + } diff --git a/fix-ICE-in-exact_div.patch b/fix-ICE-in-exact_div.patch new file mode 100644 index 0000000..a606157 --- /dev/null +++ b/fix-ICE-in-exact_div.patch @@ -0,0 +1,54 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-re-PR-tree-optimization-92555-ICE-in-exact_div-at-po.patch +f1e0c7e0eb3eafb122fc3d00242828c82a9286a2 + +diff -Nurp a/gcc/testsuite/gcc.dg/vect/pr92555.c b/gcc/testsuite/gcc.dg/vect/pr92555.c +--- a/gcc/testsuite/gcc.dg/vect/pr92555.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/pr92555.c 2020-08-11 09:36:18.060000000 +0800 +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-fwrapv" } */ ++ ++signed char rq; ++ ++signed char ++pu (int tr, int al) ++{ ++ signed char x8; ++ ++ while (tr != 0) ++ { ++ for (x8 = 0; x8 >= 0; x8 += 2) ++ ; ++ ++ rq ^= al ^ 1; ++ ++x8; ++ ++tr; ++ } ++ ++ return x8; ++} +diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +--- a/gcc/tree-vect-loop.c 2020-08-11 09:35:10.952000000 +0800 ++++ b/gcc/tree-vect-loop.c 2020-08-11 09:36:18.064000000 +0800 +@@ -1415,6 +1415,18 @@ vect_update_vf_for_slp (loop_vec_info lo + for (i = 0; i < nbbs; i++) + { + basic_block bb = bbs[i]; ++ for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); ++ gsi_next (&si)) ++ { ++ stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (si.phi ()); ++ if (!stmt_info) ++ continue; ++ if ((STMT_VINFO_RELEVANT_P (stmt_info) ++ || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) ++ && !PURE_SLP_STMT (stmt_info)) ++ /* STMT needs both SLP and loop-based vectorization. 
*/ ++ only_slp_in_loop = false; ++ } + for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); + gsi_next (&si)) + { diff --git a/fix-ICE-in-gimple_op.patch b/fix-ICE-in-gimple_op.patch new file mode 100644 index 0000000..e6949dc --- /dev/null +++ b/fix-ICE-in-gimple_op.patch @@ -0,0 +1,65 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-re-PR-tree-optimization-92512-ICE-in-gimple_op-at-gi.patch +b9f71c51cd578c6ab6ad2986edb80ba48aa477bc + +diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr92512.c b/gcc/testsuite/gcc.dg/torture/pr92512.c +--- a/gcc/testsuite/gcc.dg/torture/pr92512.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/torture/pr92512.c 2020-08-10 20:53:50.404000000 +0800 +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-ftree-vectorize" } */ ++ ++long int ++nl (long int fy, int k3, int zr) ++{ ++ while (k3 < 1) ++ { ++ if (zr == 0) ++ fy = 0; ++ ++ fy *= fy < zr; ++ ++k3; ++ } ++ ++ return fy; ++} +diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +--- a/gcc/tree-vect-loop.c 2020-08-10 20:53:42.636000000 +0800 ++++ b/gcc/tree-vect-loop.c 2020-08-10 20:53:50.404000000 +0800 +@@ -2931,9 +2931,11 @@ pop: + /* The following make sure we can compute the operand index + easily plus it mostly disallows chaining via COND_EXPR condition + operands. */ +- || (gimple_assign_rhs1 (use_stmt) != op +- && gimple_assign_rhs2 (use_stmt) != op +- && gimple_assign_rhs3 (use_stmt) != op)) ++ || (gimple_assign_rhs1_ptr (use_stmt) != path[i].second->use ++ && (gimple_num_ops (use_stmt) <= 2 ++ || gimple_assign_rhs2_ptr (use_stmt) != path[i].second->use) ++ && (gimple_num_ops (use_stmt) <= 3 ++ || gimple_assign_rhs3_ptr (use_stmt) != path[i].second->use))) + { + fail = true; + break; +@@ -2946,7 +2948,18 @@ pop: + FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op) + if (!is_gimple_debug (op_use_stmt) + && flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt))) +- cnt++; ++ { ++ /* We want to allow x + x but not x < 1 ? x : 2. */ ++ if (is_gimple_assign (op_use_stmt) ++ && gimple_assign_rhs_code (op_use_stmt) == COND_EXPR) ++ { ++ use_operand_p use_p; ++ FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) ++ cnt++; ++ } ++ else ++ cnt++; ++ } + if (cnt != 1) + { + fail = true; diff --git a/fix-ICE-in-model_update_limit_points_in_group.patch b/fix-ICE-in-model_update_limit_points_in_group.patch new file mode 100644 index 0000000..2692196 --- /dev/null +++ b/fix-ICE-in-model_update_limit_points_in_group.patch @@ -0,0 +1,248 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-re-PR-tree-optimization-88828-Inefficient-update-of-.patch +3bc104bdb4b5aa99ff6dceb246beaa65b012c5ac + +diff -Nurp a/gcc/testsuite/gcc.target/i386/pr88828-0.c b/gcc/testsuite/gcc.target/i386/pr88828-0.c +--- a/gcc/testsuite/gcc.target/i386/pr88828-0.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.target/i386/pr88828-0.c 2020-08-24 21:08:23.028000000 +0800 +@@ -0,0 +1,27 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -msse4.2" } */ ++ ++typedef int v4si __attribute__((vector_size(16))); ++typedef float v4sf __attribute__((vector_size(16))); ++ ++v4si foo (v4si x) ++{ ++ return (v4si){ x[0], 1, x[2], 3 }; ++} ++ ++/* { dg-final { scan-assembler "pblendw" } } */ ++ ++v4si bar (v4sf x) ++{ ++ return (v4si){ 1, x[1], x[2], 3 }; ++} ++ ++/* { dg-final { scan-assembler "cvttps2dq" } } */ ++/* { dg-final { scan-assembler "pblendw" } } */ ++ ++v4si baz (v4si x) ++{ ++ return (v4si) { x[1], x[2], x[3], 0 }; ++} ++ ++/* { dg-final { scan-assembler "psrldq" } } */ +diff -Nurp a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c +--- a/gcc/tree-ssa-forwprop.c 2020-08-24 21:07:59.800000000 +0800 ++++ b/gcc/tree-ssa-forwprop.c 2020-08-24 21:08:23.028000000 +0800 +@@ -1997,17 +1997,54 @@ simplify_permutation (gimple_stmt_iterat + return 0; + } + ++/* Get the BIT_FIELD_REF definition of VAL, if any, looking through ++ conversions with code CONV_CODE or update it if still ERROR_MARK. ++ Return NULL_TREE if no such matching def was found. */ ++ ++static tree ++get_bit_field_ref_def (tree val, enum tree_code &conv_code) ++{ ++ if (TREE_CODE (val) != SSA_NAME) ++ return NULL_TREE ; ++ gimple *def_stmt = get_prop_source_stmt (val, false, NULL); ++ if (!def_stmt) ++ return NULL_TREE; ++ enum tree_code code = gimple_assign_rhs_code (def_stmt); ++ if (code == FLOAT_EXPR ++ || code == FIX_TRUNC_EXPR) ++ { ++ tree op1 = gimple_assign_rhs1 (def_stmt); ++ if (conv_code == ERROR_MARK) ++ { ++ if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (val))), ++ GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op1))))) ++ return NULL_TREE; ++ conv_code = code; ++ } ++ else if (conv_code != code) ++ return NULL_TREE; ++ if (TREE_CODE (op1) != SSA_NAME) ++ return NULL_TREE; ++ def_stmt = SSA_NAME_DEF_STMT (op1); ++ if (! is_gimple_assign (def_stmt)) ++ return NULL_TREE; ++ code = gimple_assign_rhs_code (def_stmt); ++ } ++ if (code != BIT_FIELD_REF) ++ return NULL_TREE; ++ return gimple_assign_rhs1 (def_stmt); ++} ++ + /* Recognize a VEC_PERM_EXPR. Returns true if there were any changes. 
*/ + + static bool + simplify_vector_constructor (gimple_stmt_iterator *gsi) + { + gimple *stmt = gsi_stmt (*gsi); +- gimple *def_stmt; + tree op, op2, orig[2], type, elem_type; + unsigned elem_size, i; + unsigned HOST_WIDE_INT nelts; +- enum tree_code code, conv_code; ++ enum tree_code conv_code; + constructor_elt *elt; + bool maybe_ident; + +@@ -2027,6 +2064,9 @@ simplify_vector_constructor (gimple_stmt + orig[1] = NULL; + conv_code = ERROR_MARK; + maybe_ident = true; ++ tree one_constant = NULL_TREE; ++ auto_vec constants; ++ constants.safe_grow_cleared (nelts); + FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt) + { + tree ref, op1; +@@ -2034,68 +2074,57 @@ simplify_vector_constructor (gimple_stmt + if (i >= nelts) + return false; + +- if (TREE_CODE (elt->value) != SSA_NAME) +- return false; +- def_stmt = get_prop_source_stmt (elt->value, false, NULL); +- if (!def_stmt) +- return false; +- code = gimple_assign_rhs_code (def_stmt); +- if (code == FLOAT_EXPR +- || code == FIX_TRUNC_EXPR) ++ op1 = get_bit_field_ref_def (elt->value, conv_code); ++ if (op1) + { +- op1 = gimple_assign_rhs1 (def_stmt); +- if (conv_code == ERROR_MARK) ++ ref = TREE_OPERAND (op1, 0); ++ unsigned int j; ++ for (j = 0; j < 2; ++j) + { +- if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (elt->value))), +- GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (op1))))) +- return false; +- conv_code = code; ++ if (!orig[j]) ++ { ++ if (TREE_CODE (ref) != SSA_NAME) ++ return false; ++ if (! VECTOR_TYPE_P (TREE_TYPE (ref)) ++ || ! useless_type_conversion_p (TREE_TYPE (op1), ++ TREE_TYPE (TREE_TYPE (ref)))) ++ return false; ++ if (j && !useless_type_conversion_p (TREE_TYPE (orig[0]), ++ TREE_TYPE (ref))) ++ return false; ++ orig[j] = ref; ++ break; ++ } ++ else if (ref == orig[j]) ++ break; + } +- else if (conv_code != code) ++ if (j == 2) + return false; +- if (TREE_CODE (op1) != SSA_NAME) +- return false; +- def_stmt = SSA_NAME_DEF_STMT (op1); +- if (! is_gimple_assign (def_stmt)) ++ ++ unsigned int elt; ++ if (maybe_ne (bit_field_size (op1), elem_size) ++ || !constant_multiple_p (bit_field_offset (op1), elem_size, &elt)) + return false; +- code = gimple_assign_rhs_code (def_stmt); ++ if (j) ++ elt += nelts; ++ if (elt != i) ++ maybe_ident = false; ++ sel.quick_push (elt); + } +- if (code != BIT_FIELD_REF) +- return false; +- op1 = gimple_assign_rhs1 (def_stmt); +- ref = TREE_OPERAND (op1, 0); +- unsigned int j; +- for (j = 0; j < 2; ++j) ++ else if (CONSTANT_CLASS_P (elt->value)) + { +- if (!orig[j]) +- { +- if (TREE_CODE (ref) != SSA_NAME) +- return false; +- if (! VECTOR_TYPE_P (TREE_TYPE (ref)) +- || ! 
useless_type_conversion_p (TREE_TYPE (op1), +- TREE_TYPE (TREE_TYPE (ref)))) +- return false; +- if (j && !useless_type_conversion_p (TREE_TYPE (orig[0]), +- TREE_TYPE (ref))) +- return false; +- orig[j] = ref; +- break; +- } +- else if (ref == orig[j]) +- break; ++ if (orig[1] ++ && orig[1] != error_mark_node) ++ return false; ++ orig[1] = error_mark_node; ++ if (!one_constant) ++ one_constant = elt->value; ++ constants[i] = elt->value; ++ sel.quick_push (i + nelts); ++ maybe_ident = false; + } +- if (j == 2) +- return false; +- +- unsigned int elt; +- if (maybe_ne (bit_field_size (op1), elem_size) +- || !constant_multiple_p (bit_field_offset (op1), elem_size, &elt)) ++ else + return false; +- if (j) +- elt += nelts; +- if (elt != i) +- maybe_ident = false; +- sel.quick_push (elt); + } + if (i < nelts) + return false; +@@ -2138,9 +2167,29 @@ simplify_vector_constructor (gimple_stmt + op2 = vec_perm_indices_to_tree (mask_type, indices); + if (!orig[1]) + orig[1] = orig[0]; ++ if (orig[1] == error_mark_node) ++ { ++ tree_vector_builder vec (type, nelts, 1); ++ for (unsigned i = 0; i < nelts; ++i) ++ if (constants[i]) ++ vec.quick_push (constants[i]); ++ else ++ /* ??? Push a don't-care value. */ ++ vec.quick_push (one_constant); ++ orig[1] = vec.build (); ++ } + if (conv_code == ERROR_MARK) + gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, orig[0], + orig[1], op2); ++ else if (TREE_CODE (orig[1]) == VECTOR_CST) ++ { ++ gimple *conv ++ = gimple_build_assign (make_ssa_name (type), conv_code, orig[0]); ++ orig[0] = gimple_assign_lhs (conv); ++ gsi_insert_before (gsi, conv, GSI_SAME_STMT); ++ gimple_assign_set_rhs_with_ops (gsi, VEC_PERM_EXPR, ++ orig[0], orig[1], op2); ++ } + else + { + gimple *perm diff --git a/fix-ICE-in-reload.patch b/fix-ICE-in-reload.patch new file mode 100644 index 0000000..e1cd079 --- /dev/null +++ b/fix-ICE-in-reload.patch @@ -0,0 +1,369 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-Remove-gimple_call_types_likely_match_p-PR-70929.patch +7313607478c11e9455a32fb0dbfd7867e04ea96a + +diff -uprN a/gcc/auto-profile.c b/gcc/auto-profile.c +--- a/gcc/auto-profile.c 2020-03-31 09:51:52.000000000 +0800 ++++ b/gcc/auto-profile.c 2020-07-28 11:15:31.469393370 +0800 +@@ -605,8 +605,6 @@ function_instance::find_icall_target_map + get_identifier (afdo_string_table->get_name (callee))); + if (node == NULL) + continue; +- if (!check_ic_target (stmt, node)) +- continue; + (*map)[callee] = iter->second->total_count (); + ret += iter->second->total_count (); + } +@@ -1033,7 +1031,7 @@ afdo_indirect_call (gimple_stmt_iterator + print_generic_expr (dump_file, direct_call->decl, TDF_SLIM); + } + +- if (direct_call == NULL || !check_ic_target (stmt, direct_call)) ++ if (direct_call == NULL) + { + if (dump_file) + fprintf (dump_file, " not transforming\n"); +diff -uprN a/gcc/cgraph.c b/gcc/cgraph.c +--- a/gcc/cgraph.c 2020-07-28 11:18:05.385393370 +0800 ++++ b/gcc/cgraph.c 2020-07-28 11:15:31.469393370 +0800 +@@ -876,19 +876,8 @@ symbol_table::create_edge (cgraph_node * + edge->can_throw_external + = call_stmt ? 
stmt_can_throw_external (DECL_STRUCT_FUNCTION (caller->decl), + call_stmt) : false; +- if (call_stmt +- && callee && callee->decl +- && !gimple_check_call_matching_types (call_stmt, callee->decl, +- false)) +- { +- edge->inline_failed = CIF_MISMATCHED_ARGUMENTS; +- edge->call_stmt_cannot_inline_p = true; +- } +- else +- { +- edge->inline_failed = CIF_FUNCTION_NOT_CONSIDERED; +- edge->call_stmt_cannot_inline_p = false; +- } ++ edge->inline_failed = CIF_FUNCTION_NOT_CONSIDERED; ++ edge->call_stmt_cannot_inline_p = false; + + edge->indirect_info = NULL; + edge->indirect_inlining_edge = 0; +@@ -1253,13 +1242,6 @@ cgraph_edge::make_direct (cgraph_node *c + /* Insert to callers list of the new callee. */ + edge->set_callee (callee); + +- if (call_stmt +- && !gimple_check_call_matching_types (call_stmt, callee->decl, false)) +- { +- call_stmt_cannot_inline_p = true; +- inline_failed = CIF_MISMATCHED_ARGUMENTS; +- } +- + /* We need to re-determine the inlining status of the edge. */ + initialize_inline_failed (edge); + return edge; +@@ -1288,28 +1270,9 @@ cgraph_edge::redirect_call_stmt_to_calle + substitution), forget about speculating. */ + if (decl) + e = e->resolve_speculation (decl); +- /* If types do not match, speculation was likely wrong. +- The direct edge was possibly redirected to the clone with a different +- signature. We did not update the call statement yet, so compare it +- with the reference that still points to the proper type. */ +- else if (!gimple_check_call_matching_types (e->call_stmt, +- ref->referred->decl, +- true)) +- { +- if (dump_file) +- fprintf (dump_file, "Not expanding speculative call of %s -> %s\n" +- "Type mismatch.\n", +- e->caller->dump_name (), +- e->callee->dump_name ()); +- e = e->resolve_speculation (); +- /* We are producing the final function body and will throw away the +- callgraph edges really soon. Reset the counts/frequencies to +- keep verifier happy in the case of roundoff errors. */ +- e->count = gimple_bb (e->call_stmt)->count; +- } +- /* Expand speculation into GIMPLE code. */ + else + { ++ /* Expand speculation into GIMPLE code. */ + if (dump_file) + { + fprintf (dump_file, +@@ -3664,102 +3627,6 @@ cgraph_node::get_fun (void) + return fun; + } + +-/* Verify if the type of the argument matches that of the function +- declaration. If we cannot verify this or there is a mismatch, +- return false. */ +- +-static bool +-gimple_check_call_args (gimple *stmt, tree fndecl, bool args_count_match) +-{ +- tree parms, p; +- unsigned int i, nargs; +- +- /* Calls to internal functions always match their signature. */ +- if (gimple_call_internal_p (stmt)) +- return true; +- +- nargs = gimple_call_num_args (stmt); +- +- /* Get argument types for verification. */ +- if (fndecl) +- parms = TYPE_ARG_TYPES (TREE_TYPE (fndecl)); +- else +- parms = TYPE_ARG_TYPES (gimple_call_fntype (stmt)); +- +- /* Verify if the type of the argument matches that of the function +- declaration. If we cannot verify this or there is a mismatch, +- return false. */ +- if (fndecl && DECL_ARGUMENTS (fndecl)) +- { +- for (i = 0, p = DECL_ARGUMENTS (fndecl); +- i < nargs; +- i++, p = DECL_CHAIN (p)) +- { +- tree arg; +- /* We cannot distinguish a varargs function from the case +- of excess parameters, still deferring the inlining decision +- to the callee is possible. 
*/ +- if (!p) +- break; +- arg = gimple_call_arg (stmt, i); +- if (p == error_mark_node +- || DECL_ARG_TYPE (p) == error_mark_node +- || arg == error_mark_node +- || (!types_compatible_p (DECL_ARG_TYPE (p), TREE_TYPE (arg)) +- && !fold_convertible_p (DECL_ARG_TYPE (p), arg))) +- return false; +- } +- if (args_count_match && p) +- return false; +- } +- else if (parms) +- { +- for (i = 0, p = parms; i < nargs; i++, p = TREE_CHAIN (p)) +- { +- tree arg; +- /* If this is a varargs function defer inlining decision +- to callee. */ +- if (!p) +- break; +- arg = gimple_call_arg (stmt, i); +- if (TREE_VALUE (p) == error_mark_node +- || arg == error_mark_node +- || TREE_CODE (TREE_VALUE (p)) == VOID_TYPE +- || (!types_compatible_p (TREE_VALUE (p), TREE_TYPE (arg)) +- && !fold_convertible_p (TREE_VALUE (p), arg))) +- return false; +- } +- } +- else +- { +- if (nargs != 0) +- return false; +- } +- return true; +-} +- +-/* Verify if the type of the argument and lhs of CALL_STMT matches +- that of the function declaration CALLEE. If ARGS_COUNT_MATCH is +- true, the arg count needs to be the same. +- If we cannot verify this or there is a mismatch, return false. */ +- +-bool +-gimple_check_call_matching_types (gimple *call_stmt, tree callee, +- bool args_count_match) +-{ +- tree lhs; +- +- if ((DECL_RESULT (callee) +- && !DECL_BY_REFERENCE (DECL_RESULT (callee)) +- && (lhs = gimple_call_lhs (call_stmt)) != NULL_TREE +- && !useless_type_conversion_p (TREE_TYPE (DECL_RESULT (callee)), +- TREE_TYPE (lhs)) +- && !fold_convertible_p (TREE_TYPE (DECL_RESULT (callee)), lhs)) +- || !gimple_check_call_args (call_stmt, callee, args_count_match)) +- return false; +- return true; +-} +- + /* Reset all state within cgraph.c so that we can rerun the compiler + within the same process. For use by toplev::finalize. */ + +diff -uprN a/gcc/cgraph.h b/gcc/cgraph.h +--- a/gcc/cgraph.h 2020-07-28 11:18:04.361393370 +0800 ++++ b/gcc/cgraph.h 2020-07-28 11:15:31.469393370 +0800 +@@ -2412,8 +2412,6 @@ bool cgraph_function_possibly_inlined_p + const char* cgraph_inline_failed_string (cgraph_inline_failed_t); + cgraph_inline_failed_type_t cgraph_inline_failed_type (cgraph_inline_failed_t); + +-extern bool gimple_check_call_matching_types (gimple *, tree, bool); +- + /* In cgraphunit.c */ + void cgraphunit_c_finalize (void); + +diff -uprN a/gcc/cif-code.def b/gcc/cif-code.def +--- a/gcc/cif-code.def 2020-03-31 09:51:52.000000000 +0800 ++++ b/gcc/cif-code.def 2020-07-28 11:15:31.469393370 +0800 +@@ -88,10 +88,6 @@ DEFCIFCODE(NOT_DECLARED_INLINED, CIF_FIN + N_("function not declared inline and code size would grow")) + + /* Caller and callee disagree on the arguments. */ +-DEFCIFCODE(MISMATCHED_ARGUMENTS, CIF_FINAL_ERROR, +- N_("mismatched arguments")) +- +-/* Caller and callee disagree on the arguments. 
*/ + DEFCIFCODE(LTO_MISMATCHED_DECLARATIONS, CIF_FINAL_ERROR, + N_("mismatched declarations during linktime optimization")) + +diff -uprN a/gcc/ipa-inline.c b/gcc/ipa-inline.c +--- a/gcc/ipa-inline.c 2020-07-28 11:18:04.377393370 +0800 ++++ b/gcc/ipa-inline.c 2020-07-28 11:15:31.469393370 +0800 +@@ -2844,14 +2844,6 @@ early_inliner (function *fun) + = estimate_num_insns (edge->call_stmt, &eni_size_weights); + es->call_stmt_time + = estimate_num_insns (edge->call_stmt, &eni_time_weights); +- +- if (edge->callee->decl +- && !gimple_check_call_matching_types ( +- edge->call_stmt, edge->callee->decl, false)) +- { +- edge->inline_failed = CIF_MISMATCHED_ARGUMENTS; +- edge->call_stmt_cannot_inline_p = true; +- } + } + if (iterations < PARAM_VALUE (PARAM_EARLY_INLINER_MAX_ITERATIONS) - 1) + ipa_update_overall_fn_summary (node); +diff -uprN a/gcc/ipa-prop.c b/gcc/ipa-prop.c +--- a/gcc/ipa-prop.c 2020-07-28 11:18:04.377393370 +0800 ++++ b/gcc/ipa-prop.c 2020-07-28 11:15:31.469393370 +0800 +@@ -3841,11 +3841,6 @@ update_indirect_edges_after_inlining (st + else if (new_direct_edge) + { + new_direct_edge->indirect_inlining_edge = 1; +- if (new_direct_edge->call_stmt) +- new_direct_edge->call_stmt_cannot_inline_p +- = !gimple_check_call_matching_types ( +- new_direct_edge->call_stmt, +- new_direct_edge->callee->decl, false); + if (new_edges) + { + new_edges->safe_push (new_direct_edge); +diff -uprN a/gcc/testsuite/gcc.dg/winline-10.c b/gcc/testsuite/gcc.dg/winline-10.c +--- a/gcc/testsuite/gcc.dg/winline-10.c 2020-03-31 09:51:43.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/winline-10.c 2020-07-28 11:15:31.473393370 +0800 +@@ -1,9 +1,9 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -Winline" } */ ++/* { dg-options "-O2 -Winline -fopt-info-optimized-inline=stderr" } */ + + struct s { int a; }; + +-inline void f (x) /* { dg-warning "inlining .* mismatched arg" } */ ++inline void f (x) + int x; + { + asm (""); +@@ -11,7 +11,7 @@ inline void f (x) /* { dg-warning "inlin + + void g (struct s x) + { +- f (x); /* { dg-message "called from here" } */ ++ f (x); /* { dg-optimized "Inlining f.* into g" } */ + } + + void f (int x); /* { dg-warning "follows non-prototype definition" } */ +diff -uprN a/gcc/testsuite/g++.dg/lto/pr70929_0.C b/gcc/testsuite/g++.dg/lto/pr70929_0.C +--- a/gcc/testsuite/g++.dg/lto/pr70929_0.C 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/g++.dg/lto/pr70929_0.C 2020-07-28 11:15:31.469393370 +0800 +@@ -0,0 +1,18 @@ ++// { dg-lto-do run } ++// { dg-lto-options { "-O3 -flto" } } ++ ++struct s ++{ ++ int a; ++ s() {a=1;} ++ ~s() {} ++}; ++int t(struct s s); ++int main() ++{ ++ s s; ++ int v=t(s); ++ if (!__builtin_constant_p (v)) ++ __builtin_abort (); ++ return 0; ++} +diff -uprN a/gcc/testsuite/g++.dg/lto/pr70929_1.C b/gcc/testsuite/g++.dg/lto/pr70929_1.C +--- a/gcc/testsuite/g++.dg/lto/pr70929_1.C 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/g++.dg/lto/pr70929_1.C 2020-07-28 11:15:31.473393370 +0800 +@@ -0,0 +1,10 @@ ++struct s ++{ ++ int a; ++ s() {a=1;} ++ ~s() {} ++}; ++int t(struct s s) ++{ ++ return s.a; ++} +diff -uprN a/gcc/value-prof.c b/gcc/value-prof.c +--- a/gcc/value-prof.c 2020-03-31 09:51:30.000000000 +0800 ++++ b/gcc/value-prof.c 2020-07-28 11:17:08.281393370 +0800 +@@ -1249,25 +1249,6 @@ find_func_by_profile_id (int profile_id) + return NULL; + } + +-/* Perform sanity check on the indirect call target. Due to race conditions, +- false function target may be attributed to an indirect call site. 
If the +- call expression type mismatches with the target function's type, expand_call +- may ICE. Here we only do very minimal sanity check just to make compiler happy. +- Returns true if TARGET is considered ok for call CALL_STMT. */ +- +-bool +-check_ic_target (gcall *call_stmt, struct cgraph_node *target) +-{ +- if (gimple_check_call_matching_types (call_stmt, target->decl, true)) +- return true; +- +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, call_stmt, +- "Skipping target %s with mismatching types for icall\n", +- target->name ()); +- return false; +-} +- + /* Do transformation + + if (actual_callee_address == address_of_most_common_function/method) +@@ -1473,21 +1454,6 @@ gimple_ic_transform (gimple_stmt_iterato + return false; + } + +- if (!check_ic_target (stmt, direct_call)) +- { +- if (dump_file) +- { +- fprintf (dump_file, "Indirect call -> direct call "); +- print_generic_expr (dump_file, gimple_call_fn (stmt), TDF_SLIM); +- fprintf (dump_file, "=> "); +- print_generic_expr (dump_file, direct_call->decl, TDF_SLIM); +- fprintf (dump_file, " transformation skipped because of type mismatch"); +- print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); +- } +- gimple_remove_histogram_value (cfun, stmt, histogram); +- return false; +- } +- + if (dump_file) + { + fprintf (dump_file, "Indirect call -> direct call "); diff --git a/fix-ICE-in-store_constructor.patch b/fix-ICE-in-store_constructor.patch new file mode 100644 index 0000000..98cae50 --- /dev/null +++ b/fix-ICE-in-store_constructor.patch @@ -0,0 +1,356 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-SLP-SLP-vectorization-vectorize-vector-constructors.patch +818b3293f4545d899148810f4f7d676b81e989dd + +diff -N -urp a/gcc/expr.c b/gcc/expr.c +--- a/gcc/expr.c 2020-07-24 11:19:53.840000000 +0800 ++++ b/gcc/expr.c 2020-07-24 11:56:50.128000000 +0800 +@@ -6788,6 +6788,7 @@ store_constructor (tree exp, rtx target, + && n_elts.is_constant (&const_n_elts)) + { + machine_mode emode = eltmode; ++ bool vector_typed_elts_p = false; + + if (CONSTRUCTOR_NELTS (exp) + && (TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (exp, 0)->value)) +@@ -6798,13 +6799,14 @@ store_constructor (tree exp, rtx target, + * TYPE_VECTOR_SUBPARTS (etype), + n_elts)); + emode = TYPE_MODE (etype); ++ vector_typed_elts_p = true; + } + icode = convert_optab_handler (vec_init_optab, mode, emode); + if (icode != CODE_FOR_nothing) + { + unsigned int i, n = const_n_elts; + +- if (emode != eltmode) ++ if (vector_typed_elts_p) + { + n = CONSTRUCTOR_NELTS (exp); + vec_vec_init_p = true; +diff -N -urp a/gcc/testsuite/gcc.dg/vect/bb-slp-40.c b/gcc/testsuite/gcc.dg/vect/bb-slp-40.c +--- a/gcc/testsuite/gcc.dg/vect/bb-slp-40.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-40.c 2020-07-24 11:56:50.128000000 +0800 +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -fdump-tree-slp-all" } */ ++/* { dg-require-effective-target vect_int } */ ++ ++char g_d[1024], g_s1[1024], g_s2[1024]; ++void foo(void) ++{ ++ char *d = g_d, *s1 = g_s1, *s2 = g_s2; ++ ++ for ( int y = 0; y < 128; y++ ) ++ { ++ d[0 ] = s1[0 ] + s2[0 ]; ++ d[1 ] = s1[1 ] + s2[1 ]; ++ d[2 ] = s1[2 ] + s2[2 ]; ++ d[3 ] = s1[3 ] + s2[3 ]; ++ d[4 ] = s1[4 ] + s2[4 ]; ++ d[5 ] = s1[5 ] + s2[5 ]; ++ d[6 ] = s1[6 ] + s2[6 ]; ++ d[7 ] = s1[7 ] + s2[7 ]; ++ d[8 ] = s1[8 ] + s2[8 ]; ++ d[9 ] = s1[9 ] + s2[9 ]; ++ d[10] = s1[10] + s2[10]; ++ d[11] = s1[11] + s2[11]; ++ d[12] = 
s1[12] + s2[12]; ++ d[13] = s1[13] + s2[13]; ++ d[14] = s1[14] + s2[14]; ++ d[15] = s1[15] + s2[15]; ++ d += 16; ++ } ++} ++ ++/* See that we vectorize an SLP instance. */ ++/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 1 "slp1" } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "slp1" } } */ +diff -N -urp a/gcc/testsuite/gcc.dg/vect/bb-slp-41.c b/gcc/testsuite/gcc.dg/vect/bb-slp-41.c +--- a/gcc/testsuite/gcc.dg/vect/bb-slp-41.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-41.c 2020-07-24 11:56:50.128000000 +0800 +@@ -0,0 +1,61 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3 -fdump-tree-slp-all -fno-vect-cost-model" } */ ++/* { dg-require-effective-target vect_int } */ ++ ++#define ARR_SIZE 1000 ++ ++void foo (int *a, int *b) ++{ ++ int i; ++ for (i = 0; i < (ARR_SIZE - 2); ++i) ++ a[i] = b[0] + b[1] + b[i+1] + b[i+2]; ++} ++ ++void bar (int *a, int *b) ++{ ++ int i; ++ for (i = 0; i < (ARR_SIZE - 2); ++i) ++ { ++ a[i] = b[0]; ++ } ++ for (i = 0; i < (ARR_SIZE - 2); ++i) ++ { ++ a[i] = a[i] + b[1]; ++ } ++ for (i = 0; i < (ARR_SIZE - 2); ++i) ++ { ++ a[i] = a[i] + b[i+1]; ++ } ++ for (i = 0; i < (ARR_SIZE - 2); ++i) ++ { ++ a[i] = a[i] + b[i+2]; ++ } ++} ++ ++int main () ++{ ++ int a1[ARR_SIZE]; ++ int a2[ARR_SIZE]; ++ int b[ARR_SIZE]; ++ int i; ++ ++ for (i = 0; i < ARR_SIZE; i++) ++ { ++ a1[i] = 0; ++ a2[i] = 0; ++ b[i] = i; ++ } ++ ++ foo (a1, b); ++ bar (a2, b); ++ ++ for (i = 0; i < ARR_SIZE; i++) ++ if (a1[i] != a2[i]) ++ return 1; ++ ++ return 0; ++ ++} ++/* See that we vectorize an SLP instance. */ ++/* { dg-final { scan-tree-dump-times "Found vectorizable constructor" 12 "slp1" } } */ ++/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 4 "slp1" } } */ +diff -N -urp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h +--- a/gcc/tree-vectorizer.h 2020-07-24 11:19:51.976000000 +0800 ++++ b/gcc/tree-vectorizer.h 2020-07-24 11:56:50.132000000 +0800 +@@ -151,6 +151,10 @@ typedef struct _slp_instance { + /* The root of SLP tree. */ + slp_tree root; + ++ /* For vector constructors, the constructor stmt that the SLP tree is built ++ from, NULL otherwise. */ ++ stmt_vec_info root_stmt; ++ + /* Size of groups of scalar stmts that will be replaced by SIMD stmt/s. 
*/ + unsigned int group_size; + +@@ -170,6 +174,7 @@ typedef struct _slp_instance { + #define SLP_INSTANCE_GROUP_SIZE(S) (S)->group_size + #define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor + #define SLP_INSTANCE_LOADS(S) (S)->loads ++#define SLP_INSTANCE_ROOT_STMT(S) (S)->root_stmt + + #define SLP_TREE_CHILDREN(S) (S)->children + #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts +diff -N -urp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c +--- a/gcc/tree-vect-slp.c 2020-07-24 11:19:51.980000000 +0800 ++++ b/gcc/tree-vect-slp.c 2020-07-24 11:56:50.132000000 +0800 +@@ -2019,6 +2019,7 @@ vect_analyze_slp_instance (vec_info *vin + unsigned int i; + struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); + vec scalar_stmts; ++ bool constructor = false; + + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) + { +@@ -2032,6 +2033,13 @@ vect_analyze_slp_instance (vec_info *vin + vectype = STMT_VINFO_VECTYPE (stmt_info); + group_size = REDUC_GROUP_SIZE (stmt_info); + } ++ else if (is_gimple_assign (stmt_info->stmt) ++ && gimple_assign_rhs_code (stmt_info->stmt) == CONSTRUCTOR) ++ { ++ vectype = TREE_TYPE (gimple_assign_rhs1 (stmt_info->stmt)); ++ group_size = CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt_info->stmt)); ++ constructor = true; ++ } + else + { + gcc_assert (is_a (vinfo)); +@@ -2079,6 +2087,25 @@ vect_analyze_slp_instance (vec_info *vin + STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) + = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ())); + } ++ else if (constructor) ++ { ++ tree rhs = gimple_assign_rhs1 (stmt_info->stmt); ++ tree val; ++ FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (rhs), i, val) ++ { ++ if (TREE_CODE (val) == SSA_NAME) ++ { ++ gimple* def = SSA_NAME_DEF_STMT (val); ++ stmt_vec_info def_info = vinfo->lookup_stmt (def); ++ /* Value is defined in another basic block. */ ++ if (!def_info) ++ return false; ++ scalar_stmts.safe_push (def_info); ++ } ++ else ++ return false; ++ } ++ } + else + { + /* Collect reduction statements. */ +@@ -2164,6 +2191,8 @@ vect_analyze_slp_instance (vec_info *vin + SLP_INSTANCE_GROUP_SIZE (new_instance) = group_size; + SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; + SLP_INSTANCE_LOADS (new_instance) = vNULL; ++ SLP_INSTANCE_ROOT_STMT (new_instance) = constructor ? stmt_info : NULL; ++ + vect_gather_slp_loads (new_instance, node); + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, +@@ -3032,6 +3061,43 @@ vect_bb_vectorization_profitable_p (bb_v + return true; + } + ++/* Find any vectorizable constructors and add them to the grouped_store ++ array. 
*/ ++ ++static void ++vect_slp_check_for_constructors (bb_vec_info bb_vinfo) ++{ ++ gimple_stmt_iterator gsi; ++ ++ for (gsi = bb_vinfo->region_begin; ++ gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi)) ++ { ++ gimple *stmt = gsi_stmt (gsi); ++ ++ if (is_gimple_assign (stmt) ++ && gimple_assign_rhs_code (stmt) == CONSTRUCTOR ++ && TREE_CODE (gimple_assign_lhs (stmt)) == SSA_NAME ++ && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE) ++ { ++ tree rhs = gimple_assign_rhs1 (stmt); ++ ++ if (CONSTRUCTOR_NELTS (rhs) == 0) ++ continue; ++ ++ poly_uint64 subparts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (rhs)); ++ ++ if (maybe_ne (subparts, CONSTRUCTOR_NELTS (rhs))) ++ continue; ++ ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Found vectorizable constructor: %G\n", stmt); ++ stmt_vec_info stmt_info = bb_vinfo->lookup_stmt (stmt); ++ BB_VINFO_GROUPED_STORES (bb_vinfo).safe_push (stmt_info); ++ } ++ } ++} ++ + /* Check if the region described by BB_VINFO can be vectorized, returning + true if so. When returning false, set FATAL to true if the same failure + would prevent vectorization at other vector sizes, false if it is still +@@ -3079,6 +3145,8 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vi + return false; + } + ++ vect_slp_check_for_constructors (bb_vinfo); ++ + /* If there are no grouped stores in the region there is no need + to continue with pattern recog as vect_analyze_slp will fail + anyway. */ +@@ -3135,6 +3203,8 @@ vect_slp_analyze_bb_1 (bb_vec_info bb_vi + relevant. */ + vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance)); + vect_mark_slp_stmts_relevant (SLP_INSTANCE_TREE (instance)); ++ if (SLP_INSTANCE_ROOT_STMT (instance)) ++ STMT_SLP_TYPE (SLP_INSTANCE_ROOT_STMT (instance)) = pure_slp; + + i++; + } +@@ -4175,6 +4245,49 @@ vect_remove_slp_scalar_calls (slp_tree n + vect_remove_slp_scalar_calls (node, visited); + } + ++/* Vectorize the instance root. */ ++ ++void ++vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance) ++{ ++ gassign *rstmt; ++ ++ if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1) ++ { ++ stmt_vec_info child_stmt_info; ++ int j; ++ ++ FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info) ++ { ++ tree vect_lhs = gimple_get_lhs (child_stmt_info->stmt); ++ tree root_lhs = gimple_get_lhs (instance->root_stmt->stmt); ++ rstmt = gimple_build_assign (root_lhs, vect_lhs); ++ break; ++ } ++ } ++ else if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) > 1) ++ { ++ int nelts = SLP_TREE_NUMBER_OF_VEC_STMTS (node); ++ stmt_vec_info child_stmt_info; ++ int j; ++ vec *v; ++ vec_alloc (v, nelts); ++ ++ FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (node), j, child_stmt_info) ++ { ++ CONSTRUCTOR_APPEND_ELT (v, ++ NULL_TREE, ++ gimple_get_lhs (child_stmt_info->stmt)); ++ } ++ tree lhs = gimple_get_lhs (instance->root_stmt->stmt); ++ tree rtype = TREE_TYPE (gimple_assign_rhs1 (instance->root_stmt->stmt)); ++ tree r_constructor = build_constructor (rtype, v); ++ rstmt = gimple_build_assign (lhs, r_constructor); ++ } ++ gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt); ++ gsi_replace (&rgsi, rstmt, true); ++} ++ + /* Generate vector code for all SLP instances in the loop/basic block. */ + + void +@@ -4189,9 +4302,13 @@ vect_schedule_slp (vec_info *vinfo) + slp_instances = vinfo->slp_instances; + FOR_EACH_VEC_ELT (slp_instances, i, instance) + { ++ slp_tree node = SLP_INSTANCE_TREE (instance); + /* Schedule the tree of INSTANCE. 
*/ +- vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance), +- instance, bst_map); ++ vect_schedule_slp_instance (node, instance, bst_map); ++ ++ if (SLP_INSTANCE_ROOT_STMT (instance)) ++ vectorize_slp_instance_root_stmt (node, instance); ++ + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "vectorizing stmts using SLP.\n"); +@@ -4220,6 +4337,9 @@ vect_schedule_slp (vec_info *vinfo) + if (!STMT_VINFO_DATA_REF (store_info)) + break; + ++ if (SLP_INSTANCE_ROOT_STMT (instance)) ++ continue; ++ + store_info = vect_orig_stmt (store_info); + /* Free the attached stmt_vec_info and remove the stmt. */ + vinfo->remove_stmt (store_info); diff --git a/fix-ICE-in-vec.patch b/fix-ICE-in-vec.patch new file mode 100644 index 0000000..30d1c7e --- /dev/null +++ b/fix-ICE-in-vec.patch @@ -0,0 +1,93 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-re-PR-tree-optimization-92345-ICE-in-vec-_stmt_vec_i.patch +a6ba623777513e31721030092e4d786f461a0f06 + +diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr92345.c b/gcc/testsuite/gcc.dg/torture/pr92345.c +--- a/gcc/testsuite/gcc.dg/torture/pr92345.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/torture/pr92345.c 2020-08-10 15:08:19.992000000 +0800 +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-ftree-vectorize" } */ ++ ++long int x1; ++int fr; ++ ++int ++us (int sk, int jx) ++{ ++ while (sk < 1) ++ { ++ jx *= 2; ++ fr += x1 + 1; ++ ++sk; ++ } ++ ++ return jx; ++} +diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +--- a/gcc/tree-vect-loop.c 2020-08-10 15:07:44.456000000 +0800 ++++ b/gcc/tree-vect-loop.c 2020-08-10 15:08:19.992000000 +0800 +@@ -155,7 +155,7 @@ along with GCC; see the file COPYING3. + + static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *); + static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info, +- bool *); ++ bool *, bool *); + + /* Subroutine of vect_determine_vf_for_stmt that handles only one + statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE +@@ -489,7 +489,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_i + tree init, step; + auto_vec worklist; + gphi_iterator gsi; +- bool double_reduc; ++ bool double_reduc, reduc_chain; + + DUMP_VECT_SCOPE ("vect_analyze_scalar_cycles"); + +@@ -561,7 +561,8 @@ vect_analyze_scalar_cycles_1 (loop_vec_i + && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type); + + stmt_vec_info reduc_stmt_info +- = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc); ++ = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc, ++ &reduc_chain); + if (reduc_stmt_info) + { + STMT_VINFO_REDUC_DEF (stmt_vinfo) = reduc_stmt_info; +@@ -596,7 +597,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_i + /* Store the reduction cycles for possible vectorization in + loop-aware SLP if it was not detected as reduction + chain. */ +- if (! REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info)) ++ if (! 
reduc_chain) + LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push + (reduc_stmt_info); + } +@@ -3032,7 +3033,7 @@ check_reduction_path (dump_user_location + + static stmt_vec_info + vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, +- bool *double_reduc) ++ bool *double_reduc, bool *reduc_chain_p) + { + gphi *phi = as_a (phi_info->stmt); + gimple *phi_use_stmt = NULL; +@@ -3040,6 +3041,7 @@ vect_is_simple_reduction (loop_vec_info + use_operand_p use_p; + + *double_reduc = false; ++ *reduc_chain_p = false; + STMT_VINFO_REDUC_TYPE (phi_info) = TREE_CODE_REDUCTION; + + tree phi_name = PHI_RESULT (phi); +@@ -3214,6 +3216,7 @@ vect_is_simple_reduction (loop_vec_info + LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]); + REDUC_GROUP_SIZE (reduc_chain[0]) = reduc_chain.length (); + ++ *reduc_chain_p = true; + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "reduction: detected reduction chain\n"); diff --git a/fix-ICE-in-vect_create_epilog_for_reduction.patch b/fix-ICE-in-vect_create_epilog_for_reduction.patch new file mode 100644 index 0000000..fef451b --- /dev/null +++ b/fix-ICE-in-vect_create_epilog_for_reduction.patch @@ -0,0 +1,81 @@ +diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +--- a/gcc/tree-vect-loop.c 2020-07-09 10:42:35.824000000 +0800 ++++ b/gcc/tree-vect-loop.c 2020-07-09 10:43:23.920000000 +0800 +@@ -1143,7 +1143,9 @@ vect_compute_single_scalar_iteration_cos + else + kind = scalar_store; + } +- else ++ else if (vect_nop_conversion_p (stmt_info)) ++ continue; ++ else + kind = scalar_stmt; + + record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), +diff -Nurp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h +--- a/gcc/tree-vectorizer.h 2020-07-09 10:42:35.824000000 +0800 ++++ b/gcc/tree-vectorizer.h 2020-07-09 10:43:23.920000000 +0800 +@@ -1645,6 +1645,7 @@ extern tree vect_get_vec_def_for_stmt_co + extern bool vect_transform_stmt (stmt_vec_info, gimple_stmt_iterator *, + slp_tree, slp_instance); + extern void vect_remove_stores (stmt_vec_info); ++extern bool vect_nop_conversion_p (stmt_vec_info); + extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree, + slp_instance, stmt_vector_for_cost *); + extern void vect_get_load_cost (stmt_vec_info, int, bool, +diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c +--- a/gcc/tree-vect-slp.c 2020-07-09 10:42:35.736000000 +0800 ++++ b/gcc/tree-vect-slp.c 2020-07-09 10:43:23.920000000 +0800 +@@ -2940,6 +2940,8 @@ vect_bb_slp_scalar_cost (basic_block bb, + else + kind = scalar_store; + } ++ else if (vect_nop_conversion_p (stmt_info)) ++ continue; + else + kind = scalar_stmt; + record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body); +diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +--- a/gcc/tree-vect-stmts.c 2020-07-09 10:42:35.732000000 +0800 ++++ b/gcc/tree-vect-stmts.c 2020-07-09 10:43:23.920000000 +0800 +@@ -5283,6 +5283,29 @@ vectorizable_conversion (stmt_vec_info s + return true; + } + ++/* Return true if we can assume from the scalar form of STMT_INFO that ++ neither the scalar nor the vector forms will generate code. STMT_INFO ++ is known not to involve a data reference. 
*/ ++ ++bool ++vect_nop_conversion_p (stmt_vec_info stmt_info) ++{ ++ gassign *stmt = dyn_cast (stmt_info->stmt); ++ if (!stmt) ++ return false; ++ ++ tree lhs = gimple_assign_lhs (stmt); ++ tree_code code = gimple_assign_rhs_code (stmt); ++ tree rhs = gimple_assign_rhs1 (stmt); ++ ++ if (code == SSA_NAME || code == VIEW_CONVERT_EXPR) ++ return true; ++ ++ if (CONVERT_EXPR_CODE_P (code)) ++ return tree_nop_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs)); ++ ++ return false; ++} + + /* Function vectorizable_assignment. + +@@ -5398,7 +5421,9 @@ vectorizable_assignment (stmt_vec_info s + { + STMT_VINFO_TYPE (stmt_info) = assignment_vec_info_type; + DUMP_VECT_SCOPE ("vectorizable_assignment"); +- vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec); ++ if (!vect_nop_conversion_p (stmt_info)) ++ vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, ++ cost_vec); + return true; + } + diff --git a/fix-ICE-in-vect_create_epilog_for_reduction_2.patch b/fix-ICE-in-vect_create_epilog_for_reduction_2.patch new file mode 100644 index 0000000..1130c05 --- /dev/null +++ b/fix-ICE-in-vect_create_epilog_for_reduction_2.patch @@ -0,0 +1,33 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-re-PR-tree-optimization-92162-ICE-in-vect_create_epi.patch +53b15ca96116544a7a3ca8bc5f4e1649b74f3d45 + +diff -Nurp gcc-9.3.0_org/gcc/tree-vect-loop.c gcc-9.3.0/gcc/tree-vect-loop.c +--- gcc-9.3.0_org/gcc/tree-vect-loop.c 2020-08-17 10:23:55.768000000 +0800 ++++ gcc-9.3.0/gcc/tree-vect-loop.c 2020-08-17 10:27:15.848000000 +0800 +@@ -4574,9 +4574,9 @@ vect_create_epilog_for_reduction (stmt_v + (CCOMPARE). The then and else values mirror the main VEC_COND_EXPR: + the reduction phi corresponds to NEW_PHI_TREE and the new values + correspond to INDEX_BEFORE_INCR. */ +- gcc_assert (STMT_VINFO_REDUC_IDX (stmt_info) >= 1); ++ gcc_assert (STMT_VINFO_REDUC_IDX (reduc_info) >= 1); + tree index_cond_expr; +- if (STMT_VINFO_REDUC_IDX (stmt_info) == 2) ++ if (STMT_VINFO_REDUC_IDX (reduc_info) == 2) + index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type, + ccompare, indx_before_incr, new_phi_tree); + else +diff -Nurp gcc-9.3.0_org/gcc/tree-vect-stmts.c gcc-9.3.0/gcc/tree-vect-stmts.c +--- gcc-9.3.0_org/gcc/tree-vect-stmts.c 2020-08-17 10:23:53.960000000 +0800 ++++ gcc-9.3.0/gcc/tree-vect-stmts.c 2020-08-17 10:27:15.848000000 +0800 +@@ -9077,7 +9077,7 @@ vectorizable_condition (stmt_vec_info st + return false; + reduc_info = info_for_reduction (stmt_info); + reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); +- reduc_index = STMT_VINFO_REDUC_IDX (stmt_info); ++ reduc_index = STMT_VINFO_REDUC_IDX (reduc_info); + gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION + || reduc_index != -1); + } diff --git a/fix-ICE-in-vect_create_epilog_for_reduction_3.patch b/fix-ICE-in-vect_create_epilog_for_reduction_3.patch new file mode 100644 index 0000000..f74be1e --- /dev/null +++ b/fix-ICE-in-vect_create_epilog_for_reduction_3.patch @@ -0,0 +1,87 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-re-PR-tree-optimization-92554-ICE-in-vect_create_epi.patch +04c4599d30b1eb7c21d39b15a685aa1d9b8bf968 + +diff -Nurp a/gcc/testsuite/gcc.dg/vect/pr92554.c b/gcc/testsuite/gcc.dg/vect/pr92554.c +--- a/gcc/testsuite/gcc.dg/vect/pr92554.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/pr92554.c 2020-08-17 11:08:28.424000000 +0800 +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++ ++short int w9; ++ ++void __attribute__ ((simd)) ++zc (int in) ++{ ++ int va = 1; ++ ++ w9 *= va != 0 ? in < 0 : 0; ++} +diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +--- a/gcc/tree-vect-loop.c 2020-08-17 10:41:56.756000000 +0800 ++++ b/gcc/tree-vect-loop.c 2020-08-17 11:09:36.474259880 +0800 +@@ -4515,12 +4515,21 @@ vect_create_epilog_for_reduction (stmt_v + zeroes. */ + if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION) + { +- tree indx_before_incr, indx_after_incr; +- poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype); +- +- gimple *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info)->stmt; ++ stmt_vec_info cond_info = STMT_VINFO_REDUC_DEF (reduc_info); ++ cond_info = vect_stmt_to_vectorize (cond_info); ++ while (gimple_assign_rhs_code (cond_info->stmt) != COND_EXPR) ++ { ++ cond_info ++ = loop_vinfo->lookup_def (gimple_op (cond_info->stmt, ++ 1 + STMT_VINFO_REDUC_IDX ++ (cond_info))); ++ cond_info = vect_stmt_to_vectorize (cond_info); ++ } ++ gimple *vec_stmt = STMT_VINFO_VEC_STMT (cond_info)->stmt; + gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); + ++ tree indx_before_incr, indx_after_incr; ++ poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype); + int scalar_precision + = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype))); + tree cr_index_scalar_type = make_unsigned_type (scalar_precision); +@@ -4574,9 +4583,9 @@ vect_create_epilog_for_reduction (stmt_v + (CCOMPARE). The then and else values mirror the main VEC_COND_EXPR: + the reduction phi corresponds to NEW_PHI_TREE and the new values + correspond to INDEX_BEFORE_INCR. */ +- gcc_assert (STMT_VINFO_REDUC_IDX (reduc_info) >= 1); ++ gcc_assert (STMT_VINFO_REDUC_IDX (cond_info) >= 1); + tree index_cond_expr; +- if (STMT_VINFO_REDUC_IDX (reduc_info) == 2) ++ if (STMT_VINFO_REDUC_IDX (cond_info) == 2) + index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type, + ccompare, indx_before_incr, new_phi_tree); + else +@@ -4772,10 +4781,11 @@ vect_create_epilog_for_reduction (stmt_v + be zero. */ + + /* Vector of {0, 0, 0,...}. */ +- tree zero_vec = make_ssa_name (vectype); +- tree zero_vec_rhs = build_zero_cst (vectype); +- gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs); +- gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT); ++ tree zero_vec = build_zero_cst (vectype); ++ ++ gimple_seq stmts = NULL; ++ new_phi_result = gimple_convert (&stmts, vectype, new_phi_result); ++ gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); + + /* Find maximum value from the vector of found indexes. */ + tree max_index = make_ssa_name (index_scalar_type); +@@ -4843,7 +4853,7 @@ vect_create_epilog_for_reduction (stmt_v + + /* Convert the reduced value back to the result type and set as the + result. 
*/ +- gimple_seq stmts = NULL; ++ stmts = NULL; + new_temp = gimple_build (&stmts, VIEW_CONVERT_EXPR, scalar_type, + data_reduc); + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); diff --git a/fix-ICE-in-vect_get_vec_def_for_stmt_copy.patch b/fix-ICE-in-vect_get_vec_def_for_stmt_copy.patch new file mode 100644 index 0000000..b40d5e3 --- /dev/null +++ b/fix-ICE-in-vect_get_vec_def_for_stmt_copy.patch @@ -0,0 +1,54 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-re-PR-tree-optimization-92161-ICE-in-vect_get_vec_de.patch +ae7f3143a3876378d051e64c8e68718f27c41075 + +diff -Nurp a/gcc/testsuite/gfortran.dg/pr92161.f b/gcc/testsuite/gfortran.dg/pr92161.f +--- a/gcc/testsuite/gfortran.dg/pr92161.f 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gfortran.dg/pr92161.f 2020-08-17 10:18:05.996000000 +0800 +@@ -0,0 +1,23 @@ ++! { dg-do compile } ++! { dg-options "-O1 -ftree-loop-vectorize -fno-signed-zeros -fno-trapping-math" } ++! { dg-additional-options "-mvsx" { target { powerpc*-*-* } } } ++ COMPLEX FUNCTION R1 (ZR, CC, EA, U6) ++ ++ INTEGER ZR, U6, FZ, J2 ++ COMPLEX EA(*), CC ++ DOUBLE PRECISION OS, GA, YU, XT ++ ++ OS = DBLE(REAL(CC)) ++ GA = DBLE(AIMAG(CC)) ++ J2 = 1 ++ ++ DO 5 FZ = 1, ZR ++ YU = DBLE(REAL(EA(J2))) ++ XT = DBLE(AIMAG(EA(J2))) ++ OS = OS + (YU * 2) - (XT * 2) ++ GA = GA + (YU * 3) + (XT * 3) ++ J2 = J2 + U6 ++ 5 CONTINUE ++ R1 = CMPLX(REAL(OS), REAL(GA)) ++ RETURN ++ END +diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +--- a/gcc/tree-vect-loop.c 2020-08-17 10:17:08.288000000 +0800 ++++ b/gcc/tree-vect-loop.c 2020-08-17 10:18:05.996000000 +0800 +@@ -2339,6 +2339,17 @@ again: + { + stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); + STMT_SLP_TYPE (stmt_info) = loop_vect; ++ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def ++ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) ++ { ++ /* vectorizable_reduction adjusts reduction stmt def-types, ++ restore them to that of the PHI. */ ++ STMT_VINFO_DEF_TYPE (STMT_VINFO_REDUC_DEF (stmt_info)) ++ = STMT_VINFO_DEF_TYPE (stmt_info); ++ STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize ++ (STMT_VINFO_REDUC_DEF (stmt_info))) ++ = STMT_VINFO_DEF_TYPE (stmt_info); ++ } + } + for (gimple_stmt_iterator si = gsi_start_bb (bb); + !gsi_end_p (si); gsi_next (&si)) diff --git a/fix-ICE-in-vect_slp_analyze_node_operations.patch b/fix-ICE-in-vect_slp_analyze_node_operations.patch new file mode 100644 index 0000000..5f5d336 --- /dev/null +++ b/fix-ICE-in-vect_slp_analyze_node_operations.patch @@ -0,0 +1,381 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-re-PR-tree-optimization-92516-ICE-in-vect_schedule_s.patch +10a73df76280e12886cb20b028727436d73724c5 + +diff -Nurp a/gcc/testsuite/gcc.dg/vect/vect-ctor-1.c b/gcc/testsuite/gcc.dg/vect/vect-ctor-1.c +--- a/gcc/testsuite/gcc.dg/vect/vect-ctor-1.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/vect-ctor-1.c 2020-08-17 10:33:56.052000000 +0800 +@@ -0,0 +1,17 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-O3" } */ ++/* { dg-additional-options "-mavx2" { target { i?86-*-* x86_64-*-* } } } */ ++ ++typedef struct { ++ unsigned short mprr_2[5][16][16]; ++} ImageParameters; ++int s[16][2]; ++void intrapred_luma_16x16(ImageParameters *img, int s0) ++{ ++ for (int j=0; j < 16; j++) ++ for (int i=0; i < 16; i++) ++ { ++ img->mprr_2[1 ][j][i]=s[j][1]; ++ img->mprr_2[2 ][j][i]=s0; ++ } ++} +diff -Nurp a/gcc/testsuite/g++.dg/vect/slp-pr92516.cc b/gcc/testsuite/g++.dg/vect/slp-pr92516.cc +--- a/gcc/testsuite/g++.dg/vect/slp-pr92516.cc 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/g++.dg/vect/slp-pr92516.cc 2020-08-17 10:33:56.052000000 +0800 +@@ -0,0 +1,43 @@ ++// { dg-do compile } ++// { dg-require-effective-target c++14 } ++ ++class a { ++public: ++ typedef int b; ++ operator b(); ++}; ++class c { ++public: ++ constexpr int m_fn1() const; ++ constexpr int d() const; ++ int e; ++ int f; ++}; ++constexpr int c::m_fn1() const { return e; } ++constexpr int c::d() const { return f; } ++class g { ++public: ++ g(); ++ constexpr void i(const c &) noexcept; ++ int j; ++ int k; ++ int l; ++ int m; ++}; ++constexpr void g::i(const c &n) noexcept { ++ int v = l - j, h = m - k; ++ j = n.m_fn1() - v / 2; ++ k = n.d() - h / 2; ++ l = j + v; ++ m = k + h; ++} ++class o { ++ void m_fn4() const; ++ a p; ++} r; ++void o::m_fn4() const { ++ g q; ++ c t; ++ q.i(t); ++ r.p || 0; ++} +diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c +--- a/gcc/tree-vect-slp.c 2020-08-17 10:31:58.236000000 +0800 ++++ b/gcc/tree-vect-slp.c 2020-08-17 10:36:40.976796520 +0800 +@@ -2010,6 +2010,7 @@ calculate_unrolling_factor (poly_uint64 + + static bool + vect_analyze_slp_instance (vec_info *vinfo, ++ scalar_stmts_to_slp_tree_map_t *bst_map, + stmt_vec_info stmt_info, unsigned max_tree_size) + { + slp_instance new_instance; +@@ -2117,19 +2118,11 @@ vect_analyze_slp_instance (vec_info *vin + /* Build the tree for the SLP instance. */ + bool *matches = XALLOCAVEC (bool, group_size); + unsigned npermutes = 0; +- scalar_stmts_to_slp_tree_map_t *bst_map +- = new scalar_stmts_to_slp_tree_map_t (); + poly_uint64 max_nunits = nunits; + unsigned tree_size = 0; + node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, + &max_nunits, matches, &npermutes, + &tree_size, bst_map); +- /* The map keeps a reference on SLP nodes built, release that. */ +- for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin (); +- it != bst_map->end (); ++it) +- if ((*it).second) +- vect_free_slp_tree ((*it).second, false); +- delete bst_map; + if (node != NULL) + { + /* If this is a reduction chain with a conversion in front +@@ -2183,6 +2176,18 @@ vect_analyze_slp_instance (vec_info *vin + matches[group_size / const_max_nunits * const_max_nunits] = false; + vect_free_slp_tree (node, false); + } ++ else if (constructor ++ && SLP_TREE_DEF_TYPE (node) != vect_internal_def) ++ { ++ /* CONSTRUCTOR vectorization relies on a vector stmt being ++ generated, that doesn't work for fully external ones. 
*/ ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "Build SLP failed: CONSTRUCTOR of external " ++ "or constant elements\n"); ++ vect_free_slp_tree (node, false); ++ return false; ++ } + else + { + /* Create a new SLP instance. */ +@@ -2317,7 +2322,7 @@ vect_analyze_slp_instance (vec_info *vin + + stmt_vec_info rest = vect_split_slp_store_group (stmt_info, + group1_size); +- bool res = vect_analyze_slp_instance (vinfo, stmt_info, ++ bool res = vect_analyze_slp_instance (vinfo, bst_map, stmt_info, + max_tree_size); + /* If the first non-match was in the middle of a vector, + skip the rest of that vector. */ +@@ -2328,7 +2333,8 @@ vect_analyze_slp_instance (vec_info *vin + rest = vect_split_slp_store_group (rest, const_nunits); + } + if (i < group_size) +- res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size); ++ res |= vect_analyze_slp_instance (vinfo, bst_map, ++ rest, max_tree_size); + return res; + } + /* Even though the first vector did not all match, we might be able to SLP +@@ -2350,9 +2356,12 @@ vect_analyze_slp (vec_info *vinfo, unsig + + DUMP_VECT_SCOPE ("vect_analyze_slp"); + ++ scalar_stmts_to_slp_tree_map_t *bst_map ++ = new scalar_stmts_to_slp_tree_map_t (); ++ + /* Find SLP sequences starting from groups of grouped stores. */ + FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element) +- vect_analyze_slp_instance (vinfo, first_element, max_tree_size); ++ vect_analyze_slp_instance (vinfo, bst_map, first_element, max_tree_size); + + if (loop_vec_info loop_vinfo = dyn_cast (vinfo)) + { +@@ -2361,7 +2370,7 @@ vect_analyze_slp (vec_info *vinfo, unsig + { + /* Find SLP sequences starting from reduction chains. */ + FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element) +- if (! vect_analyze_slp_instance (vinfo, first_element, ++ if (! vect_analyze_slp_instance (vinfo, bst_map, first_element, + max_tree_size)) + { + /* Dissolve reduction chain group. */ +@@ -2383,10 +2392,17 @@ vect_analyze_slp (vec_info *vinfo, unsig + + /* Find SLP sequences starting from groups of reductions. */ + if (loop_vinfo->reductions.length () > 1) +- vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0], ++ vect_analyze_slp_instance (vinfo, bst_map, loop_vinfo->reductions[0], + max_tree_size); + } + ++ /* The map keeps a reference on SLP nodes built, release that. */ ++ for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin (); ++ it != bst_map->end (); ++it) ++ if ((*it).second) ++ vect_free_slp_tree ((*it).second, false); ++ delete bst_map; ++ + return opt_result::success (); + } + +@@ -2513,13 +2529,6 @@ vect_detect_hybrid_slp_stmts (slp_tree n + vect_detect_hybrid_slp_stmts (child, i, stype, visited); + } + +-static void +-vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype) +-{ +- hash_map visited; +- vect_detect_hybrid_slp_stmts (node, i, stype, visited); +-} +- + /* Helpers for vect_detect_hybrid_slp walking pattern stmt uses. */ + + static tree +@@ -2602,11 +2611,12 @@ vect_detect_hybrid_slp (loop_vec_info lo + /* Then walk the SLP instance trees marking stmts with uses in + non-SLP stmts as hybrid, also propagating hybrid down the + SLP tree, collecting the above info on-the-fly. 
*/ ++ hash_map visited; + FOR_EACH_VEC_ELT (slp_instances, i, instance) + { + for (unsigned i = 0; i < SLP_INSTANCE_GROUP_SIZE (instance); ++i) + vect_detect_hybrid_slp_stmts (SLP_INSTANCE_TREE (instance), +- i, pure_slp); ++ i, pure_slp, visited); + } + } + +@@ -2763,8 +2773,8 @@ vect_slp_convert_to_external (vec_info * + static bool + vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, + slp_instance node_instance, +- scalar_stmts_to_slp_tree_map_t *visited, +- scalar_stmts_to_slp_tree_map_t *lvisited, ++ hash_set &visited, ++ hash_set &lvisited, + stmt_vector_for_cost *cost_vec) + { + int i, j; +@@ -2774,27 +2784,13 @@ vect_slp_analyze_node_operations (vec_in + return true; + + /* If we already analyzed the exact same set of scalar stmts we're done. +- We share the generated vector stmts for those. */ +- slp_tree *leader; +- if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node))) +- || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node)))) +- { +- SLP_TREE_NUMBER_OF_VEC_STMTS (node) +- = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader); +- /* Cope with cases in which we made a late decision to build the +- node from scalars. */ +- if (SLP_TREE_DEF_TYPE (*leader) == vect_external_def +- && vect_slp_convert_to_external (vinfo, node, node_instance)) +- ; +- else +- gcc_assert (SLP_TREE_DEF_TYPE (node) == SLP_TREE_DEF_TYPE (*leader)); +- return true; +- } +- +- /* The SLP graph is acyclic so not caching whether we failed or succeeded ++ We share the generated vector stmts for those. ++ The SLP graph is acyclic so not caching whether we failed or succeeded + doesn't result in any issue since we throw away the lvisited set + when we fail. */ +- lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node); ++ if (visited.contains (node) ++ || lvisited.add (node)) ++ return true; + + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) + if (!vect_slp_analyze_node_operations (vinfo, child, node_instance, +@@ -2867,16 +2863,15 @@ vect_slp_analyze_operations (vec_info *v + + DUMP_VECT_SCOPE ("vect_slp_analyze_operations"); + +- scalar_stmts_to_slp_tree_map_t *visited +- = new scalar_stmts_to_slp_tree_map_t (); ++ hash_set visited; + for (i = 0; vinfo->slp_instances.iterate (i, &instance); ) + { +- scalar_stmts_to_slp_tree_map_t lvisited; ++ hash_set lvisited; + stmt_vector_for_cost cost_vec; + cost_vec.create (2); + if (!vect_slp_analyze_node_operations (vinfo, + SLP_INSTANCE_TREE (instance), +- instance, visited, &lvisited, ++ instance, visited, lvisited, + &cost_vec)) + { + slp_tree node = SLP_INSTANCE_TREE (instance); +@@ -2891,16 +2886,15 @@ vect_slp_analyze_operations (vec_info *v + } + else + { +- for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin(); ++ for (hash_set::iterator x = lvisited.begin(); + x != lvisited.end(); ++x) +- visited->put ((*x).first.copy (), (*x).second); ++ visited.add (*x); + i++; + + add_stmt_costs (vinfo->target_cost_data, &cost_vec); + cost_vec.release (); + } + } +- delete visited; + + return !vinfo->slp_instances.is_empty (); + } +@@ -2991,15 +2985,6 @@ vect_bb_slp_scalar_cost (basic_block bb, + } + } + +-static void +-vect_bb_slp_scalar_cost (basic_block bb, +- slp_tree node, vec *life, +- stmt_vector_for_cost *cost_vec) +-{ +- hash_set visited; +- vect_bb_slp_scalar_cost (bb, node, life, cost_vec, visited); +-} +- + /* Check if vectorization of the basic block is profitable. */ + + static bool +@@ -3014,13 +2999,14 @@ vect_bb_vectorization_profitable_p (bb_v + /* Calculate scalar cost. 
*/ + stmt_vector_for_cost scalar_costs; + scalar_costs.create (0); ++ hash_set visited; + FOR_EACH_VEC_ELT (slp_instances, i, instance) + { + auto_vec life; + life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance)); + vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo), + SLP_INSTANCE_TREE (instance), +- &life, &scalar_costs); ++ &life, &scalar_costs, visited); + } + void *target_cost_data = init_cost (NULL); + add_stmt_costs (target_cost_data, &scalar_costs); +@@ -4052,8 +4038,7 @@ vect_transform_slp_perm_load (slp_tree n + /* Vectorize SLP instance tree in postorder. */ + + static void +-vect_schedule_slp_instance (slp_tree node, slp_instance instance, +- scalar_stmts_to_slp_tree_map_t *bst_map) ++vect_schedule_slp_instance (slp_tree node, slp_instance instance) + { + gimple_stmt_iterator si; + stmt_vec_info stmt_info; +@@ -4070,17 +4055,8 @@ vect_schedule_slp_instance (slp_tree nod + if (SLP_TREE_VEC_STMTS (node).exists ()) + return; + +- /* See if we have already vectorized the same set of stmts and reuse their +- vectorized stmts across instances. */ +- if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node))) +- { +- SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader)); +- return; +- } +- +- bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node); + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) +- vect_schedule_slp_instance (child, instance, bst_map); ++ vect_schedule_slp_instance (child, instance); + + /* Push SLP node def-type to stmts. */ + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) +@@ -4297,14 +4273,12 @@ vect_schedule_slp (vec_info *vinfo) + slp_instance instance; + unsigned int i; + +- scalar_stmts_to_slp_tree_map_t *bst_map +- = new scalar_stmts_to_slp_tree_map_t (); + slp_instances = vinfo->slp_instances; + FOR_EACH_VEC_ELT (slp_instances, i, instance) + { + slp_tree node = SLP_INSTANCE_TREE (instance); + /* Schedule the tree of INSTANCE. 
*/ +- vect_schedule_slp_instance (node, instance, bst_map); ++ vect_schedule_slp_instance (node, instance); + + if (SLP_INSTANCE_ROOT_STMT (instance)) + vectorize_slp_instance_root_stmt (node, instance); +@@ -4313,7 +4287,6 @@ vect_schedule_slp (vec_info *vinfo) + dump_printf_loc (MSG_NOTE, vect_location, + "vectorizing stmts using SLP.\n"); + } +- delete bst_map; + + FOR_EACH_VEC_ELT (slp_instances, i, instance) + { diff --git a/fix-ICE-in-vect_stmt_to_vectorize.patch b/fix-ICE-in-vect_stmt_to_vectorize.patch new file mode 100644 index 0000000..67c9818 --- /dev/null +++ b/fix-ICE-in-vect_stmt_to_vectorize.patch @@ -0,0 +1,41 @@ +diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr92252.c b/gcc/testsuite/gcc.dg/torture/pr92252.c +--- a/gcc/testsuite/gcc.dg/torture/pr92252.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/torture/pr92252.c 2020-07-03 10:39:44.808000000 +0800 +@@ -0,0 +1,23 @@ ++/* { do-do compile } */ ++/* { dg-additional-options "-ftree-vectorize" } */ ++ ++long int ar; ++int dt; ++ ++long int ++pc (unsigned long int q3, int zw) ++{ ++ long int em = 0; ++ ++ while (zw < 1) ++ { ++ q3 = zw * 2ul; ++ if (q3 != 0) ++ for (ar = 0; ar < 2; ++ar) ++ em = dt; ++ ++ ++zw; ++ } ++ ++ return em; ++} +diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c +--- a/gcc/tree-vect-slp.c 2020-07-03 10:35:59.876000000 +0800 ++++ b/gcc/tree-vect-slp.c 2020-07-03 10:39:44.808000000 +0800 +@@ -581,6 +581,10 @@ again: + { + swap_ssa_operands (stmt, gimple_assign_rhs2_ptr (stmt), + gimple_assign_rhs3_ptr (stmt)); ++ if (STMT_VINFO_REDUC_IDX (stmt_info) == 1) ++ STMT_VINFO_REDUC_IDX (stmt_info) = 2; ++ else if (STMT_VINFO_REDUC_IDX (stmt_info) == 2) ++ STMT_VINFO_REDUC_IDX (stmt_info) = 1; + bool honor_nans = HONOR_NANS (TREE_OPERAND (cond, 0)); + code = invert_tree_comparison (TREE_CODE (cond), honor_nans); + gcc_assert (code != ERROR_MARK); diff --git a/fix-ICE-in-vect_transform_stmt.patch b/fix-ICE-in-vect_transform_stmt.patch new file mode 100644 index 0000000..9433155 --- /dev/null +++ b/fix-ICE-in-vect_transform_stmt.patch @@ -0,0 +1,96 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-re-PR-fortran-92094-ice-in-vect_transform_stmt-at-tr.patch +c30587c0200f52f8845a5aea21bd7bef6cbe0bf4 + +diff -Nurp a/gcc/testsuite/gfortran.dg/pr92094.f90 b/gcc/testsuite/gfortran.dg/pr92094.f90 +--- a/gcc/testsuite/gfortran.dg/pr92094.f90 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gfortran.dg/pr92094.f90 2020-08-18 14:25:12.392000000 +0800 +@@ -0,0 +1,28 @@ ++! { dg-do compile } ++! { dg-options "-O3" } ++ subroutine hesfcn(n, x, h, ldh) ++ integer n,ldh ++ double precision x(n), h(ldh) ++ ++ integer i,j,k,kj ++ double precision th,u1,u2,v2 ++ ++ kj = 0 ++ do 770 j = 1, n ++ kj = kj - j ++ do 760 k = 1, j ++ kj = kj + 1 ++ v2 = 2 * x(k) - 1 ++ u1 = 0 ++ u2 = 2 ++ do 750 i = 1, n ++ h(kj) = h(kj) + u2 ++ th = 4 * v2 + u2 - u1 ++ u1 = u2 ++ u2 = th ++ th = v2 - 1 ++ 750 continue ++ 760 continue ++ 770 continue ++ ++ end +diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +--- a/gcc/tree-vect-loop.c 2020-08-18 14:19:43.784000000 +0800 ++++ b/gcc/tree-vect-loop.c 2020-08-18 14:25:12.396000000 +0800 +@@ -5891,20 +5891,9 @@ vectorizable_reduction (stmt_vec_info st + if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) + { + if (is_a (stmt_info->stmt)) +- { +- /* Analysis for double-reduction is done on the outer +- loop PHI, nested cycles have no further restrictions. 
*/ +- STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type; +- /* For nested cycles we want to let regular vectorizable_* +- routines handle code-generation. */ +- if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_double_reduction_def) +- { +- stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); +- STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def; +- STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (stmt_info)) +- = vect_internal_def; +- } +- } ++ /* Analysis for double-reduction is done on the outer ++ loop PHI, nested cycles have no further restrictions. */ ++ STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type; + else + STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; + return true; +diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +--- a/gcc/tree-vect-stmts.c 2020-08-18 14:19:45.556000000 +0800 ++++ b/gcc/tree-vect-stmts.c 2020-08-18 14:25:12.396000000 +0800 +@@ -10224,13 +10224,16 @@ vect_transform_stmt (stmt_vec_info stmt_ + && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION) + { + gphi *phi; ++ edge e; + if (!slp_node + && (phi = dyn_cast + (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt)) + && dominated_by_p (CDI_DOMINATORS, +- gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))) ++ gimple_bb (orig_stmt_info->stmt), gimple_bb (phi)) ++ && (e = loop_latch_edge (gimple_bb (phi)->loop_father)) ++ && (PHI_ARG_DEF_FROM_EDGE (phi, e) ++ == gimple_get_lhs (orig_stmt_info->stmt))) + { +- edge e = loop_latch_edge (gimple_bb (phi)->loop_father); + stmt_vec_info phi_info + = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info)); + stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); +@@ -10250,7 +10253,7 @@ vect_transform_stmt (stmt_vec_info stmt_ + { + slp_tree phi_node = slp_node_instance->reduc_phis; + gphi *phi = as_a (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt); +- edge e = loop_latch_edge (gimple_bb (phi)->loop_father); ++ e = loop_latch_edge (gimple_bb (phi)->loop_father); + gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length () + == SLP_TREE_VEC_STMTS (slp_node).length ()); + for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i) diff --git a/fix-ICE-in-vectorizable-load.patch b/fix-ICE-in-vectorizable-load.patch index 690ce6c..bb31637 100644 --- a/fix-ICE-in-vectorizable-load.patch +++ b/fix-ICE-in-vectorizable-load.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-vect-ICE-in-vectorizable_load-at-tree-vect-stmts.c-9.patch: +f14b41d27124601284347a10d496362c8b4b8e1c + diff -Nurp a/gcc/testsuite/gcc.target/aarch64/pr94398.c b/gcc/testsuite/gcc.target/aarch64/pr94398.c --- a/gcc/testsuite/gcc.target/aarch64/pr94398.c 1970-01-01 08:00:00.000000000 +0800 +++ b/gcc/testsuite/gcc.target/aarch64/pr94398.c 2020-04-17 17:15:58.176000000 +0800 diff --git a/fix-ICE-in-vectorizable_condition.patch b/fix-ICE-in-vectorizable_condition.patch new file mode 100644 index 0000000..98f367e --- /dev/null +++ b/fix-ICE-in-vectorizable_condition.patch @@ -0,0 +1,18 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-Fix-reduc_index-calculation-in-vectorizable_conditio.patch +1d149b7260bcc4c0c6367b3aea47a8b91a1cf345 + +diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +--- a/gcc/tree-vect-stmts.c 2020-08-18 19:35:06.352000000 +0800 ++++ b/gcc/tree-vect-stmts.c 2020-08-18 19:35:20.792000000 +0800 +@@ -9077,7 +9077,7 @@ vectorizable_condition (stmt_vec_info st + return false; + reduc_info = info_for_reduction (stmt_info); + reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); +- reduc_index = STMT_VINFO_REDUC_IDX (reduc_info); ++ reduc_index = STMT_VINFO_REDUC_IDX (stmt_info); + gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION + || reduc_index != -1); + } diff --git a/fix-ICE-in-verify_ssa.patch b/fix-ICE-in-verify_ssa.patch new file mode 100644 index 0000000..056c276 --- /dev/null +++ b/fix-ICE-in-verify_ssa.patch @@ -0,0 +1,41 @@ +diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr92461.c b/gcc/testsuite/gcc.dg/torture/pr92461.c +--- a/gcc/testsuite/gcc.dg/torture/pr92461.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/torture/pr92461.c 2020-07-28 19:48:09.324000000 +0800 +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-ftree-vectorize" } */ ++ ++short int zb; ++ ++void ++gs (void) ++{ ++ while (zb < 1) ++ { ++ int at; ++ ++ zb %= 1; ++ ++ for (at = 0; at < 56; ++at) ++ zb += zb; ++ ++ ++zb; ++ } ++} +diff -Nurp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +--- a/gcc/tree-vect-loop.c 2020-07-28 19:47:53.896000000 +0800 ++++ b/gcc/tree-vect-loop.c 2020-07-28 19:48:09.324000000 +0800 +@@ -5459,8 +5459,11 @@ vect_create_epilog_for_reduction (stmt_v + orig_name = PHI_RESULT (exit_phi); + scalar_result = scalar_results[k]; + FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) +- FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) +- SET_USE (use_p, scalar_result); ++ { ++ FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) ++ SET_USE (use_p, scalar_result); ++ update_stmt (use_stmt); ++ } + } + + phis.release (); diff --git a/fix-ICE-statement-uses-released-SSA-name.patch b/fix-ICE-statement-uses-released-SSA-name.patch new file mode 100644 index 0000000..06107b4 --- /dev/null +++ b/fix-ICE-statement-uses-released-SSA-name.patch @@ -0,0 +1,109 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-tree-ssa-sccvn.c-class-pass_fre-Add-may_iterate-pass.patch +744fd446c321f78f9a1ce4ef5f83df8dcfa44a9e + +diff -Nurp a/gcc/passes.def b/gcc/passes.def +--- a/gcc/passes.def 2020-08-17 09:46:40.340000000 +0800 ++++ b/gcc/passes.def 2020-08-17 10:09:10.808000000 +0800 +@@ -83,7 +83,7 @@ along with GCC; see the file COPYING3. + /* pass_build_ealias is a dummy pass that ensures that we + execute TODO_rebuild_alias at this point. */ + NEXT_PASS (pass_build_ealias); +- NEXT_PASS (pass_fre); ++ NEXT_PASS (pass_fre, true /* may_iterate */); + NEXT_PASS (pass_early_vrp); + NEXT_PASS (pass_merge_phi); + NEXT_PASS (pass_dse); +@@ -117,7 +117,7 @@ along with GCC; see the file COPYING3. + NEXT_PASS (pass_oacc_kernels); + PUSH_INSERT_PASSES_WITHIN (pass_oacc_kernels) + NEXT_PASS (pass_ch); +- NEXT_PASS (pass_fre); ++ NEXT_PASS (pass_fre, true /* may_iterate */); + /* We use pass_lim to rewrite in-memory iteration and reduction + variable accesses in loops into local variables accesses. */ + NEXT_PASS (pass_lim); +@@ -201,7 +201,7 @@ along with GCC; see the file COPYING3. + execute TODO_rebuild_alias at this point. 
*/ + NEXT_PASS (pass_build_alias); + NEXT_PASS (pass_return_slot); +- NEXT_PASS (pass_fre); ++ NEXT_PASS (pass_fre, true /* may_iterate */); + NEXT_PASS (pass_merge_phi); + NEXT_PASS (pass_thread_jumps); + NEXT_PASS (pass_vrp, true /* warn_array_bounds_p */); +@@ -312,6 +312,7 @@ along with GCC; see the file COPYING3. + NEXT_PASS (pass_strength_reduction); + NEXT_PASS (pass_split_paths); + NEXT_PASS (pass_tracer); ++ NEXT_PASS (pass_fre, false /* may_iterate */); + NEXT_PASS (pass_thread_jumps); + NEXT_PASS (pass_dominator, false /* may_peel_loop_headers_p */); + NEXT_PASS (pass_strlen); +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c 2020-08-17 09:46:41.332000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr77445-2.c 2020-08-17 10:09:10.808000000 +0800 +@@ -125,7 +125,7 @@ enum STATES FMS( u8 **in , u32 *transiti + jump threading opportunities. Skip the later tests on aarch64. */ + /* { dg-final { scan-tree-dump "Jumps threaded: 1\[1-9\]" "thread1" } } */ + /* { dg-final { scan-tree-dump-times "Invalid sum" 3 "thread1" } } */ +-/* { dg-final { scan-tree-dump-not "not considered" "thread1" } } */ +-/* { dg-final { scan-tree-dump-not "not considered" "thread2" } } */ +-/* { dg-final { scan-tree-dump-not "not considered" "thread3" { target { ! aarch64*-*-* } } } } */ +-/* { dg-final { scan-tree-dump-not "not considered" "thread4" { target { ! aarch64*-*-* } } } } */ ++/* { dg-final { scan-tree-dump-not "optimizing for size" "thread1" } } */ ++/* { dg-final { scan-tree-dump-not "optimizing for size" "thread2" } } */ ++/* { dg-final { scan-tree-dump-not "optimizing for size" "thread3" { target { ! aarch64*-*-* } } } } */ ++/* { dg-final { scan-tree-dump-not "optimizing for size" "thread4" { target { ! aarch64*-*-* } } } } */ +diff -Nurp a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c +--- a/gcc/tree-ssa-sccvn.c 2020-08-17 09:46:42.212000000 +0800 ++++ b/gcc/tree-ssa-sccvn.c 2020-08-17 10:09:10.808000000 +0800 +@@ -7232,14 +7232,24 @@ class pass_fre : public gimple_opt_pass + { + public: + pass_fre (gcc::context *ctxt) +- : gimple_opt_pass (pass_data_fre, ctxt) ++ : gimple_opt_pass (pass_data_fre, ctxt), may_iterate (true) + {} + + /* opt_pass methods: */ + opt_pass * clone () { return new pass_fre (m_ctxt); } +- virtual bool gate (function *) { return flag_tree_fre != 0; } ++ void set_pass_param (unsigned int n, bool param) ++ { ++ gcc_assert (n == 0); ++ may_iterate = param; ++ } ++ virtual bool gate (function *) ++ { ++ return flag_tree_fre != 0 && (may_iterate || optimize > 1); ++ } + virtual unsigned int execute (function *); + ++private: ++ bool may_iterate; + }; // class pass_fre + + unsigned int +@@ -7248,15 +7258,16 @@ pass_fre::execute (function *fun) + unsigned todo = 0; + + /* At -O[1g] use the cheap non-iterating mode. 
*/ ++ bool iterate_p = may_iterate && (optimize > 1); + calculate_dominance_info (CDI_DOMINATORS); +- if (optimize > 1) ++ if (iterate_p) + loop_optimizer_init (AVOID_CFG_MODIFICATIONS); + + default_vn_walk_kind = VN_WALKREWRITE; +- todo = do_rpo_vn (fun, NULL, NULL, optimize > 1, true); ++ todo = do_rpo_vn (fun, NULL, NULL, iterate_p, true); + free_rpo_vn (); + +- if (optimize > 1) ++ if (iterate_p) + loop_optimizer_finalize (); + + return todo; diff --git a/fix-ICE-when-vectorizing-nested-cycles.patch b/fix-ICE-when-vectorizing-nested-cycles.patch new file mode 100644 index 0000000..d8a5b69 --- /dev/null +++ b/fix-ICE-when-vectorizing-nested-cycles.patch @@ -0,0 +1,145 @@ +diff -uprN a/gcc/testsuite/gcc.dg/vect/pr96698.c b/gcc/testsuite/gcc.dg/vect/pr96698.c +--- a/gcc/testsuite/gcc.dg/vect/pr96698.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/pr96698.c 2020-08-27 17:53:24.396000000 +0800 +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++ ++void test(int a, int* i) ++{ ++ for (; a < 5; ++a) ++ { ++ int b = 0; ++ int c = 0; ++ for (; b != -11; b--) ++ for (int d = 0; d ==0; d++) ++ { ++ *i += c & a; ++ c = b; ++ } ++ } ++} ++ ++/* We should be able to vectorize the inner cycle. */ ++/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target vect_int } } } */ +diff -uprN a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +--- a/gcc/tree-vect-loop.c 2020-08-27 09:25:58.000000000 +0800 ++++ b/gcc/tree-vect-loop.c 2020-08-27 18:41:41.016000000 +0800 +@@ -4325,7 +4325,8 @@ info_for_reduction (stmt_vec_info stmt_i + { + stmt_info = vect_orig_stmt (stmt_info); + gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info)); +- if (!is_a (stmt_info->stmt)) ++ if (!is_a (stmt_info->stmt) ++ || !VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) + stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); + gphi *phi = as_a (stmt_info->stmt); + if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) +@@ -8622,6 +8623,43 @@ vect_transform_loop (loop_vec_info loop_ + } + } + ++ /* Fill in backedge defs of reductions. 
*/ ++ for (unsigned i = 0; i < loop_vinfo->reduc_latch_defs.length (); ++i) ++ { ++ stmt_vec_info stmt_info = loop_vinfo->reduc_latch_defs[i]; ++ stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); ++ stmt_vec_info phi_info ++ = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info)); ++ stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); ++ gphi *phi ++ = dyn_cast (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt); ++ edge e = loop_latch_edge (gimple_bb (phi_info->stmt)->loop_father); ++ do ++ { ++ add_phi_arg (as_a (phi_info->stmt), ++ gimple_get_lhs (vec_stmt->stmt), e, ++ gimple_phi_arg_location (phi, e->dest_idx)); ++ phi_info = STMT_VINFO_RELATED_STMT (phi_info); ++ vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt); ++ } ++ while (phi_info); ++ gcc_assert (!vec_stmt); ++ } ++ for (unsigned i = 0; i < loop_vinfo->reduc_latch_slp_defs.length (); ++i) ++ { ++ slp_tree slp_node = loop_vinfo->reduc_latch_slp_defs[i].first; ++ slp_tree phi_node = loop_vinfo->reduc_latch_slp_defs[i].second; ++ gphi *phi = as_a (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt); ++ e = loop_latch_edge (gimple_bb (phi)->loop_father); ++ gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length () ++ == SLP_TREE_VEC_STMTS (slp_node).length ()); ++ for (unsigned j = 0; j < SLP_TREE_VEC_STMTS (phi_node).length (); ++j) ++ add_phi_arg (as_a (SLP_TREE_VEC_STMTS (phi_node)[j]->stmt), ++ gimple_get_lhs ++ (SLP_TREE_VEC_STMTS (slp_node)[j]->stmt), ++ e, gimple_phi_arg_location (phi, e->dest_idx)); ++ } ++ + /* Stub out scalar statements that must not survive vectorization. + Doing this here helps with grouped statements, or statements that + are involved in patterns. */ +diff -uprN a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h +--- a/gcc/tree-vectorizer.h 2020-08-27 09:25:57.000000000 +0800 ++++ b/gcc/tree-vectorizer.h 2020-08-27 17:53:24.400000000 +0800 +@@ -575,6 +575,11 @@ typedef struct _loop_vec_info : public v + stmt in the chain. */ + auto_vec reduction_chains; + ++ /* The vectorized stmts defining the latch values of the reduction ++ they are involved with. */ ++ auto_vec reduc_latch_defs; ++ auto_vec > reduc_latch_slp_defs; ++ + /* Cost vector for a single scalar iteration. */ + auto_vec scalar_cost_vec; + +diff -uprN a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +--- a/gcc/tree-vect-stmts.c 2020-08-27 09:25:58.000000000 +0800 ++++ b/gcc/tree-vect-stmts.c 2020-08-27 17:53:24.400000000 +0800 +@@ -10213,8 +10213,8 @@ vect_transform_stmt (stmt_vec_info stmt_ + if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type) + return is_store; + +- /* If this stmt defines a value used on a backedge, update the +- vectorized PHIs. */ ++ /* If this stmt defines a value used on a backedge, record it so ++ we can update the vectorized PHIs later. 
*/ + stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); + stmt_vec_info reduc_info; + if (STMT_VINFO_REDUC_DEF (orig_stmt_info) +@@ -10234,32 +10234,13 @@ vect_transform_stmt (stmt_vec_info stmt_ + && (PHI_ARG_DEF_FROM_EDGE (phi, e) + == gimple_get_lhs (orig_stmt_info->stmt))) + { +- stmt_vec_info phi_info +- = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info)); +- stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); +- do +- { +- add_phi_arg (as_a (phi_info->stmt), +- gimple_get_lhs (vec_stmt->stmt), e, +- gimple_phi_arg_location (phi, e->dest_idx)); +- phi_info = STMT_VINFO_RELATED_STMT (phi_info); +- vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt); +- } +- while (phi_info); +- gcc_assert (!vec_stmt); ++ as_a (vinfo)->reduc_latch_defs.safe_push (stmt_info); + } + else if (slp_node + && slp_node != slp_node_instance->reduc_phis) + { +- slp_tree phi_node = slp_node_instance->reduc_phis; +- gphi *phi = as_a (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt); +- e = loop_latch_edge (gimple_bb (phi)->loop_father); +- gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length () +- == SLP_TREE_VEC_STMTS (slp_node).length ()); +- for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i) +- add_phi_arg (as_a (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt), +- gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt), +- e, gimple_phi_arg_location (phi, e->dest_idx)); ++ as_a (vinfo)->reduc_latch_slp_defs.safe_push ++ (std::make_pair (slp_node, slp_node_instance->reduc_phis)); + } + } + diff --git a/fix-SSA-update-for-vectorizer-epilogue.patch b/fix-SSA-update-for-vectorizer-epilogue.patch new file mode 100644 index 0000000..96469b6 --- /dev/null +++ b/fix-SSA-update-for-vectorizer-epilogue.patch @@ -0,0 +1,47 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-tree-optimization-95717-fix-SSA-update-for-vectorize.patch +d0909f5858ad81e6d8b73fa6193be19cb5e6ed7b + +diff -Nurp a/gcc/testsuite/g++.dg/torture/pr95717.C b/gcc/testsuite/g++.dg/torture/pr95717.C +--- a/gcc/testsuite/g++.dg/torture/pr95717.C 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/g++.dg/torture/pr95717.C 2020-08-24 21:45:48.436000000 +0800 +@@ -0,0 +1,12 @@ ++// { dg-do compile } ++ ++bool a; ++extern bool b[]; ++long c, d; ++int *f; ++void g(bool h) ++{ ++ for (short e = 0; e < c; e = 4) ++ for (; d; d++) ++ b[d] = a = f[d] ? c ? h : 0 : h; ++} +diff -Nurp a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c +--- a/gcc/tree-vect-loop-manip.c 2020-08-24 21:45:23.620000000 +0800 ++++ b/gcc/tree-vect-loop-manip.c 2020-08-24 21:45:48.436000000 +0800 +@@ -1073,6 +1073,10 @@ slpeel_tree_duplicate_loop_to_edge_cfg ( + + add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL); + ++ /* Skip new preheader since it's deleted if copy loop is added at entry. */ ++ for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++) ++ rename_variables_in_bb (new_bbs[i], duplicate_outer_loop); ++ + if (scalar_loop != loop) + { + /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from +@@ -1150,10 +1154,6 @@ slpeel_tree_duplicate_loop_to_edge_cfg ( + loop_preheader_edge (new_loop)->src); + } + +- /* Skip new preheader since it's deleted if copy loop is added at entry. */ +- for (unsigned i = (at_exit ? 
0 : 1); i < scalar_loop->num_nodes + 1; i++) +- rename_variables_in_bb (new_bbs[i], duplicate_outer_loop); +- + if (scalar_loop != loop) + { + /* Update new_loop->header PHIs, so that on the preheader diff --git a/fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch b/fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch index fccdea5..9f596ca 100644 --- a/fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch +++ b/fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-aarch64-Fix-SYMBOL_TINY_GOT-handling-for-ILP32-PR942.patch: +d91480dee934478063fe5945b73ff3c108e40a91 + diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index b0cbb6e2d55..58d38f74bde 100644 --- a/gcc/config/aarch64/aarch64.c diff --git a/fix-cost-of-plus.patch b/fix-cost-of-plus.patch index 7a34072..7edb1b1 100644 --- a/fix-cost-of-plus.patch +++ b/fix-cost-of-plus.patch @@ -1,3 +1,6 @@ +AArch64-Fix-cost-of-plus-.-const_int-C.patch: +commit 835d50c66aa5bde2f354a6e63a2afa7d2f76a05a + diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 56a4a47db73..71d44de1d0a 100644 --- a/gcc/config/aarch64/aarch64.c diff --git a/fix-do-not-build-op.patch b/fix-do-not-build-op.patch new file mode 100644 index 0000000..d3a59d4 --- /dev/null +++ b/fix-do-not-build-op.patch @@ -0,0 +1,27 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-tree-vect-slp.c-vect_build_slp_tree_2-Do-not-build-o.patch +f99d62629933adf91e7e0bc1b1ff344ffb68e1a2 + +diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c +--- a/gcc/tree-vect-slp.c 2020-08-24 21:31:24.780000000 +0800 ++++ b/gcc/tree-vect-slp.c 2020-08-24 21:31:53.516000000 +0800 +@@ -1326,7 +1326,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, + slp_tree grandchild; + + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) +- if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def) ++ if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def) + break; + if (!grandchild) + { +@@ -1486,7 +1486,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, + slp_tree grandchild; + + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) +- if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def) ++ if (SLP_TREE_DEF_TYPE (grandchild) != vect_external_def) + break; + if (!grandchild) + { diff --git a/fix-load-eliding-in-SM.patch b/fix-load-eliding-in-SM.patch new file mode 100644 index 0000000..5e25a3d --- /dev/null +++ b/fix-load-eliding-in-SM.patch @@ -0,0 +1,55 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-tree-optimization-94949-fix-load-eliding-in-SM.patch +0424a5ece5307cc22bbc0fe97edf4707d7a798ed + +diff -Nurp a/gcc/testsuite/gcc.dg/torture/pr94949.c b/gcc/testsuite/gcc.dg/torture/pr94949.c +--- a/gcc/testsuite/gcc.dg/torture/pr94949.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/torture/pr94949.c 2020-08-24 21:40:32.208000000 +0800 +@@ -0,0 +1,17 @@ ++/* { dg-do run } */ ++/* { dg-additional-options "-fallow-store-data-races" } */ ++ ++static int x = 1; ++static volatile int y = -1; ++int ++main() ++{ ++ for (int i = 0; i < 128; ++i) ++ { ++ if (i == y) ++ x = i; ++ } ++ if (x != 1) ++ __builtin_abort (); ++ return 0; ++} +diff -Nurp a/gcc/tree-ssa-loop-im.c b/gcc/tree-ssa-loop-im.c +--- a/gcc/tree-ssa-loop-im.c 2020-08-24 21:40:14.164000000 +0800 ++++ b/gcc/tree-ssa-loop-im.c 2020-08-24 21:40:32.208000000 +0800 +@@ -2115,9 +2115,9 @@ execute_sm (struct loop *loop, vec + fmt_data.orig_loop = loop; + for_each_index (&ref->mem.ref, force_move_till, &fmt_data); + ++ bool always_stored = ref_always_accessed_p (loop, ref, true); + if (bb_in_transaction (loop_preheader_edge (loop)->src) +- || (! flag_store_data_races +- && ! ref_always_accessed_p (loop, ref, true))) ++ || (! flag_store_data_races && ! always_stored)) + multi_threaded_model_p = true; + + if (multi_threaded_model_p) +@@ -2132,8 +2132,10 @@ execute_sm (struct loop *loop, vec + + /* Avoid doing a load if there was no load of the ref in the loop. + Esp. when the ref is not always stored we cannot optimize it +- away later. */ +- if (ref->loaded && bitmap_bit_p (ref->loaded, loop->num)) ++ away later. But when it is not always stored we must use a conditional ++ store then. */ ++ if ((!always_stored && !multi_threaded_model_p) ++ || (ref->loaded && bitmap_bit_p (ref->loaded, loop->num))) + { + load = gimple_build_assign (tmp_var, unshare_expr (ref->mem.ref)); + lim_data = init_lim_data (load); diff --git a/fix-regno-out-of-range.patch b/fix-regno-out-of-range.patch index aa8aaa5..cf2746b 100644 --- a/fix-regno-out-of-range.patch +++ b/fix-regno-out-of-range.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-PR93561-bounds-checking-memory-overflow-for-spill_fo.patch: +d26f37a16e3ed3d75a93ffb1da10c44c36a8a36d + diff -Nurp a/gcc/lra-assigns.c b/gcc/lra-assigns.c --- a/gcc/lra-assigns.c 2020-04-17 16:27:46.192000000 +0800 +++ b/gcc/lra-assigns.c 2020-04-17 16:29:37.125688580 +0800 diff --git a/fix-wrong-vectorizer-code.patch b/fix-wrong-vectorizer-code.patch new file mode 100644 index 0000000..e3387bc --- /dev/null +++ b/fix-wrong-vectorizer-code.patch @@ -0,0 +1,71 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-re-PR-bootstrap-92301-Wrong-vectorizer-code-since-r2.patch +b76f4e6c06bd494d2383c4c16d1e1a034da74641 + +diff -Nurp a/gcc/testsuite/gcc.dg/pr92301.c b/gcc/testsuite/gcc.dg/pr92301.c +--- a/gcc/testsuite/gcc.dg/pr92301.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/pr92301.c 2020-08-24 21:36:23.556000000 +0800 +@@ -0,0 +1,35 @@ ++/* { dg-do run } */ ++/* { dg-options "-O3" } */ ++ ++unsigned int m; ++ ++#define N 128 ++unsigned int a[N]; ++ ++unsigned int ++__attribute__((noipa)) ++df_count_refs (_Bool include_defs) ++{ ++ int size = 0; ++ ++ for (unsigned int regno = 0; regno < m; regno++) ++ if (include_defs) ++ size += a[regno]; ++ return size; ++} ++ ++int main(int argc, char **argv) ++{ ++ for (unsigned i = 0; i < N; i++) ++ a[i] = i; ++ ++ if (argc == 1) ++ m = 17; ++ ++ unsigned int r = df_count_refs(1); ++ __builtin_printf ("r: %d\n", r); ++ if (r != 136) ++ __builtin_abort (); ++ ++ return 0; ++} +diff -Nurp a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +--- a/gcc/tree-vect-stmts.c 2020-08-24 21:35:23.664000000 +0800 ++++ b/gcc/tree-vect-stmts.c 2020-08-24 21:36:23.556000000 +0800 +@@ -474,6 +474,22 @@ process_use (stmt_vec_info stmt_vinfo, t + basic_block def_bb = gimple_bb (dstmt_vinfo->stmt); + basic_block bb = gimple_bb (stmt_vinfo->stmt); + ++ /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO). ++ We have to force the stmt live since the epilogue loop needs it to ++ continue computing the reduction. */ ++ if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI ++ && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def ++ && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI ++ && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def ++ && bb->loop_father == def_bb->loop_father) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "reduc-stmt defining reduc-phi in the same nest.\n"); ++ vect_mark_relevant (worklist, dstmt_vinfo, relevant, true); ++ return opt_result::success (); ++ } ++ + /* case 3a: outer-loop stmt defining an inner-loop stmt: + outer-loop-header-bb: + d = dstmt_vinfo diff --git a/gcc.spec b/gcc.spec index 306aba1..a0e5fc2 100644 --- a/gcc.spec +++ b/gcc.spec @@ -1,4 +1,4 @@ -%global DATE 20200629 +%global DATE 20200828 %global gcc_version 9.3.1 %global gcc_major 9.3.1 @@ -112,27 +112,69 @@ Provides: bundled(libiberty) Provides: gcc(major) = %{gcc_major} Patch0: enable-aarch64-libquadmath.patch -Patch1: generate-csel.patch -Patch2: delete-incorrect-smw.patch -Patch3: remove-array-index-inliner-hint.patch -Patch4: ivopts-1.patch -Patch5: ivopts-2.patch -Patch6: dont-generate-IF_THEN_ELSE.patch -Patch7: fix-cost-of-plus.patch -Patch8: div-opti.patch -Patch9: fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch -Patch10: fix-ICE-during-pass-ccp.patch -Patch11: loop-split.patch -Patch12: loop-finite.patch -Patch13: loop-finite-bugfix.patch -Patch14: fix-regno-out-of-range.patch -Patch15: fix-ICE-in-vectorizable-load.patch -Patch16: address-calculation-optimization-within-loop.patch -Patch17: skip-debug-insns-when-computing-inline-costs.patch -Patch18: change-gcc-BASE-VER.patch -Patch19: PR92303-Try-to-simplify-memory-subreg.patch -Patch20: Fix-PR94185.patch -Patch21: testsuite-Fix-pr94185.patch +Patch1: medium-code-mode.patch +Patch2: generate-csel.patch +Patch3: delete-incorrect-smw.patch +Patch4: remove-array-index-inliner-hint.patch +Patch5: ivopts-1.patch +Patch6: ivopts-2.patch +Patch7: dont-generate-IF_THEN_ELSE.patch +Patch8: fix-cost-of-plus.patch +Patch9: div-opti.patch +Patch10: 
fix-SYMBOL_TINY_GOT-handling-for-ILP32.patch +Patch11: fix-ICE-during-pass-ccp.patch +Patch12: loop-split.patch +Patch13: loop-finite.patch +Patch14: loop-finite-bugfix.patch +Patch15: fix-regno-out-of-range.patch +Patch16: fix-ICE-in-vectorizable-load.patch +Patch17: address-calculation-optimization-within-loop.patch +Patch18: skip-debug-insns-when-computing-inline-costs.patch +Patch19: ipa-const-prop.patch +Patch20: ipa-const-prop-self-recursion-bugfix.patch +Patch21: change-gcc-BASE-VER.patch +Patch22: add-option-fallow-store-data-races.patch +Patch23: tighten-range-for-generating-csel.patch +Patch24: generate-csel-for-arrayref.patch +Patch25: vectorization-enhancement.patch +Patch26: ipa-struct-reorg.patch +Patch27: ipa-struct-reorg-bugfix.patch +Patch28: enable-simd-math.patch +Patch29: complete-struct-reorg.patch +Patch30: reductions-slp-enhancement.patch +Patch31: cse-in-vectorization.patch +Patch32: PR92303-Try-to-simplify-memory-subreg.patch +Patch33: Fix-PR94185.patch +Patch34: testsuite-Fix-pr94185.patch +Patch35: fix-ICE-in-vect_stmt_to_vectorize.patch +Patch36: add-checks-to-avoid-spoiling-if-conversion.patch +Patch37: fix-ICE-in-vect_create_epilog_for_reduction.patch +Patch38: fix-ICE-in-compute_live_loop_exits.patch +Patch39: fix-ICE-in-store_constructor.patch +Patch40: fix-ICE-in-verify_ssa.patch +Patch41: fix-ICE-in-reload.patch +Patch42: fix-ICE-in-declare-return-variable.patch +Patch43: simplify-removing-subregs.patch +Patch44: fix-ICE-in-vec.patch +Patch45: fix-ICE-in-gimple_op.patch +Patch46: fix-ICE-in-exact_div.patch +Patch47: fix-ICE-statement-uses-released-SSA-name.patch +Patch48: fix-ICE-in-vect_get_vec_def_for_stmt_copy.patch +Patch49: fix-ICE-in-vect_create_epilog_for_reduction_2.patch +Patch50: fix-ICE-in-vect_slp_analyze_node_operations.patch +Patch51: fix-ICE-in-vect_create_epilog_for_reduction_3.patch +Patch52: fix-ICE-avoid-issueing-loads-in-SM-when-possible.patch +Patch53: fix-ICE-in-vect_transform_stmt.patch +Patch54: fix-ICE-in-copy_reference_ops_from_ref.patch +Patch55: fix-ICE-in-vectorizable_condition.patch +Patch56: reduction-chain-slp-option.patch +Patch57: fix-ICE-in-model_update_limit_points_in_group.patch +Patch58: fix-do-not-build-op.patch +Patch59: fix-wrong-vectorizer-code.patch +Patch60: fix-load-eliding-in-SM.patch +Patch61: fix-SSA-update-for-vectorizer-epilogue.patch +Patch62: fix-ICE-when-vectorizing-nested-cycles.patch + %global gcc_target_platform %{_arch}-linux-gnu @@ -596,6 +638,47 @@ not stable, so plugins must be rebuilt any time GCC is updated. 
%patch19 -p1 %patch20 -p1 %patch21 -p1 +%patch22 -p1 +%patch23 -p1 +%patch24 -p1 +%patch25 -p1 +%patch26 -p1 +%patch27 -p1 +%patch28 -p1 +%patch29 -p1 +%patch30 -p1 +%patch31 -p1 +%patch32 -p1 +%patch33 -p1 +%patch34 -p1 +%patch35 -p1 +%patch36 -p1 +%patch37 -p1 +%patch38 -p1 +%patch39 -p1 +%patch40 -p1 +%patch41 -p1 +%patch42 -p1 +%patch43 -p1 +%patch44 -p1 +%patch45 -p1 +%patch46 -p1 +%patch47 -p1 +%patch48 -p1 +%patch49 -p1 +%patch50 -p1 +%patch51 -p1 +%patch52 -p1 +%patch53 -p1 +%patch54 -p1 +%patch55 -p1 +%patch56 -p1 +%patch57 -p1 +%patch58 -p1 +%patch59 -p1 +%patch60 -p1 +%patch61 -p1 +%patch62 -p1 %build @@ -2524,6 +2607,49 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Mon Aug 28 2020 eastb233 - 9.3.1-20200828.4 + - Add add-checks-to-avoid-spoiling-if-conversion.patch + - Add add-option-fallow-store-data-races.patch + - Add complete-struct-reorg.patch + - Add cse-in-vectorization.patch + - Add enable-simd-math.patch + - Add fix-ICE-avoid-issueing-loads-in-SM-when-possible.patch + - Add fix-ICE-in-compute_live_loop_exits.patch + - Add fix-ICE-in-copy_reference_ops_from_ref.patch + - Add fix-ICE-in-declare-return-variable.patch + - Add fix-ICE-in-exact_div.patch + - Add fix-ICE-in-gimple_op.patch + - Add fix-ICE-in-model_update_limit_points_in_group.patch + - Add fix-ICE-in-reload.patch + - Add fix-ICE-in-store_constructor.patch + - Add fix-ICE-in-vec.patch + - Add fix-ICE-in-vect_create_epilog_for_reduction.patch + - Add fix-ICE-in-vect_create_epilog_for_reduction_2.patch + - Add fix-ICE-in-vect_create_epilog_for_reduction_3.patch + - Add fix-ICE-in-vect_get_vec_def_for_stmt_copy.patch + - Add fix-ICE-in-vect_slp_analyze_node_operations.patch + - Add fix-ICE-in-vect_stmt_to_vectorize.patch + - Add fix-ICE-in-vect_transform_stmt.patch + - Add fix-ICE-in-vectorizable_condition.patch + - Add fix-ICE-in-verify_ssa.patch + - Add fix-ICE-statement-uses-released-SSA-name.patch + - Add fix-ICE-when-vectorizing-nested-cycles.patch + - Add fix-SSA-update-for-vectorizer-epilogue.patch + - Add fix-do-not-build-op.patch + - Add fix-load-eliding-in-SM.patch + - Add fix-wrong-vectorizer-code.patch + - Add generate-csel-for-arrayref.patch + - Add ipa-const-prop-self-recursion-bugfix.patch + - Add ipa-const-prop.patch + - Add ipa-struct-reorg-bugfix.patch + - Add ipa-struct-reorg.patch + - Add medium-code-mode.patch + - Add reduction-chain-slp-option.patch + - Add reductions-slp-enhancement.patch + - Add simplify-removing-subregs.patch + - Add tighten-range-for-generating-csel.patch + - Add vectorization-enhancement.patch + * Mon Jun 29 2020 eastb233 - 9.3.1-20200629.3 - gcc.spec: Change release version diff --git a/generate-csel-for-arrayref.patch b/generate-csel-for-arrayref.patch new file mode 100644 index 0000000..c94311e --- /dev/null +++ b/generate-csel-for-arrayref.patch @@ -0,0 +1,218 @@ +diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c 2020-05-26 21:03:43.132721856 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c 2020-05-19 20:12:32.655794652 +0800 +@@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) { + return a[0]+a[1]; + } + +-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ +diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c 2020-05-26 
21:03:43.132721856 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c 2020-05-19 20:12:32.667794652 +0800 +@@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) { + return a[0]+a[1]; + } + +-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ +diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c 2020-05-26 21:03:43.132721856 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c 2020-05-19 20:12:32.667794652 +0800 +@@ -13,4 +13,4 @@ int test(int b, int k) { + return a.data[0] + a.data[1]; + } + +-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ +diff -uprN a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +--- a/gcc/tree-ssa-phiopt.c 2020-05-26 21:03:43.132721856 +0800 ++++ b/gcc/tree-ssa-phiopt.c 2020-05-26 21:02:02.872006469 +0800 +@@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. + #include "params.h" + #include "case-cfn-macros.h" + #include "tree-eh.h" ++#include "inchash.h" + + static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); + static bool two_value_replacement (basic_block, basic_block, edge, gphi *, +@@ -1984,6 +1985,18 @@ struct name_to_bb + basic_block bb; + }; + ++/* A hash-table of ARRAY_REF with a base of VAR_DECL and an offset of ++ SSA_NAME, and in which basic block it was seen, which would constitute ++ a no-trap region for same accessed. */ ++struct array_ref_to_bb ++{ ++ unsigned int ssa_name_ver; ++ unsigned int phase; ++ HOST_WIDE_INT size; ++ tree var_decl; ++ basic_block bb; ++}; ++ + /* Hashtable helpers. */ + + struct ssa_names_hasher : free_ptr_hash +@@ -1992,6 +2005,12 @@ struct ssa_names_hasher : free_ptr_hash + static inline bool equal (const name_to_bb *, const name_to_bb *); + }; + ++struct array_refs_hasher : free_ptr_hash ++{ ++ static inline hashval_t hash (const array_ref_to_bb *); ++ static inline bool equal (const array_ref_to_bb *, const array_ref_to_bb *); ++}; ++ + /* Used for quick clearing of the hash-table when we see calls. + Hash entries with phase < nt_call_phase are invalid. */ + static unsigned int nt_call_phase; +@@ -2005,6 +2024,16 @@ ssa_names_hasher::hash (const name_to_bb + ^ (n->offset << 6) ^ (n->size << 3); + } + ++inline hashval_t ++array_refs_hasher::hash (const array_ref_to_bb *n) ++{ ++ inchash::hash hstate (0); ++ hstate.add_int (n->ssa_name_ver); ++ hstate.add_hwi (n->size); ++ hstate.add_ptr (n->var_decl); ++ return hstate.end (); ++} ++ + /* The equality function of *P1 and *P2. 
*/ + + inline bool +@@ -2016,11 +2045,21 @@ ssa_names_hasher::equal (const name_to_b + && n1->size == n2->size; + } + ++inline bool ++array_refs_hasher::equal (const array_ref_to_bb *n1, const array_ref_to_bb *n2) ++{ ++ return n1->ssa_name_ver == n2->ssa_name_ver ++ && n1->size == n2->size ++ && n1->var_decl == n2->var_decl; ++} ++ + class nontrapping_dom_walker : public dom_walker + { + public: + nontrapping_dom_walker (cdi_direction direction, hash_set *ps) +- : dom_walker (direction), m_nontrapping (ps), m_seen_ssa_names (128) {} ++ : dom_walker (direction), m_nontrapping (ps), ++ m_seen_ssa_names (128), m_seen_array_refs (128) ++ {} + + virtual edge before_dom_children (basic_block); + virtual void after_dom_children (basic_block); +@@ -2028,16 +2067,18 @@ public: + private: + + /* We see the expression EXP in basic block BB. If it's an interesting +- expression (an MEM_REF through an SSA_NAME) possibly insert the +- expression into the set NONTRAP or the hash table of seen expressions. +- STORE is true if this expression is on the LHS, otherwise it's on +- the RHS. */ ++ expression (an MEM_REF through an SSA_NAME or an ARRAY_REF with a base ++ of VAR_DECL and an offset of SSA_NAME) possibly insert the expression ++ into the set NONTRAP or the hash table of seen expressions. STORE ++ is true if this expression is on the LHS, otherwise it's on the RHS. */ + void add_or_mark_expr (basic_block, tree, bool); ++ void add_or_mark_array_ref (basic_block, tree); + + hash_set *m_nontrapping; + + /* The hash table for remembering what we've seen. */ + hash_table m_seen_ssa_names; ++ hash_table m_seen_array_refs; + }; + + /* Called by walk_dominator_tree, when entering the block BB. */ +@@ -2071,7 +2112,9 @@ nontrapping_dom_walker::before_dom_child + else if (gimple_assign_single_p (stmt) && !gimple_has_volatile_ops (stmt)) + { + add_or_mark_expr (bb, gimple_assign_lhs (stmt), true); ++ add_or_mark_array_ref (bb, gimple_assign_lhs (stmt)); + add_or_mark_expr (bb, gimple_assign_rhs1 (stmt), false); ++ add_or_mark_array_ref (bb, gimple_assign_rhs1 (stmt)); + } + } + return NULL; +@@ -2148,6 +2191,74 @@ nontrapping_dom_walker::add_or_mark_expr + } + } + } ++} ++ ++/* We see the expression EXP in basic block BB. If it's an interesting ++ expression (an ARRAY_REF with a base of VAR_DECL and an offset of ++ SSA_NAME) possibly insert the expression into the set NONTRAP or the ++ hash table of seen expressions. */ ++void ++nontrapping_dom_walker::add_or_mark_array_ref (basic_block bb, tree exp) ++{ ++ if (TREE_CODE (exp) == ARRAY_REF ++ && TREE_CODE (TREE_OPERAND (exp, 1)) == SSA_NAME ++ && int_size_in_bytes (TREE_TYPE (exp)) > 0) ++ { ++ HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); ++ tree base = get_base_address (exp); ++ /* if BASE is a local variable without address-taken, which can't be ++ read-only, a dominating load can constitute a no-trap region for ++ a store as well. */ ++ if (TREE_CODE (base) == VAR_DECL ++ && auto_var_p (base) && !TREE_ADDRESSABLE (base)) ++ { ++ struct array_ref_to_bb array_map; ++ basic_block found_array_bb = 0; ++ ++ /* Try to find the last seen ARRAY_REF with the same base and ++ offset, which can trap. 
*/ ++ array_map.ssa_name_ver = SSA_NAME_VERSION (TREE_OPERAND (exp, 1)); ++ array_map.phase = 0; ++ array_map.bb = 0; ++ array_map.size = size; ++ array_map.var_decl = base; ++ ++ array_ref_to_bb **slot ++ = m_seen_array_refs.find_slot (&array_map, INSERT); ++ struct array_ref_to_bb *a2bb = *slot; ++ if (a2bb != NULL && a2bb->phase >= nt_call_phase) ++ { ++ found_array_bb = a2bb->bb; ++ } ++ ++ /* If we've found a trapping MEM_REF, _and_ it dominates EXP ++ (it's in a basic block on the path from us to the dominator root) ++ then we can't trap. */ ++ if (found_array_bb && (((size_t)found_array_bb->aux) & 1) == 1) ++ { ++ m_nontrapping->add (exp); ++ } ++ else ++ { ++ /* EXP might trap, so insert it into the hash table. */ ++ if (a2bb != NULL) ++ { ++ a2bb->phase = nt_call_phase; ++ a2bb->bb = bb; ++ } ++ else ++ { ++ a2bb = XNEW (struct array_ref_to_bb); ++ a2bb->ssa_name_ver = SSA_NAME_VERSION (TREE_OPERAND (exp, 1)); ++ a2bb->phase = nt_call_phase; ++ a2bb->bb = bb; ++ a2bb->size = size; ++ a2bb->var_decl = base; ++ *slot = a2bb; ++ } ++ } ++ } ++ } + } + + /* This is the entry point of gathering non trapping memory accesses. diff --git a/generate-csel.patch b/generate-csel.patch index 41fb032..3aaf261 100644 --- a/generate-csel.patch +++ b/generate-csel.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-re-PR-tree-optimization-89430-A-missing-ifcvt-optimi.patch +b9ef6a2e04bfd01329902781818ef80c52cd8b97 + diff -uprN a/gcc/testsuite/gcc.dg/graphite/scop-21.c b/gcc/testsuite/gcc.dg/graphite/scop-21.c --- a/gcc/testsuite/gcc.dg/graphite/scop-21.c +++ b/gcc/testsuite/gcc.dg/graphite/scop-21.c diff --git a/ipa-const-prop-self-recursion-bugfix.patch b/ipa-const-prop-self-recursion-bugfix.patch new file mode 100644 index 0000000..9e878a3 --- /dev/null +++ b/ipa-const-prop-self-recursion-bugfix.patch @@ -0,0 +1,191 @@ +This patch is backport from gcc-trunk. It is a combined patch from + +Find matched aggregate lattice for self-recursive CP (PR ipa/93084) +https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=42d73fa9d575e3c8c21e88bd7f65922e17b052f1 + +and + +Do not propagate self-dependent value (PR ipa/93763) +https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=47772af10c00f7e1e95cd52557fc893dc602a420 + +adapted the using of parameter to gcc9 style. + +diff -Nurp a/gcc/ipa-cp.c b/gcc/ipa-cp.c +--- a/gcc/ipa-cp.c 2020-05-23 16:16:58.032000000 +0800 ++++ b/gcc/ipa-cp.c 2020-05-22 18:03:41.980000000 +0800 +@@ -1766,8 +1766,8 @@ ipcp_lattice::add_value (valtyp + } + + /* Return true, if a ipcp_value VAL is orginated from parameter value of +- self-feeding recursive function by applying non-passthrough arithmetic +- transformation. */ ++ self-feeding recursive function via some kind of pass-through jump ++ function. */ + + static bool + self_recursively_generated_p (ipcp_value *val) +@@ -1778,19 +1778,36 @@ self_recursively_generated_p (ipcp_value + { + cgraph_edge *cs = src->cs; + +- if (!src->val || cs->caller != cs->callee->function_symbol () +- || src->val == val) ++ if (!src->val || cs->caller != cs->callee->function_symbol ()) + return false; + ++ if (src->val == val) ++ continue; ++ + if (!info) + info = IPA_NODE_REF (cs->caller); + + class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, + src->index); +- ipcp_lattice *src_lat = src->offset == -1 ? 
&plats->itself +- : plats->aggs; ++ ipcp_lattice *src_lat; + ipcp_value *src_val; + ++ if (src->offset == -1) ++ src_lat = &plats->itself; ++ else ++ { ++ struct ipcp_agg_lattice *src_aglat; ++ ++ for (src_aglat = plats->aggs; src_aglat; src_aglat = src_aglat->next) ++ if (src_aglat->offset == src->offset) ++ break; ++ ++ if (!src_aglat) ++ return false; ++ ++ src_lat = src_aglat; ++ } ++ + for (src_val = src_lat->values; src_val; src_val = src_val->next) + if (src_val == val) + break; +@@ -1887,6 +1904,8 @@ propagate_vals_across_arith_jfunc (cgrap + val_seeds.safe_push (src_val); + } + ++ gcc_assert ((int) val_seeds.length () ++ <= PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE)); + /* Recursively generate lattice values with a limited count. */ + FOR_EACH_VEC_ELT (val_seeds, i, src_val) + { +diff -Nurp a/gcc/testsuite/gcc.dg/ipa/ipa-clone-3.c b/gcc/testsuite/gcc.dg/ipa/ipa-clone-3.c +--- a/gcc/testsuite/gcc.dg/ipa/ipa-clone-3.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/ipa/ipa-clone-3.c 2020-05-22 17:55:24.036000000 +0800 +@@ -0,0 +1,42 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -fdump-ipa-cp-details -fno-early-inlining --param ipa-cp-max-recursive-depth=8 --param ipa-cp-eval-threshold=1" } */ ++ ++struct V { ++ int f0; ++ int f1; ++}; ++ ++int data[100]; ++ ++int fn (); ++ ++int recur_fn (struct V * __restrict v) ++{ ++ int i = v->f0; ++ int j = v->f1; ++ struct V t; ++ ++ if (j > 100) ++ { ++ fn (); ++ return 1; ++ } ++ ++ data[i] = i; ++ ++ t.f0 = i - 2; ++ t.f1 = j + 1; ++ ++ recur_fn (&t); ++ ++ return i * j; ++} ++ ++int main () ++{ ++ struct V v = {1, 3}; ++ ++ return recur_fn (&v); ++} ++ ++/* { dg-final { scan-ipa-dump-times "Creating a specialized node of recur_fn/\[0-9\]*\\." 8 "cp" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/ipa/pr93763.c b/gcc/testsuite/gcc.dg/ipa/pr93763.c +--- a/gcc/testsuite/gcc.dg/ipa/pr93763.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/ipa/pr93763.c 2020-05-22 17:57:10.532000000 +0800 +@@ -0,0 +1,46 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3" } */ ++ ++typedef struct a a; ++struct a { ++ a *b ++} d; ++e, k, ah, al; ++f(aa) { ++ if (aa & 1) ++ goto g; ++ f(aa | 2); ++g: ++ h(); ++} ++l() { ++ { ++ f(072); ++ i(e, d, 92); ++ } ++} ++ag() { ++ { i(e, d, 36); } ++} ++ai(a *m, a *n, unsigned aa) { ++ f(aa); ++ j(k, l, ah, 1); ++} ++j(int c, a m, int aj, int aa) { ++ int ak = aa; ++ { i(e, d, ak); } ++} ++i(int c, a *m, unsigned aa) { ++ { ++ { i(c, (*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*( ++*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*( ++*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*(*m).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b) ++.b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b) ++.b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b) 
++.b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b) ++.b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b).b, 0); ++ } ++ } ++ int am = aa; ++ ai(ag, al, am); ++} +diff -Nurp a/gcc/testsuite/g++.dg/ipa/pr93763.C b/gcc/testsuite/g++.dg/ipa/pr93763.C +--- a/gcc/testsuite/g++.dg/ipa/pr93763.C 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/g++.dg/ipa/pr93763.C 2020-05-22 17:57:10.532000000 +0800 +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3" } */ ++ ++struct search_param { ++ int total; ++}; ++void search_trivial(search_param error_left) { ++ search_trivial(error_left); ++ search_param error_left2{error_left}; ++ error_left2.total--; ++ search_trivial(error_left2); ++} ++void search_algo_uni(search_param error_left) { search_trivial(error_left); } ++void search_algo(search_param error_left) { search_algo_uni(error_left); } ++int main() { search_algo({}); return 0; } diff --git a/ipa-const-prop.patch b/ipa-const-prop.patch new file mode 100644 index 0000000..7cad13f --- /dev/null +++ b/ipa-const-prop.patch @@ -0,0 +1,11040 @@ +This backport contains 50 patchs from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +ipa-const-prop-2019-06-10-add-ignore-edge-func.patch: +commit 97e59627567757759b047479c75be2f238ea45c3 + +ipa-const-prop-2019-06-14-prop-by-ref-to-callee.patch: +commit 46771da57463c62f66af32e9189f1b6fb8bbe8c7 + +ipa-const-prop-2019-07-05-add-tbaa-para.patch: +ipa-const-prop-2019-07-05-add-tbaa-para-conflict-fix.patch +commit fb4697e30bd0cd4bda66932e21c183273a5d1e63 + +ipa-const-prop-2019-07-08-bugfix-drop-useless-instr.patch: +ipa-const-prop-2019-07-08-bugfix-drop-useless-instr-conflict-fix.patch +commit 38988cbf9ebaa96fb1e891a46aa063f0c298a2e2 + +ipa-const-prop-2019-07-09-ipa-cp-class-change.patch +ipa-const-prop-2019-07-09-ipa-fnsummary-class-change.patch +ipa-const-prop-2019-07-09-ipa-inline-analysis-class-change.patch +ipa-const-prop-2019-07-09-ipa-prop-class-change.patch +ipa-const-prop-2019-07-09-ipa-prop-class-change-conflic-fix.patch +ipa-const-prop-2019-07-09-ipa-predicate-class-change.patch +commit 99b1c316ec974a39bdd949f8559bb28861b69592 + +ipa-const-prop-2019-08-07-change-to-poly_64.patch: +commit 8600364582f24d2a3f227111c6a87b7d98561c69 + +ipa-const-prop-2019-08-12-bugfix-add-condition-fix.patch: +commit 52c9b7face987062527c612e0a65f084e43c85fd + +ipa-const-prop-2019-09-17-new-para-ipa-max-switch.patch: +commit 351e7c3b5fbd45bde3efb601f7fee9a31c4f2063 + +ipa-const-prop-2019-09-19-auto-switch-predicate.patch: +commit efe126563bb8d28cb3958423a735d0021e75702f + +ipa-const-prop-2019-10-03-generate-ipa-on-para-ref.patch: +commit 4307a485c39fd1c317d6cead2707a903052c4753 + +ipa-const-prop-2019-10-05-inline-size-para-change.patch: +commit 6c291ad828fcb5f01a1d2cb23f6078e9a6f958b9 + +ipa-const-prop-2019-10-10-bugfix-20040708-split-splay-tree.patch: +commit 6488759f404f3aff6642b005242a9c82a1c2cee2 + +ipa-const-prop-2019-10-23-bugfix-20040708-fix-uid-func.patch: +commit b5b6485f1cc54f21713b5b03c5d63d56839ca458 + +ipa-const-prop-2019-10-23-bugfix-20040708-fix-uid-func-2nd.patch: +commit 45012be1f5c7e6039e594bab41ebb94d89a9aca0 + +ipa-const-prop-2019-10-24-toggle-static-write.patch: +commit abebffc609506176f8ba3f64533e15ece49446c0 + +ipa-const-prop-2019-10-25-bugfix-empty-edge-ICE.patch: +commit 
5a0236f8ca9d239bb62ef54c9273e6ca3f068f87 + +ipa-const-prop-2019-10-25-call-size-summary.patch: +ipa-const-prop-2019-10-25-call-size-summary-confict-fix.patch +commit f658ad3002a0afc8aa86d5646ee704921d969ebe + +ipa-const-prop-2019-10-27-bugfix-solve-LTO-ICE.patch: +commit b1e655646f5b0be3d146825c130690078a8601c3 + +ipa-const-prop-2019-10-27-do-not-move-jump.patch: +commit 051d8a5faa3b37b0dda84c8382174ee70d5b7992 + +ipa-const-prop-2019-10-27-drop-if-no-arg.patch: +commit a33c028eb38268b5084ebc4cc17a1cb64b3a838b + +ipa-const-prop-2019-10-27-update-sum-after-expand.patch: +commit a088d7b10f296dbd57bccbac1bfcf8abb207b034 + +ipa-const-prop-2019-10-30-remove-global.patch: +commit a62bfab5d2a332925fcf10c45b4c5d8ca499439d + +ipa-const-prop-2019-11-03-add-deplicate-form.patch: +commit ac6f2e594886e2209446114023ecdff96b0bd7c4 + +ipa-const-prop-2019-11-03-ipa-inline-analysis-conflict-fix.patch: +ipa-const-prop-2019-11-03-improve-efficiency-of-ipa-poly.patch: +commit 40a777e840f74dd5c19ea26c55d1248a335fd11b + +ipa-const-prop-2019-11-03-ipa-fnsummary-add-call-context.patch: +commit 1532500ecbe8dbf59bef498e46b447b3a6b0fa65 + +ipa-const-prop-2019-11-03-size-ahead-time.patch: +commit 360386c7ef1c3fa30de216b1d68ed6a27296fd80 + +ipa-const-prop-2019-11-04-ipa-inline-includes-ipa-utils.patch: +commit 2bc2379be5c98d34ecbb347b2abf059aa6d94499 + +ipa-const-prop-2019-11-09-add-ipacp-clone.patch: +commit 6cf67b62c8cda035dccaca2ae6ff94d560b37a6f + +ipa-const-prop-2019-11-09-call-nodeRef-on-func-sym.patch: +commit 2ee6e04aaecc856bced29711f9765660e0888994 + +ipa-const-prop-2019-11-13-bugfix-inline-check-before-flatten.patch: +commit 2895b172d56c355373b64517a3298a01a2f10ec0 + +ipa-const-prop-2019-11-13-bugfix-inline-empty-edge.patch: +commit 367c959f0303e11e0a6d875abba7d03c72686668 + +ipa-const-prop-2019-11-13-bugfix-inline-small-function.patch: +commit b914768c1968d924d77bbe3f4e707c6105f3682c + +ipa-const-prop-2019-11-13-bugfix-lto-ICE.patch: +commit d200a49f5c83fa0f2e7332aecf69b6ab4a51b052 + +ipa-const-prop-2019-11-13-fix-ipa-profile-indirect-call.patch: +commit 7b34a284cab5d533552c1df995a88f7167d243bd + +ipa-const-prop-2019-11-14-by-ref-const-prop.patch: +ipa-const-prop-2019-11-14-by-ref-const-prop-conflict-fix.patch +commit eb270950acbae6f70e3487a6e63a26c1294656b3 + +ipa-const-prop-2019-11-15-bugfix-segfault-with-null-top.patch: +commit 1c3c3f455021130c429f57b09ef39bc218bd7fff + +ipa-const-prop-2019-11-18-bugfix-ICE-null-edge.patch: +commit 8d890d37e0183735586c18f1f056deb5848617ca + +ipa-const-prop-2019-11-18-bug-fix-ICE.patch: +commit 8d890d37e0183735586c18f1f056deb5848617ca + +ipa-const-prop-2019-12-02-recusion-versioning.patch: +ipa-const-prop-2019-12-02-param-conflict-fix.patch +commit 9b14fc3326e087975653b1af8ac54114041cde51 + +The original of these commit can be found on + https://github.com/gcc-mirror/gcc + +Not all these commits are applied directly. If the commit node contains +code that affact other modules that unrelated to ipa constant propgation +optimization, the part that the optimization need is regrouped into +a small new patch, which usually named conflict-fix. 
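(For reference, and not part of any of the backported commits: the user-visible
effect of this series is that IPA-CP can now specialize self-recursive
functions, bounded by the new parameters ipa-cp-max-recursive-depth and
ipa-cp-min-recursive-probability documented in the invoke.texi hunk below.
The sketch that follows is illustrative only; the identifiers are made up for
this note, and it is modelled on the new test gcc.dg/ipa/ipa-clone-3.c added
above by the self-recursion bugfix patch.)

/* A self-recursive chain whose argument advances by a known constant step;
   the recursive cloning backported here may emit one specialized clone of
   recur per level, assuming the evaluation threshold is lowered as in the
   ipa-clone-3.c test.  */

int data[128];

__attribute__((noipa)) int leaf (int v)   /* opaque leaf, keeps a real call */
{
  return v;
}

static int recur (int i)
{
  if (i >= 8)                /* constant recursion depth */
    return leaf (i);
  data[i] = i;               /* every clone stores a known constant index */
  return recur (i + 1);      /* arithmetic pass-through jump function: i + 1 */
}

int main (void)
{
  return recur (0) - 8;      /* recur (0) evaluates to 8, so exit status 0 */
}

Compiled with -O3 -fdump-ipa-cp-details --param ipa-cp-max-recursive-depth=8
--param ipa-cp-eval-threshold=1, the cp dump can be expected to contain one
"Creating a specialized node of recur/..." line per recursion level, which is
the behaviour the new ipa-clone-3.c test checks for its recur_fn.
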
+ +diff -Nurp a/gcc/cgraphbuild.c b/gcc/cgraphbuild.c +--- a/gcc/cgraphbuild.c 2020-04-30 15:14:04.580000000 +0800 ++++ b/gcc/cgraphbuild.c 2020-04-30 15:14:56.584000000 +0800 +@@ -428,7 +428,7 @@ cgraph_edge::rebuild_edges (void) + node->record_stmt_references (gsi_stmt (gsi)); + } + record_eh_tables (node, cfun); +- gcc_assert (!node->global.inlined_to); ++ gcc_assert (!node->inlined_to); + return 0; + } + +diff -Nurp a/gcc/cgraph.c b/gcc/cgraph.c +--- a/gcc/cgraph.c 2020-04-30 15:14:04.576000000 +0800 ++++ b/gcc/cgraph.c 2020-04-30 15:14:56.584000000 +0800 +@@ -539,7 +539,7 @@ cgraph_node::get_create (tree decl) + { + cgraph_node *first_clone = cgraph_node::get (decl); + +- if (first_clone && !first_clone->global.inlined_to) ++ if (first_clone && !first_clone->inlined_to) + return first_clone; + + cgraph_node *node = cgraph_node::create (decl); +@@ -659,7 +659,7 @@ cgraph_node::get_for_asmname (tree asmna + node = node->next_sharing_asm_name) + { + cgraph_node *cn = dyn_cast (node); +- if (cn && !cn->global.inlined_to) ++ if (cn && !cn->inlined_to) + return cn; + } + return NULL; +@@ -1857,7 +1857,7 @@ cgraph_node::remove (void) + { + cgraph_node *n = cgraph_node::get (decl); + if (!n +- || (!n->clones && !n->clone_of && !n->global.inlined_to ++ || (!n->clones && !n->clone_of && !n->inlined_to + && ((symtab->global_info_ready || in_lto_p) + && (TREE_ASM_WRITTEN (n->decl) + || DECL_EXTERNAL (n->decl) +@@ -1888,7 +1888,7 @@ cgraph_node::mark_address_taken (void) + { + /* Indirect inlining can figure out that all uses of the address are + inlined. */ +- if (global.inlined_to) ++ if (inlined_to) + { + gcc_assert (cfun->after_inlining); + gcc_assert (callers->indirect_inlining_edge); +@@ -2012,10 +2012,10 @@ cgraph_node::dump (FILE *f) + + dump_base (f); + +- if (global.inlined_to) ++ if (inlined_to) + fprintf (f, " Function %s is inline copy in %s\n", + dump_name (), +- global.inlined_to->dump_name ()); ++ inlined_to->dump_name ()); + if (clone_of) + fprintf (f, " Clone of %s\n", clone_of->dump_asm_name ()); + if (symtab->function_flags_ready) +@@ -2159,7 +2159,7 @@ cgraph_node::dump (FILE *f) + if (dyn_cast (ref->referring)->count.initialized_p ()) + sum += dyn_cast (ref->referring)->count.ipa (); + +- if (global.inlined_to ++ if (inlined_to + || (symtab->state < EXPANSION + && ultimate_alias_target () == this && only_called_directly_p ())) + ok = !count.ipa ().differs_from_p (sum); +@@ -2259,14 +2259,14 @@ cgraph_node::get_availability (symtab_no + { + cgraph_node *cref = dyn_cast (ref); + if (cref) +- ref = cref->global.inlined_to; ++ ref = cref->inlined_to; + } + enum availability avail; + if (!analyzed) + avail = AVAIL_NOT_AVAILABLE; + else if (local.local) + avail = AVAIL_LOCAL; +- else if (global.inlined_to) ++ else if (inlined_to) + avail = AVAIL_AVAILABLE; + else if (transparent_alias) + ultimate_alias_target (&avail, ref); +@@ -2878,7 +2878,7 @@ bool + cgraph_node::will_be_removed_from_program_if_no_direct_calls_p + (bool will_inline) + { +- gcc_assert (!global.inlined_to); ++ gcc_assert (!inlined_to); + if (DECL_EXTERNAL (decl)) + return true; + +@@ -3065,7 +3065,7 @@ cgraph_edge::verify_corresponds_to_fndec + { + cgraph_node *node; + +- if (!decl || callee->global.inlined_to) ++ if (!decl || callee->inlined_to) + return false; + if (symtab->state == LTO_STREAMING) + return false; +@@ -3126,7 +3126,7 @@ cgraph_node::verify_node (void) + error ("cgraph count invalid"); + error_found = true; + } +- if (global.inlined_to && same_comdat_group) ++ if (inlined_to && same_comdat_group) 
+ { + error ("inline clone in same comdat group list"); + error_found = true; +@@ -3136,17 +3136,17 @@ cgraph_node::verify_node (void) + error ("local symbols must be defined"); + error_found = true; + } +- if (global.inlined_to && externally_visible) ++ if (inlined_to && externally_visible) + { + error ("externally visible inline clone"); + error_found = true; + } +- if (global.inlined_to && address_taken) ++ if (inlined_to && address_taken) + { + error ("inline clone with address taken"); + error_found = true; + } +- if (global.inlined_to && force_output) ++ if (inlined_to && force_output) + { + error ("inline clone is forced to output"); + error_found = true; +@@ -3183,9 +3183,9 @@ cgraph_node::verify_node (void) + } + if (!e->inline_failed) + { +- if (global.inlined_to +- != (e->caller->global.inlined_to +- ? e->caller->global.inlined_to : e->caller)) ++ if (inlined_to ++ != (e->caller->inlined_to ++ ? e->caller->inlined_to : e->caller)) + { + error ("inlined_to pointer is wrong"); + error_found = true; +@@ -3197,7 +3197,7 @@ cgraph_node::verify_node (void) + } + } + else +- if (global.inlined_to) ++ if (inlined_to) + { + error ("inlined_to pointer set for noninline callers"); + error_found = true; +@@ -3208,7 +3208,7 @@ cgraph_node::verify_node (void) + if (e->verify_count ()) + error_found = true; + if (gimple_has_body_p (e->caller->decl) +- && !e->caller->global.inlined_to ++ && !e->caller->inlined_to + && !e->speculative + /* Optimized out calls are redirected to __builtin_unreachable. */ + && (e->count.nonzero_p () +@@ -3233,7 +3233,7 @@ cgraph_node::verify_node (void) + if (e->verify_count ()) + error_found = true; + if (gimple_has_body_p (e->caller->decl) +- && !e->caller->global.inlined_to ++ && !e->caller->inlined_to + && !e->speculative + && e->count.ipa_p () + && count +@@ -3250,12 +3250,12 @@ cgraph_node::verify_node (void) + error_found = true; + } + } +- if (!callers && global.inlined_to) ++ if (!callers && inlined_to) + { + error ("inlined_to pointer is set but no predecessors found"); + error_found = true; + } +- if (global.inlined_to == this) ++ if (inlined_to == this) + { + error ("inlined_to pointer refers to itself"); + error_found = true; +@@ -3344,7 +3344,7 @@ cgraph_node::verify_node (void) + error ("More than one edge out of thunk node"); + error_found = true; + } +- if (gimple_has_body_p (decl) && !global.inlined_to) ++ if (gimple_has_body_p (decl) && !inlined_to) + { + error ("Thunk is not supposed to have body"); + error_found = true; +@@ -3352,7 +3352,7 @@ cgraph_node::verify_node (void) + } + else if (analyzed && gimple_has_body_p (decl) + && !TREE_ASM_WRITTEN (decl) +- && (!DECL_EXTERNAL (decl) || global.inlined_to) ++ && (!DECL_EXTERNAL (decl) || inlined_to) + && !flag_wpa) + { + if (this_cfun->cfg) +@@ -3623,7 +3623,7 @@ cgraph_node::get_body (void) + early. + TODO: Materializing clones here will likely lead to smaller LTRANS + footprint. */ +- gcc_assert (!global.inlined_to && !clone_of); ++ gcc_assert (!inlined_to && !clone_of); + if (ipa_transforms_to_apply.exists ()) + { + opt_pass *saved_current_pass = current_pass; +@@ -3813,8 +3813,8 @@ cgraph_node::has_thunk_p (cgraph_node *n + sreal + cgraph_edge::sreal_frequency () + { +- return count.to_sreal_scale (caller->global.inlined_to +- ? caller->global.inlined_to->count ++ return count.to_sreal_scale (caller->inlined_to ++ ? 
caller->inlined_to->count + : caller->count); + } + +diff -Nurp a/gcc/cgraphclones.c b/gcc/cgraphclones.c +--- a/gcc/cgraphclones.c 2020-04-30 15:14:04.644000000 +0800 ++++ b/gcc/cgraphclones.c 2020-04-30 15:14:56.628000000 +0800 +@@ -458,8 +458,7 @@ cgraph_node::create_clone (tree new_decl + new_node->externally_visible = false; + new_node->no_reorder = no_reorder; + new_node->local.local = true; +- new_node->global = global; +- new_node->global.inlined_to = new_inlined_to; ++ new_node->inlined_to = new_inlined_to; + new_node->rtl = rtl; + new_node->frequency = frequency; + new_node->tp_first_run = tp_first_run; +@@ -671,6 +670,7 @@ cgraph_node::create_virtual_clone (vecipcp_clone = ipcp_clone; + new_node->clone.tree_map = tree_map; + if (!implicit_section) + new_node->set_section (get_section ()); +@@ -965,7 +965,7 @@ cgraph_node::create_version_clone (tree + new_version->externally_visible = false; + new_version->no_reorder = no_reorder; + new_version->local.local = new_version->definition; +- new_version->global = global; ++ new_version->inlined_to = inlined_to; + new_version->rtl = rtl; + new_version->count = count; + +diff -Nurp a/gcc/cgraph.h b/gcc/cgraph.h +--- a/gcc/cgraph.h 2020-04-30 15:14:04.624000000 +0800 ++++ b/gcc/cgraph.h 2020-04-30 15:14:56.628000000 +0800 +@@ -718,15 +718,6 @@ struct GTY(()) cgraph_local_info { + unsigned tm_may_enter_irr : 1; + }; + +-/* Information about the function that needs to be computed globally +- once compilation is finished. Available only with -funit-at-a-time. */ +- +-struct GTY(()) cgraph_global_info { +- /* For inline clones this points to the function they will be +- inlined into. */ +- cgraph_node *inlined_to; +-}; +- + /* Represent which DECL tree (or reference to such tree) + will be replaced by another tree while versioning. */ + struct GTY(()) ipa_replace_map +@@ -959,7 +950,7 @@ public: + + If the new node is being inlined into another one, NEW_INLINED_TO should be + the outline function the new one is (even indirectly) inlined to. +- All hooks will see this in node's global.inlined_to, when invoked. ++ All hooks will see this in node's inlined_to, when invoked. + Can be NULL if the node is not inlined. SUFFIX is string that is appended + to the original name. */ + cgraph_node *create_clone (tree decl, profile_count count, +@@ -1420,7 +1411,11 @@ public: + vec GTY((skip)) ipa_transforms_to_apply; + + cgraph_local_info local; +- cgraph_global_info global; ++ ++ /* For inline clones this points to the function they will be ++ inlined into. */ ++ cgraph_node *inlined_to; ++ + struct cgraph_rtl_info *rtl; + cgraph_clone_info clone; + cgraph_thunk_info thunk; +@@ -1474,6 +1469,8 @@ public: + unsigned split_part : 1; + /* True if the function appears as possible target of indirect call. */ + unsigned indirect_call_target : 1; ++ /* True if this was a clone created by ipa-cp. */ ++ unsigned ipcp_clone : 1; + + private: + /* Unique id of the node. 
*/ +@@ -2474,7 +2471,7 @@ symtab_node::real_symbol_p (void) + if (!is_a (this)) + return true; + cnode = dyn_cast (this); +- if (cnode->global.inlined_to) ++ if (cnode->inlined_to) + return false; + return true; + } +@@ -2497,13 +2494,13 @@ symtab_node::in_same_comdat_group_p (sym + + if (cgraph_node *cn = dyn_cast (target)) + { +- if (cn->global.inlined_to) +- source = cn->global.inlined_to; ++ if (cn->inlined_to) ++ source = cn->inlined_to; + } + if (cgraph_node *cn = dyn_cast (target)) + { +- if (cn->global.inlined_to) +- target = cn->global.inlined_to; ++ if (cn->inlined_to) ++ target = cn->inlined_to; + } + + return source->get_comdat_group () == target->get_comdat_group (); +@@ -2964,7 +2961,7 @@ struct GTY((for_user)) constant_descript + inline bool + cgraph_node::only_called_directly_or_aliased_p (void) + { +- gcc_assert (!global.inlined_to); ++ gcc_assert (!inlined_to); + return (!force_output && !address_taken + && !ifunc_resolver + && !used_from_other_partition +@@ -2981,7 +2978,7 @@ cgraph_node::only_called_directly_or_ali + inline bool + cgraph_node::can_remove_if_no_direct_calls_and_refs_p (void) + { +- gcc_checking_assert (!global.inlined_to); ++ gcc_checking_assert (!inlined_to); + /* Extern inlines can always go, we will use the external definition. */ + if (DECL_EXTERNAL (decl)) + return true; +@@ -3152,8 +3149,8 @@ inline bool + cgraph_edge::recursive_p (void) + { + cgraph_node *c = callee->ultimate_alias_target (); +- if (caller->global.inlined_to) +- return caller->global.inlined_to->decl == c->decl; ++ if (caller->inlined_to) ++ return caller->inlined_to->decl == c->decl; + else + return caller->decl == c->decl; + } +@@ -3190,8 +3187,8 @@ cgraph_edge::binds_to_current_def_p () + inline int + cgraph_edge::frequency () + { +- return count.to_cgraph_frequency (caller->global.inlined_to +- ? caller->global.inlined_to->count ++ return count.to_cgraph_frequency (caller->inlined_to ++ ? caller->inlined_to->count + : caller->count); + } + +@@ -3213,7 +3210,7 @@ inline void + cgraph_node::mark_force_output (void) + { + force_output = 1; +- gcc_checking_assert (!global.inlined_to); ++ gcc_checking_assert (!inlined_to); + } + + /* Return true if function should be optimized for size. */ +diff -Nurp a/gcc/cgraphunit.c b/gcc/cgraphunit.c +--- a/gcc/cgraphunit.c 2020-04-30 15:14:04.592000000 +0800 ++++ b/gcc/cgraphunit.c 2020-04-30 15:14:56.584000000 +0800 +@@ -340,7 +340,10 @@ symbol_table::process_new_functions (voi + and splitting. This is redundant for functions added late. + Just throw away whatever it did. */ + if (!summaried_computed) +- ipa_free_fn_summary (); ++ { ++ ipa_free_fn_summary (); ++ ipa_free_size_summary (); ++ } + } + else if (ipa_fn_summaries != NULL) + compute_fn_summary (node, true); +@@ -389,7 +392,7 @@ cgraph_node::reset (void) + + /* Reset our data structures so we can analyze the function again. */ + memset (&local, 0, sizeof (local)); +- memset (&global, 0, sizeof (global)); ++ inlined_to = NULL; + memset (&rtl, 0, sizeof (rtl)); + analyzed = false; + definition = false; +@@ -1504,7 +1507,7 @@ mark_functions_to_output (void) + if (node->analyzed + && !node->thunk.thunk_p + && !node->alias +- && !node->global.inlined_to ++ && !node->inlined_to + && !TREE_ASM_WRITTEN (decl) + && !DECL_EXTERNAL (decl)) + { +@@ -1529,7 +1532,7 @@ mark_functions_to_output (void) + { + /* We should've reclaimed all functions that are not needed. 
*/ + if (flag_checking +- && !node->global.inlined_to ++ && !node->inlined_to + && gimple_has_body_p (decl) + /* FIXME: in ltrans unit when offline copy is outside partition but inline copies + are inside partition, we can end up not removing the body since we no longer +@@ -1542,7 +1545,7 @@ mark_functions_to_output (void) + node->debug (); + internal_error ("failed to reclaim unneeded function"); + } +- gcc_assert (node->global.inlined_to ++ gcc_assert (node->inlined_to + || !gimple_has_body_p (decl) + || node->in_other_partition + || node->clones +@@ -1557,7 +1560,7 @@ mark_functions_to_output (void) + if (node->same_comdat_group && !node->process) + { + tree decl = node->decl; +- if (!node->global.inlined_to ++ if (!node->inlined_to + && gimple_has_body_p (decl) + /* FIXME: in an ltrans unit when the offline copy is outside a + partition but inline copies are inside a partition, we can +@@ -2118,7 +2121,7 @@ cgraph_node::assemble_thunks_and_aliases + + for (e = callers; e;) + if (e->caller->thunk.thunk_p +- && !e->caller->global.inlined_to) ++ && !e->caller->inlined_to) + { + cgraph_node *thunk = e->caller; + +@@ -2155,7 +2158,7 @@ cgraph_node::expand (void) + location_t saved_loc; + + /* We ought to not compile any inline clones. */ +- gcc_assert (!global.inlined_to); ++ gcc_assert (!inlined_to); + + /* __RTL functions are compiled as soon as they are parsed, so don't + do it again. */ +@@ -2707,7 +2710,7 @@ symbol_table::compile (void) + bool error_found = false; + + FOR_EACH_DEFINED_FUNCTION (node) +- if (node->global.inlined_to ++ if (node->inlined_to + || gimple_has_body_p (node->decl)) + { + error_found = true; +diff -Nurp a/gcc/data-streamer.h b/gcc/data-streamer.h +--- a/gcc/data-streamer.h 2020-04-30 15:14:04.648000000 +0800 ++++ b/gcc/data-streamer.h 2020-04-30 15:14:56.504000000 +0800 +@@ -53,6 +53,7 @@ HOST_WIDE_INT bp_unpack_var_len_int (str + void streamer_write_zero (struct output_block *); + void streamer_write_uhwi (struct output_block *, unsigned HOST_WIDE_INT); + void streamer_write_hwi (struct output_block *, HOST_WIDE_INT); ++void streamer_write_poly_uint64 (struct output_block *, poly_uint64); + void streamer_write_gcov_count (struct output_block *, gcov_type); + void streamer_write_string (struct output_block *, struct lto_output_stream *, + const char *, bool); +@@ -82,6 +83,7 @@ const char *bp_unpack_indexed_string (st + const char *bp_unpack_string (struct data_in *, struct bitpack_d *); + unsigned HOST_WIDE_INT streamer_read_uhwi (struct lto_input_block *); + HOST_WIDE_INT streamer_read_hwi (struct lto_input_block *); ++poly_uint64 streamer_read_poly_uint64 (struct lto_input_block *); + gcov_type streamer_read_gcov_count (struct lto_input_block *); + wide_int streamer_read_wide_int (struct lto_input_block *); + widest_int streamer_read_widest_int (struct lto_input_block *); +diff -Nurp a/gcc/data-streamer-in.c b/gcc/data-streamer-in.c +--- a/gcc/data-streamer-in.c 2020-04-30 15:14:04.628000000 +0800 ++++ b/gcc/data-streamer-in.c 2020-04-30 15:14:56.504000000 +0800 +@@ -175,6 +175,17 @@ streamer_read_hwi (struct lto_input_bloc + } + } + ++/* Read a poly_uint64 from IB. */ ++ ++poly_uint64 ++streamer_read_poly_uint64 (class lto_input_block *ib) ++{ ++ poly_uint64 res; ++ for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i) ++ res.coeffs[i] = streamer_read_uhwi (ib); ++ return res; ++} ++ + /* Read gcov_type value from IB. 
*/ + + gcov_type +diff -Nurp a/gcc/data-streamer-out.c b/gcc/data-streamer-out.c +--- a/gcc/data-streamer-out.c 2020-04-30 15:14:04.600000000 +0800 ++++ b/gcc/data-streamer-out.c 2020-04-30 15:14:56.504000000 +0800 +@@ -220,6 +220,15 @@ streamer_write_hwi (struct output_block + streamer_write_hwi_stream (ob->main_stream, work); + } + ++/* Write a poly_uint64 value WORK to OB->main_stream. */ ++ ++void ++streamer_write_poly_uint64 (struct output_block *ob, poly_uint64 work) ++{ ++ for (int i = 0; i < NUM_POLY_INT_COEFFS; ++i) ++ streamer_write_uhwi_stream (ob->main_stream, work.coeffs[i]); ++} ++ + /* Write a gcov counter value WORK to OB->main_stream. */ + + void +diff -Nurp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +--- a/gcc/doc/invoke.texi 2020-04-30 15:14:04.664000000 +0800 ++++ b/gcc/doc/invoke.texi 2020-04-30 15:14:56.692000000 +0800 +@@ -11836,6 +11836,13 @@ IPA-CP calculates its own score of cloni + and performs those cloning opportunities with scores that exceed + @option{ipa-cp-eval-threshold}. + ++@item ipa-cp-max-recursive-depth ++Maximum depth of recursive cloning for self-recursive function. ++ ++@item ipa-cp-min-recursive-probability ++Recursive cloning only when the probability of call being executed exceeds ++the parameter. ++ + @item ipa-cp-recursion-penalty + Percentage penalty the recursive functions will receive when they + are evaluated for cloning. +diff -Nurp a/gcc/gimple-fold.c b/gcc/gimple-fold.c +--- a/gcc/gimple-fold.c 2020-04-30 15:14:04.632000000 +0800 ++++ b/gcc/gimple-fold.c 2020-04-30 15:14:56.584000000 +0800 +@@ -135,7 +135,7 @@ can_refer_decl_in_current_unit_p (tree d + if (!snode || !snode->definition) + return false; + node = dyn_cast (snode); +- return !node || !node->global.inlined_to; ++ return !node || !node->inlined_to; + } + + /* We will later output the initializer, so we can refer to it. +@@ -184,7 +184,7 @@ can_refer_decl_in_current_unit_p (tree d + || (!snode->forced_by_abi && !snode->force_output)))) + return false; + node = dyn_cast (snode); +- return !node || !node->global.inlined_to; ++ return !node || !node->inlined_to; + } + + /* Create a temporary for TYPE for a statement STMT. If the current function +diff -Nurp a/gcc/ipa.c b/gcc/ipa.c +--- a/gcc/ipa.c 2020-04-30 15:14:04.636000000 +0800 ++++ b/gcc/ipa.c 2020-04-30 15:14:56.588000000 +0800 +@@ -71,9 +71,9 @@ update_inlined_to_pointer (struct cgraph + { + struct cgraph_edge *e; + for (e = node->callees; e; e = e->next_callee) +- if (e->callee->global.inlined_to) ++ if (e->callee->inlined_to) + { +- e->callee->global.inlined_to = inlined_to; ++ e->callee->inlined_to = inlined_to; + update_inlined_to_pointer (e->callee, inlined_to); + } + } +@@ -335,11 +335,11 @@ symbol_table::remove_unreachable_nodes ( + node->used_as_abstract_origin = false; + node->indirect_call_target = false; + if (node->definition +- && !node->global.inlined_to ++ && !node->inlined_to + && !node->in_other_partition + && !node->can_remove_if_no_direct_calls_and_refs_p ()) + { +- gcc_assert (!node->global.inlined_to); ++ gcc_assert (!node->inlined_to); + reachable.add (node); + enqueue_node (node, &first, &reachable); + } +@@ -451,7 +451,7 @@ symbol_table::remove_unreachable_nodes ( + + /* When inline clone exists, mark body to be preserved so when removing + offline copy of the function we don't kill it. 
*/ +- if (cnode->global.inlined_to) ++ if (cnode->inlined_to) + body_needed_for_clonning.add (cnode->decl); + + /* For non-inline clones, force their origins to the boundary and ensure +@@ -560,11 +560,11 @@ symbol_table::remove_unreachable_nodes ( + to turn it into normal cone. */ + FOR_EACH_FUNCTION (node) + { +- if (node->global.inlined_to ++ if (node->inlined_to + && !node->callers) + { + gcc_assert (node->clones); +- node->global.inlined_to = NULL; ++ node->inlined_to = NULL; + update_inlined_to_pointer (node, node); + } + node->aux = NULL; +@@ -1207,8 +1207,8 @@ propagate_single_user (varpool_node *vno + struct cgraph_node *cnode = dyn_cast (ref->referring); + if (cnode) + { +- if (cnode->global.inlined_to) +- cnode = cnode->global.inlined_to; ++ if (cnode->inlined_to) ++ cnode = cnode->inlined_to; + if (!function) + function = cnode; + else if (function != cnode) +diff -Nurp a/gcc/ipa-comdats.c b/gcc/ipa-comdats.c +--- a/gcc/ipa-comdats.c 2020-04-30 15:14:04.612000000 +0800 ++++ b/gcc/ipa-comdats.c 2020-04-30 15:14:56.584000000 +0800 +@@ -98,8 +98,8 @@ propagate_comdat_group (struct symtab_no + + if (cgraph_node * cn = dyn_cast (symbol2)) + { +- if (cn->global.inlined_to) +- symbol2 = cn->global.inlined_to; ++ if (cn->inlined_to) ++ symbol2 = cn->inlined_to; + } + + /* The actual merge operation. */ +@@ -133,8 +133,8 @@ propagate_comdat_group (struct symtab_no + /* If we see inline clone, its comdat group actually + corresponds to the comdat group of the function it + is inlined to. */ +- if (cn->global.inlined_to) +- symbol2 = cn->global.inlined_to; ++ if (cn->inlined_to) ++ symbol2 = cn->inlined_to; + } + + /* The actual merge operation. */ +diff -Nurp a/gcc/ipa-cp.c b/gcc/ipa-cp.c +--- a/gcc/ipa-cp.c 2020-04-30 15:14:04.592000000 +0800 ++++ b/gcc/ipa-cp.c 2020-04-30 15:14:56.700000000 +0800 +@@ -229,7 +229,9 @@ public: + inline bool set_contains_variable (); + bool add_value (valtype newval, cgraph_edge *cs, + ipcp_value *src_val = NULL, +- int src_idx = 0, HOST_WIDE_INT offset = -1); ++ int src_idx = 0, HOST_WIDE_INT offset = -1, ++ ipcp_value **val_p = NULL, ++ bool unlimited = false); + void print (FILE * f, bool dump_sources, bool dump_benefits); + }; + +@@ -381,8 +383,8 @@ static hash_map + + /* Return the param lattices structure corresponding to the Ith formal + parameter of the function described by INFO. */ +-static inline struct ipcp_param_lattices * +-ipa_get_parm_lattices (struct ipa_node_params *info, int i) ++static inline class ipcp_param_lattices * ++ipa_get_parm_lattices (class ipa_node_params *info, int i) + { + gcc_assert (i >= 0 && i < ipa_get_param_count (info)); + gcc_checking_assert (!info->ipcp_orig_node); +@@ -393,18 +395,18 @@ ipa_get_parm_lattices (struct ipa_node_p + /* Return the lattice corresponding to the scalar value of the Ith formal + parameter of the function described by INFO. */ + static inline ipcp_lattice * +-ipa_get_scalar_lat (struct ipa_node_params *info, int i) ++ipa_get_scalar_lat (class ipa_node_params *info, int i) + { +- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); ++ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + return &plats->itself; + } + + /* Return the lattice corresponding to the scalar value of the Ith formal + parameter of the function described by INFO. 
*/ + static inline ipcp_lattice * +-ipa_get_poly_ctx_lat (struct ipa_node_params *info, int i) ++ipa_get_poly_ctx_lat (class ipa_node_params *info, int i) + { +- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); ++ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + return &plats->ctxlat; + } + +@@ -539,7 +541,7 @@ print_all_lattices (FILE * f, bool dump_ + fprintf (f, "\nLattices:\n"); + FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) + { +- struct ipa_node_params *info; ++ class ipa_node_params *info; + + info = IPA_NODE_REF (node); + /* Skip constprop clones since we don't make lattices for them. */ +@@ -550,7 +552,7 @@ print_all_lattices (FILE * f, bool dump_ + for (i = 0; i < count; i++) + { + struct ipcp_agg_lattice *aglat; +- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); ++ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + fprintf (f, " param [%d]: ", i); + plats->itself.print (f, dump_sources, dump_benefits); + fprintf (f, " ctxs: "); +@@ -585,7 +587,7 @@ print_all_lattices (FILE * f, bool dump_ + + static void + determine_versionability (struct cgraph_node *node, +- struct ipa_node_params *info) ++ class ipa_node_params *info) + { + const char *reason = NULL; + +@@ -656,7 +658,7 @@ determine_versionability (struct cgraph_ + static bool + ipcp_versionable_function_p (struct cgraph_node *node) + { +- return IPA_NODE_REF (node)->versionable; ++ return IPA_NODE_REF (node) && IPA_NODE_REF (node)->versionable; + } + + /* Structure holding accumulated information about callers of a node. */ +@@ -731,7 +733,7 @@ ipcp_cloning_candidate_p (struct cgraph_ + init_caller_stats (&stats); + node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, false); + +- if (ipa_fn_summaries->get (node)->self_size < stats.n_calls) ++ if (ipa_size_summaries->get (node)->self_size < stats.n_calls) + { + if (dump_file) + fprintf (dump_file, "Considering %s for cloning; code might shrink.\n", +@@ -806,23 +808,39 @@ public: + {} + }; + ++/* Skip edges from and to nodes without ipa_cp enabled. ++ Ignore not available symbols. */ ++ ++static bool ++ignore_edge_p (cgraph_edge *e) ++{ ++ enum availability avail; ++ cgraph_node *ultimate_target ++ = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); ++ ++ return (avail <= AVAIL_INTERPOSABLE ++ || !opt_for_fn (e->caller->decl, flag_ipa_cp) ++ || !opt_for_fn (ultimate_target->decl, flag_ipa_cp)); ++} ++ + /* Allocate the arrays in TOPO and topologically sort the nodes into order. */ + + static void +-build_toporder_info (struct ipa_topo_info *topo) ++build_toporder_info (class ipa_topo_info *topo) + { + topo->order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count); + topo->stack = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count); + + gcc_checking_assert (topo->stack_top == 0); +- topo->nnodes = ipa_reduced_postorder (topo->order, true, NULL); ++ topo->nnodes = ipa_reduced_postorder (topo->order, true, ++ ignore_edge_p); + } + + /* Free information about strongly connected components and the arrays in + TOPO. */ + + static void +-free_toporder_info (struct ipa_topo_info *topo) ++free_toporder_info (class ipa_topo_info *topo) + { + ipa_free_postorder_info (); + free (topo->order); +@@ -832,9 +850,9 @@ free_toporder_info (struct ipa_topo_info + /* Add NODE to the stack in TOPO, unless it is already there. 
*/ + + static inline void +-push_node_to_stack (struct ipa_topo_info *topo, struct cgraph_node *node) ++push_node_to_stack (class ipa_topo_info *topo, struct cgraph_node *node) + { +- struct ipa_node_params *info = IPA_NODE_REF (node); ++ class ipa_node_params *info = IPA_NODE_REF (node); + if (info->node_enqueued) + return; + info->node_enqueued = 1; +@@ -845,7 +863,7 @@ push_node_to_stack (struct ipa_topo_info + is empty. */ + + static struct cgraph_node * +-pop_node_from_stack (struct ipa_topo_info *topo) ++pop_node_from_stack (class ipa_topo_info *topo) + { + if (topo->stack_top) + { +@@ -887,7 +905,7 @@ ipcp_lattice::set_contains_vari + not previously set as such. */ + + static inline bool +-set_agg_lats_to_bottom (struct ipcp_param_lattices *plats) ++set_agg_lats_to_bottom (class ipcp_param_lattices *plats) + { + bool ret = !plats->aggs_bottom; + plats->aggs_bottom = true; +@@ -898,7 +916,7 @@ set_agg_lats_to_bottom (struct ipcp_para + return true if they were not previously marked as such. */ + + static inline bool +-set_agg_lats_contain_variable (struct ipcp_param_lattices *plats) ++set_agg_lats_contain_variable (class ipcp_param_lattices *plats) + { + bool ret = !plats->aggs_contain_variable; + plats->aggs_contain_variable = true; +@@ -1108,7 +1126,7 @@ ipcp_bits_lattice::meet_with (ipcp_bits_ + return true is any of them has not been marked as such so far. */ + + static inline bool +-set_all_contains_variable (struct ipcp_param_lattices *plats) ++set_all_contains_variable (class ipcp_param_lattices *plats) + { + bool ret; + ret = plats->itself.set_contains_variable (); +@@ -1158,7 +1176,7 @@ set_single_call_flag (cgraph_node *node, + static void + initialize_node_lattices (struct cgraph_node *node) + { +- struct ipa_node_params *info = IPA_NODE_REF (node); ++ class ipa_node_params *info = IPA_NODE_REF (node); + struct cgraph_edge *ie; + bool disable = false, variable = false; + int i; +@@ -1188,7 +1206,7 @@ initialize_node_lattices (struct cgraph_ + + for (i = 0; i < ipa_get_param_count (info); i++) + { +- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); ++ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + plats->m_value_range.init (); + } + +@@ -1196,7 +1214,7 @@ initialize_node_lattices (struct cgraph_ + { + for (i = 0; i < ipa_get_param_count (info); i++) + { +- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); ++ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + if (disable) + { + plats->itself.set_to_bottom (); +@@ -1224,23 +1242,23 @@ initialize_node_lattices (struct cgraph_ + } + } + +-/* Return the result of a (possibly arithmetic) pass through jump function +- JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter +- to which the result is passed. Return NULL_TREE if that cannot be +- determined or be considered an interprocedural invariant. */ ++/* Return the result of a (possibly arithmetic) operation on the constant ++ value INPUT. OPERAND is 2nd operand for binary operation. RES_TYPE is ++ the type of the parameter to which the result is passed. Return ++ NULL_TREE if that cannot be determined or be considered an ++ interprocedural invariant. 
*/ + + static tree +-ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input, +- tree res_type) ++ipa_get_jf_arith_result (enum tree_code opcode, tree input, tree operand, ++ tree res_type) + { + tree res; + +- if (ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR) ++ if (opcode == NOP_EXPR) + return input; + if (!is_gimple_ip_invariant (input)) + return NULL_TREE; + +- tree_code opcode = ipa_get_jf_pass_through_operation (jfunc); + if (!res_type) + { + if (TREE_CODE_CLASS (opcode) == tcc_comparison) +@@ -1254,8 +1272,7 @@ ipa_get_jf_pass_through_result (struct i + if (TREE_CODE_CLASS (opcode) == tcc_unary) + res = fold_unary (opcode, res_type, input); + else +- res = fold_binary (opcode, res_type, input, +- ipa_get_jf_pass_through_operand (jfunc)); ++ res = fold_binary (opcode, res_type, input, operand); + + if (res && !is_gimple_ip_invariant (res)) + return NULL_TREE; +@@ -1263,6 +1280,21 @@ ipa_get_jf_pass_through_result (struct i + return res; + } + ++/* Return the result of a (possibly arithmetic) pass through jump function ++ JFUNC on the constant value INPUT. RES_TYPE is the type of the parameter ++ to which the result is passed. Return NULL_TREE if that cannot be ++ determined or be considered an interprocedural invariant. */ ++ ++static tree ++ipa_get_jf_pass_through_result (struct ipa_jump_func *jfunc, tree input, ++ tree res_type) ++{ ++ return ipa_get_jf_arith_result (ipa_get_jf_pass_through_operation (jfunc), ++ input, ++ ipa_get_jf_pass_through_operand (jfunc), ++ res_type); ++} ++ + /* Return the result of an ancestor jump function JFUNC on the constant value + INPUT. Return NULL_TREE if that cannot be determined. */ + +@@ -1289,7 +1321,7 @@ ipa_get_jf_ancestor_result (struct ipa_j + passed. */ + + tree +-ipa_value_from_jfunc (struct ipa_node_params *info, struct ipa_jump_func *jfunc, ++ipa_value_from_jfunc (class ipa_node_params *info, struct ipa_jump_func *jfunc, + tree parm_type) + { + if (jfunc->type == IPA_JF_CONST) +@@ -1396,6 +1428,146 @@ ipa_context_from_jfunc (ipa_node_params + return ctx; + } + ++/* See if NODE is a clone with a known aggregate value at a given OFFSET of a ++ parameter with the given INDEX. */ ++ ++static tree ++get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset, ++ int index) ++{ ++ struct ipa_agg_replacement_value *aggval; ++ ++ aggval = ipa_get_agg_replacements_for_node (node); ++ while (aggval) ++ { ++ if (aggval->offset == offset ++ && aggval->index == index) ++ return aggval->value; ++ aggval = aggval->next; ++ } ++ return NULL_TREE; ++} ++ ++/* Determine whether ITEM, jump function for an aggregate part, evaluates to a ++ single known constant value and if so, return it. Otherwise return NULL. ++ NODE and INFO describes the caller node or the one it is inlined to, and ++ its related info. 
*/ ++ ++static tree ++ipa_agg_value_from_node (class ipa_node_params *info, ++ struct cgraph_node *node, ++ struct ipa_agg_jf_item *item) ++{ ++ tree value = NULL_TREE; ++ int src_idx; ++ ++ if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN) ++ return NULL_TREE; ++ ++ if (item->jftype == IPA_JF_CONST) ++ return item->value.constant; ++ ++ gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH ++ || item->jftype == IPA_JF_LOAD_AGG); ++ ++ src_idx = item->value.pass_through.formal_id; ++ ++ if (info->ipcp_orig_node) ++ { ++ if (item->jftype == IPA_JF_PASS_THROUGH) ++ value = info->known_csts[src_idx]; ++ else ++ value = get_clone_agg_value (node, item->value.load_agg.offset, ++ src_idx); ++ } ++ else if (info->lattices) ++ { ++ class ipcp_param_lattices *src_plats ++ = ipa_get_parm_lattices (info, src_idx); ++ ++ if (item->jftype == IPA_JF_PASS_THROUGH) ++ { ++ struct ipcp_lattice *lat = &src_plats->itself; ++ ++ if (!lat->is_single_const ()) ++ return NULL_TREE; ++ ++ value = lat->values->value; ++ } ++ else if (src_plats->aggs ++ && !src_plats->aggs_bottom ++ && !src_plats->aggs_contain_variable ++ && src_plats->aggs_by_ref == item->value.load_agg.by_ref) ++ { ++ struct ipcp_agg_lattice *aglat; ++ ++ for (aglat = src_plats->aggs; aglat; aglat = aglat->next) ++ { ++ if (aglat->offset > item->value.load_agg.offset) ++ break; ++ ++ if (aglat->offset == item->value.load_agg.offset) ++ { ++ if (aglat->is_single_const ()) ++ value = aglat->values->value; ++ break; ++ } ++ } ++ } ++ } ++ ++ if (!value) ++ return NULL_TREE; ++ ++ if (item->jftype == IPA_JF_LOAD_AGG) ++ { ++ tree load_type = item->value.load_agg.type; ++ tree value_type = TREE_TYPE (value); ++ ++ /* Ensure value type is compatible with load type. */ ++ if (!useless_type_conversion_p (load_type, value_type)) ++ return NULL_TREE; ++ } ++ ++ return ipa_get_jf_arith_result (item->value.pass_through.operation, ++ value, ++ item->value.pass_through.operand, ++ item->type); ++} ++ ++/* Determine whether AGG_JFUNC evaluates to a set of known constant value for ++ an aggregate and if so, return it. Otherwise return an empty set. NODE ++ and INFO describes the caller node or the one it is inlined to, and its ++ related info. */ ++ ++struct ipa_agg_value_set ++ipa_agg_value_set_from_jfunc (class ipa_node_params *info, cgraph_node *node, ++ struct ipa_agg_jump_function *agg_jfunc) ++{ ++ struct ipa_agg_value_set agg; ++ struct ipa_agg_jf_item *item; ++ int i; ++ ++ agg.items = vNULL; ++ agg.by_ref = agg_jfunc->by_ref; ++ ++ FOR_EACH_VEC_SAFE_ELT (agg_jfunc->items, i, item) ++ { ++ tree value = ipa_agg_value_from_node (info, node, item); ++ ++ if (value) ++ { ++ struct ipa_agg_value value_item; ++ ++ value_item.offset = item->offset; ++ value_item.value = value; ++ ++ agg.items.safe_push (value_item); ++ } ++ } ++ return agg; ++} ++ + /* If checking is enabled, verify that no lattice is in the TOP state, i.e. not + bottom, not containing a variable component and without any known value at + the same time. */ +@@ -1407,7 +1579,9 @@ ipcp_verify_propagated_values (void) + + FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node) + { +- struct ipa_node_params *info = IPA_NODE_REF (node); ++ class ipa_node_params *info = IPA_NODE_REF (node); ++ if (!opt_for_fn (node->decl, flag_ipa_cp)) ++ continue; + int i, count = ipa_get_param_count (info); + + for (i = 0; i < count; i++) +@@ -1516,22 +1690,32 @@ allocate_and_init_ipcp_value (ipa_polymo + /* Try to add NEWVAL to LAT, potentially creating a new ipcp_value for it. 
CS, + SRC_VAL SRC_INDEX and OFFSET are meant for add_source and have the same + meaning. OFFSET -1 means the source is scalar and not a part of an +- aggregate. */ ++ aggregate. If non-NULL, VAL_P records address of existing or newly added ++ ipcp_value. UNLIMITED means whether value count should not exceed the limit ++ given by PARAM_IPA_CP_VALUE_LIST_SIZE. */ + + template + bool + ipcp_lattice::add_value (valtype newval, cgraph_edge *cs, + ipcp_value *src_val, +- int src_idx, HOST_WIDE_INT offset) ++ int src_idx, HOST_WIDE_INT offset, ++ ipcp_value **val_p, ++ bool unlimited) + { +- ipcp_value *val; ++ ipcp_value *val, *last_val = NULL; ++ ++ if (val_p) ++ *val_p = NULL; + + if (bottom) + return false; + +- for (val = values; val; val = val->next) ++ for (val = values; val; last_val = val, val = val->next) + if (values_equal_for_ipcp_p (val->value, newval)) + { ++ if (val_p) ++ *val_p = val; ++ + if (ipa_edge_within_scc (cs)) + { + ipcp_value_source *s; +@@ -1546,7 +1730,7 @@ ipcp_lattice::add_value (valtyp + return false; + } + +- if (values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE)) ++ if (!unlimited && values_count == PARAM_VALUE (PARAM_IPA_CP_VALUE_LIST_SIZE)) + { + /* We can only free sources, not the values themselves, because sources + of other values in this SCC might point to them. */ +@@ -1559,7 +1743,6 @@ ipcp_lattice::add_value (valtyp + ipcp_sources_pool.remove ((ipcp_value_source*)src); + } + } +- + values = NULL; + return set_to_bottom (); + } +@@ -1567,41 +1750,177 @@ ipcp_lattice::add_value (valtyp + values_count++; + val = allocate_and_init_ipcp_value (newval); + val->add_source (cs, src_val, src_idx, offset); +- val->next = values; +- values = val; ++ val->next = NULL; ++ ++ /* Add the new value to end of value list, which can reduce iterations ++ of propagation stage for recursive function. */ ++ if (last_val) ++ last_val->next = val; ++ else ++ values = val; ++ ++ if (val_p) ++ *val_p = val; ++ + return true; + } + +-/* Propagate values through a pass-through jump function JFUNC associated with +- edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX +- is the index of the source parameter. PARM_TYPE is the type of the +- parameter to which the result is passed. */ ++/* Return true, if a ipcp_value VAL is orginated from parameter value of ++ self-feeding recursive function by applying non-passthrough arithmetic ++ transformation. */ + + static bool +-propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc, +- ipcp_lattice *src_lat, +- ipcp_lattice *dest_lat, int src_idx, +- tree parm_type) ++self_recursively_generated_p (ipcp_value *val) ++{ ++ class ipa_node_params *info = NULL; ++ ++ for (ipcp_value_source *src = val->sources; src; src = src->next) ++ { ++ cgraph_edge *cs = src->cs; ++ ++ if (!src->val || cs->caller != cs->callee->function_symbol () ++ || src->val == val) ++ return false; ++ ++ if (!info) ++ info = IPA_NODE_REF (cs->caller); ++ ++ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, ++ src->index); ++ ipcp_lattice *src_lat = src->offset == -1 ? &plats->itself ++ : plats->aggs; ++ ipcp_value *src_val; ++ ++ for (src_val = src_lat->values; src_val; src_val = src_val->next) ++ if (src_val == val) ++ break; ++ ++ if (!src_val) ++ return false; ++ } ++ ++ return true; ++} ++ ++/* A helper function that returns result of operation specified by OPCODE on ++ the value of SRC_VAL. If non-NULL, OPND1_TYPE is expected type for the ++ value of SRC_VAL. 
If the operation is binary, OPND2 is a constant value ++ acting as its second operand. If non-NULL, RES_TYPE is expected type of ++ the result. */ ++ ++static tree ++get_val_across_arith_op (enum tree_code opcode, ++ tree opnd1_type, ++ tree opnd2, ++ ipcp_value *src_val, ++ tree res_type) ++{ ++ tree opnd1 = src_val->value; ++ ++ /* Skip source values that is incompatible with specified type. */ ++ if (opnd1_type ++ && !useless_type_conversion_p (opnd1_type, TREE_TYPE (opnd1))) ++ return NULL_TREE; ++ ++ return ipa_get_jf_arith_result (opcode, opnd1, opnd2, res_type); ++} ++ ++/* Propagate values through an arithmetic transformation described by a jump ++ function associated with edge CS, taking values from SRC_LAT and putting ++ them into DEST_LAT. OPND1_TYPE is expected type for the values in SRC_LAT. ++ OPND2 is a constant value if transformation is a binary operation. ++ SRC_OFFSET specifies offset in an aggregate if SRC_LAT describes lattice of ++ a part of the aggregate. SRC_IDX is the index of the source parameter. ++ RES_TYPE is the value type of result being propagated into. Return true if ++ DEST_LAT changed. */ ++ ++static bool ++propagate_vals_across_arith_jfunc (cgraph_edge *cs, ++ enum tree_code opcode, ++ tree opnd1_type, ++ tree opnd2, ++ ipcp_lattice *src_lat, ++ ipcp_lattice *dest_lat, ++ HOST_WIDE_INT src_offset, ++ int src_idx, ++ tree res_type) + { + ipcp_value *src_val; + bool ret = false; + +- /* Do not create new values when propagating within an SCC because if there +- are arithmetic functions with circular dependencies, there is infinite +- number of them and we would just make lattices bottom. If this condition +- is ever relaxed we have to detect self-feeding recursive calls in +- cgraph_edge_brings_value_p in a smarter way. */ +- if ((ipa_get_jf_pass_through_operation (jfunc) != NOP_EXPR) +- && ipa_edge_within_scc (cs)) +- ret = dest_lat->set_contains_variable (); ++ /* Due to circular dependencies, propagating within an SCC through arithmetic ++ transformation would create infinite number of values. But for ++ self-feeding recursive function, we could allow propagation in a limited ++ count, and this can enable a simple kind of recursive function versioning. ++ For other scenario, we would just make lattices bottom. */ ++ if (opcode != NOP_EXPR && ipa_edge_within_scc (cs)) ++ { ++ int i; ++ ++ if (src_lat != dest_lat || PARAM_VALUE(PARAM_IPA_CP_MAX_RECURSIVE_DEPTH) < 1) ++ return dest_lat->set_contains_variable (); ++ ++ /* No benefit if recursive execution is in low probability. */ ++ if (cs->sreal_frequency () * 100 ++ <= ((sreal) 1) * PARAM_VALUE(PARAM_IPA_CP_MIN_RECURSIVE_PROBABILITY)) ++ return dest_lat->set_contains_variable (); ++ ++ auto_vec *, 8> val_seeds; ++ ++ for (src_val = src_lat->values; src_val; src_val = src_val->next) ++ { ++ /* Now we do not use self-recursively generated value as propagation ++ source, this is absolutely conservative, but could avoid explosion ++ of lattice's value space, especially when one recursive function ++ calls another recursive. */ ++ if (self_recursively_generated_p (src_val)) ++ { ++ ipcp_value_source *s; ++ ++ /* If the lattice has already been propagated for the call site, ++ no need to do that again. */ ++ for (s = src_val->sources; s; s = s->next) ++ if (s->cs == cs) ++ return dest_lat->set_contains_variable (); ++ } ++ else ++ val_seeds.safe_push (src_val); ++ } ++ ++ /* Recursively generate lattice values with a limited count. 
*/ ++ FOR_EACH_VEC_ELT (val_seeds, i, src_val) ++ { ++ for (int j = 1; j < PARAM_VALUE(PARAM_IPA_CP_MAX_RECURSIVE_DEPTH); j++) ++ { ++ tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, ++ src_val, res_type); ++ if (!cstval) ++ break; ++ ++ ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, ++ src_offset, &src_val, true); ++ gcc_checking_assert (src_val); ++ } ++ } ++ ret |= dest_lat->set_contains_variable (); ++ } + else + for (src_val = src_lat->values; src_val; src_val = src_val->next) + { +- tree cstval = ipa_get_jf_pass_through_result (jfunc, src_val->value, +- parm_type); ++ /* Now we do not use self-recursively generated value as propagation ++ source, otherwise it is easy to make value space of normal lattice ++ overflow. */ ++ if (self_recursively_generated_p (src_val)) ++ { ++ ret |= dest_lat->set_contains_variable (); ++ continue; ++ } + ++ tree cstval = get_val_across_arith_op (opcode, opnd1_type, opnd2, ++ src_val, res_type); + if (cstval) +- ret |= dest_lat->add_value (cstval, cs, src_val, src_idx); ++ ret |= dest_lat->add_value (cstval, cs, src_val, src_idx, ++ src_offset); + else + ret |= dest_lat->set_contains_variable (); + } +@@ -1609,6 +1928,24 @@ propagate_vals_across_pass_through (cgra + return ret; + } + ++/* Propagate values through a pass-through jump function JFUNC associated with ++ edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX ++ is the index of the source parameter. PARM_TYPE is the type of the ++ parameter to which the result is passed. */ ++ ++static bool ++propagate_vals_across_pass_through (cgraph_edge *cs, ipa_jump_func *jfunc, ++ ipcp_lattice *src_lat, ++ ipcp_lattice *dest_lat, int src_idx, ++ tree parm_type) ++{ ++ return propagate_vals_across_arith_jfunc (cs, ++ ipa_get_jf_pass_through_operation (jfunc), ++ NULL_TREE, ++ ipa_get_jf_pass_through_operand (jfunc), ++ src_lat, dest_lat, -1, src_idx, parm_type); ++} ++ + /* Propagate values through an ancestor jump function JFUNC associated with + edge CS, taking values from SRC_LAT and putting them into DEST_LAT. SRC_IDX + is the index of the source parameter. */ +@@ -1659,7 +1996,7 @@ propagate_scalar_across_jump_function (s + else if (jfunc->type == IPA_JF_PASS_THROUGH + || jfunc->type == IPA_JF_ANCESTOR) + { +- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); ++ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + ipcp_lattice *src_lat; + int src_idx; + bool ret; +@@ -1721,7 +2058,7 @@ propagate_context_across_jump_function ( + if (jfunc->type == IPA_JF_PASS_THROUGH + || jfunc->type == IPA_JF_ANCESTOR) + { +- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); ++ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + int src_idx; + ipcp_lattice *src_lat; + +@@ -1769,7 +2106,6 @@ propagate_context_across_jump_function ( + added_sth = true; + } + } +- + } + + prop_fail: +@@ -1797,7 +2133,7 @@ propagate_bits_across_jump_function (cgr + + enum availability availability; + cgraph_node *callee = cs->callee->function_symbol (&availability); +- struct ipa_node_params *callee_info = IPA_NODE_REF (callee); ++ class ipa_node_params *callee_info = IPA_NODE_REF (callee); + tree parm_type = ipa_get_type (callee_info, idx); + + /* For K&R C programs, ipa_get_type() could return NULL_TREE. 
Avoid the +@@ -1820,7 +2156,7 @@ propagate_bits_across_jump_function (cgr + if (jfunc->type == IPA_JF_PASS_THROUGH + || jfunc->type == IPA_JF_ANCESTOR) + { +- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); ++ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + tree operand = NULL_TREE; + enum tree_code code; + unsigned src_idx; +@@ -1840,7 +2176,7 @@ propagate_bits_across_jump_function (cgr + operand = build_int_cstu (size_type_node, offset); + } + +- struct ipcp_param_lattices *src_lats ++ class ipcp_param_lattices *src_lats + = ipa_get_parm_lattices (caller_info, src_idx); + + /* Try to propagate bits if src_lattice is bottom, but jfunc is known. +@@ -1894,7 +2230,7 @@ ipa_vr_operation_and_type_effects (value + + static bool + propagate_vr_across_jump_function (cgraph_edge *cs, ipa_jump_func *jfunc, +- struct ipcp_param_lattices *dest_plats, ++ class ipcp_param_lattices *dest_plats, + tree param_type) + { + ipcp_vr_lattice *dest_lat = &dest_plats->m_value_range; +@@ -1913,10 +2249,10 @@ propagate_vr_across_jump_function (cgrap + + if (TREE_CODE_CLASS (operation) == tcc_unary) + { +- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); ++ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + int src_idx = ipa_get_jf_pass_through_formal_id (jfunc); + tree operand_type = ipa_get_type (caller_info, src_idx); +- struct ipcp_param_lattices *src_lats ++ class ipcp_param_lattices *src_lats + = ipa_get_parm_lattices (caller_info, src_idx); + + if (src_lats->m_value_range.bottom_p ()) +@@ -1959,7 +2295,7 @@ propagate_vr_across_jump_function (cgrap + aggs_by_ref to NEW_AGGS_BY_REF. */ + + static bool +-set_check_aggs_by_ref (struct ipcp_param_lattices *dest_plats, ++set_check_aggs_by_ref (class ipcp_param_lattices *dest_plats, + bool new_aggs_by_ref) + { + if (dest_plats->aggs) +@@ -1986,7 +2322,7 @@ set_check_aggs_by_ref (struct ipcp_param + true. */ + + static bool +-merge_agg_lats_step (struct ipcp_param_lattices *dest_plats, ++merge_agg_lats_step (class ipcp_param_lattices *dest_plats, + HOST_WIDE_INT offset, HOST_WIDE_INT val_size, + struct ipcp_agg_lattice ***aglat, + bool pre_existing, bool *change) +@@ -2064,8 +2400,8 @@ set_chain_of_aglats_contains_variable (s + + static bool + merge_aggregate_lattices (struct cgraph_edge *cs, +- struct ipcp_param_lattices *dest_plats, +- struct ipcp_param_lattices *src_plats, ++ class ipcp_param_lattices *dest_plats, ++ class ipcp_param_lattices *src_plats, + int src_idx, HOST_WIDE_INT offset_delta) + { + bool pre_existing = dest_plats->aggs != NULL; +@@ -2119,7 +2455,7 @@ merge_aggregate_lattices (struct cgraph_ + rules about propagating values passed by reference. */ + + static bool +-agg_pass_through_permissible_p (struct ipcp_param_lattices *src_plats, ++agg_pass_through_permissible_p (class ipcp_param_lattices *src_plats, + struct ipa_jump_func *jfunc) + { + return src_plats->aggs +@@ -2127,13 +2463,92 @@ agg_pass_through_permissible_p (struct i + || ipa_get_jf_pass_through_agg_preserved (jfunc)); + } + ++/* Propagate values through ITEM, jump function for a part of an aggregate, ++ into corresponding aggregate lattice AGLAT. CS is the call graph edge ++ associated with the jump function. Return true if AGLAT changed in any ++ way. 
*/ ++ ++static bool ++propagate_aggregate_lattice (struct cgraph_edge *cs, ++ struct ipa_agg_jf_item *item, ++ struct ipcp_agg_lattice *aglat) ++{ ++ class ipa_node_params *caller_info; ++ class ipcp_param_lattices *src_plats; ++ struct ipcp_lattice *src_lat; ++ HOST_WIDE_INT src_offset; ++ int src_idx; ++ tree load_type; ++ bool ret; ++ ++ if (item->jftype == IPA_JF_CONST) ++ { ++ tree value = item->value.constant; ++ ++ gcc_checking_assert (is_gimple_ip_invariant (value)); ++ return aglat->add_value (value, cs, NULL, 0); ++ } ++ ++ gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH ++ || item->jftype == IPA_JF_LOAD_AGG); ++ ++ caller_info = IPA_NODE_REF (cs->caller); ++ src_idx = item->value.pass_through.formal_id; ++ src_plats = ipa_get_parm_lattices (caller_info, src_idx); ++ ++ if (item->jftype == IPA_JF_PASS_THROUGH) ++ { ++ load_type = NULL_TREE; ++ src_lat = &src_plats->itself; ++ src_offset = -1; ++ } ++ else ++ { ++ HOST_WIDE_INT load_offset = item->value.load_agg.offset; ++ struct ipcp_agg_lattice *src_aglat; ++ ++ for (src_aglat = src_plats->aggs; src_aglat; src_aglat = src_aglat->next) ++ if (src_aglat->offset >= load_offset) ++ break; ++ ++ load_type = item->value.load_agg.type; ++ if (!src_aglat ++ || src_aglat->offset > load_offset ++ || src_aglat->size != tree_to_shwi (TYPE_SIZE (load_type)) ++ || src_plats->aggs_by_ref != item->value.load_agg.by_ref) ++ return aglat->set_contains_variable (); ++ ++ src_lat = src_aglat; ++ src_offset = load_offset; ++ } ++ ++ if (src_lat->bottom ++ || (!ipcp_versionable_function_p (cs->caller) ++ && !src_lat->is_single_const ())) ++ return aglat->set_contains_variable (); ++ ++ ret = propagate_vals_across_arith_jfunc (cs, ++ item->value.pass_through.operation, ++ load_type, ++ item->value.pass_through.operand, ++ src_lat, aglat, ++ src_offset, ++ src_idx, ++ item->type); ++ ++ if (src_lat->contains_variable) ++ ret |= aglat->set_contains_variable (); ++ ++ return ret; ++} ++ + /* Propagate scalar values across jump function JFUNC that is associated with + edge CS and put the values into DEST_LAT. 
*/ + + static bool + propagate_aggs_across_jump_function (struct cgraph_edge *cs, + struct ipa_jump_func *jfunc, +- struct ipcp_param_lattices *dest_plats) ++ class ipcp_param_lattices *dest_plats) + { + bool ret = false; + +@@ -2143,9 +2558,9 @@ propagate_aggs_across_jump_function (str + if (jfunc->type == IPA_JF_PASS_THROUGH + && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR) + { +- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); ++ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + int src_idx = ipa_get_jf_pass_through_formal_id (jfunc); +- struct ipcp_param_lattices *src_plats; ++ class ipcp_param_lattices *src_plats; + + src_plats = ipa_get_parm_lattices (caller_info, src_idx); + if (agg_pass_through_permissible_p (src_plats, jfunc)) +@@ -2162,9 +2577,9 @@ propagate_aggs_across_jump_function (str + else if (jfunc->type == IPA_JF_ANCESTOR + && ipa_get_jf_ancestor_agg_preserved (jfunc)) + { +- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); ++ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + int src_idx = ipa_get_jf_ancestor_formal_id (jfunc); +- struct ipcp_param_lattices *src_plats; ++ class ipcp_param_lattices *src_plats; + + src_plats = ipa_get_parm_lattices (caller_info, src_idx); + if (src_plats->aggs && src_plats->aggs_by_ref) +@@ -2194,15 +2609,14 @@ propagate_aggs_across_jump_function (str + { + HOST_WIDE_INT val_size; + +- if (item->offset < 0) ++ if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN) + continue; +- gcc_checking_assert (is_gimple_ip_invariant (item->value)); +- val_size = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (item->value))); ++ val_size = tree_to_shwi (TYPE_SIZE (item->type)); + + if (merge_agg_lats_step (dest_plats, item->offset, val_size, + &aglat, pre_existing, &ret)) + { +- ret |= (*aglat)->add_value (item->value, cs, NULL, 0, 0); ++ ret |= propagate_aggregate_lattice (cs, item, *aglat); + aglat = &(*aglat)->next; + } + else if (dest_plats->aggs_bottom) +@@ -2235,10 +2649,10 @@ call_passes_through_thunk_p (cgraph_edge + static bool + propagate_constants_across_call (struct cgraph_edge *cs) + { +- struct ipa_node_params *callee_info; ++ class ipa_node_params *callee_info; + enum availability availability; + cgraph_node *callee; +- struct ipa_edge_args *args; ++ class ipa_edge_args *args; + bool ret = false; + int i, args_count, parms_count; + +@@ -2247,12 +2661,21 @@ propagate_constants_across_call (struct + return false; + gcc_checking_assert (callee->has_gimple_body_p ()); + callee_info = IPA_NODE_REF (callee); ++ if (!callee_info) ++ return false; + + args = IPA_EDGE_REF (cs); +- args_count = ipa_get_cs_argument_count (args); + parms_count = ipa_get_param_count (callee_info); + if (parms_count == 0) + return false; ++ if (!args) ++ { ++ for (i = 0; i < parms_count; i++) ++ ret |= set_all_contains_variable (ipa_get_parm_lattices (callee_info, ++ i)); ++ return ret; ++ } ++ args_count = ipa_get_cs_argument_count (args); + + /* If this call goes through a thunk we must not propagate to the first (0th) + parameter. 
However, we might need to uncover a thunk from below a series +@@ -2269,7 +2692,7 @@ propagate_constants_across_call (struct + for (; (i < args_count) && (i < parms_count); i++) + { + struct ipa_jump_func *jump_func = ipa_get_ith_jump_func (args, i); +- struct ipcp_param_lattices *dest_plats; ++ class ipcp_param_lattices *dest_plats; + tree param_type = ipa_get_type (callee_info, i); + + dest_plats = ipa_get_parm_lattices (callee_info, i); +@@ -2308,7 +2731,7 @@ static tree + ipa_get_indirect_edge_target_1 (struct cgraph_edge *ie, + vec known_csts, + vec known_contexts, +- vec known_aggs, ++ vec known_aggs, + struct ipa_agg_replacement_value *agg_reps, + bool *speculative) + { +@@ -2346,9 +2769,9 @@ ipa_get_indirect_edge_target_1 (struct c + } + if (!t) + { +- struct ipa_agg_jump_function *agg; ++ struct ipa_agg_value_set *agg; + if (known_aggs.length () > (unsigned int) param_index) +- agg = known_aggs[param_index]; ++ agg = &known_aggs[param_index]; + else + agg = NULL; + bool from_global_constant; +@@ -2402,8 +2825,7 @@ ipa_get_indirect_edge_target_1 (struct c + if (!t && known_aggs.length () > (unsigned int) param_index + && !ie->indirect_info->by_ref) + { +- struct ipa_agg_jump_function *agg; +- agg = known_aggs[param_index]; ++ struct ipa_agg_value_set *agg = &known_aggs[param_index]; + t = ipa_find_agg_cst_for_param (agg, known_csts[param_index], + ie->indirect_info->offset, true); + } +@@ -2526,7 +2948,7 @@ tree + ipa_get_indirect_edge_target (struct cgraph_edge *ie, + vec known_csts, + vec known_contexts, +- vec known_aggs, ++ vec known_aggs, + bool *speculative) + { + return ipa_get_indirect_edge_target_1 (ie, known_csts, known_contexts, +@@ -2540,7 +2962,7 @@ static int + devirtualization_time_bonus (struct cgraph_node *node, + vec known_csts, + vec known_contexts, +- vec known_aggs) ++ vec known_aggs) + { + struct cgraph_edge *ie; + int res = 0; +@@ -2548,7 +2970,7 @@ devirtualization_time_bonus (struct cgra + for (ie = node->indirect_calls; ie; ie = ie->next_callee) + { + struct cgraph_node *callee; +- struct ipa_fn_summary *isummary; ++ class ipa_fn_summary *isummary; + enum availability avail; + tree target; + bool speculative; +@@ -2570,13 +2992,14 @@ devirtualization_time_bonus (struct cgra + if (!isummary || !isummary->inlinable) + continue; + ++ int size = ipa_size_summaries->get (callee)->size; + /* FIXME: The values below need re-considering and perhaps also + integrating into the cost metrics, at lest in some very basic way. 
*/ +- if (isummary->size <= MAX_INLINE_INSNS_AUTO / 4) ++ if (size <= MAX_INLINE_INSNS_AUTO / 4) + res += 31 / ((int)speculative + 1); +- else if (isummary->size <= MAX_INLINE_INSNS_AUTO / 2) ++ else if (size <= MAX_INLINE_INSNS_AUTO / 2) + res += 15 / ((int)speculative + 1); +- else if (isummary->size <= MAX_INLINE_INSNS_AUTO ++ else if (size <= MAX_INLINE_INSNS_AUTO + || DECL_DECLARED_INLINE_P (callee->decl)) + res += 7 / ((int)speculative + 1); + } +@@ -2601,7 +3024,7 @@ hint_time_bonus (ipa_hints hints) + static inline int64_t + incorporate_penalties (ipa_node_params *info, int64_t evaluation) + { +- if (info->node_within_scc) ++ if (info->node_within_scc && !info->node_is_self_scc) + evaluation = (evaluation + * (100 - PARAM_VALUE (PARAM_IPA_CP_RECURSION_PENALTY))) / 100; + +@@ -2628,7 +3051,7 @@ good_cloning_opportunity_p (struct cgrap + + gcc_assert (size_cost > 0); + +- struct ipa_node_params *info = IPA_NODE_REF (node); ++ class ipa_node_params *info = IPA_NODE_REF (node); + if (max_count > profile_count::zero ()) + { + int factor = RDIV (count_sum.probability_in +@@ -2645,7 +3068,8 @@ good_cloning_opportunity_p (struct cgrap + count_sum.dump (dump_file); + fprintf (dump_file, "%s%s) -> evaluation: " "%" PRId64 + ", threshold: %i\n", +- info->node_within_scc ? ", scc" : "", ++ info->node_within_scc ++ ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "", + info->node_calling_single_call ? ", single_call" : "", + evaluation, PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD)); + } +@@ -2663,7 +3087,8 @@ good_cloning_opportunity_p (struct cgrap + "size: %i, freq_sum: %i%s%s) -> evaluation: " + "%" PRId64 ", threshold: %i\n", + time_benefit, size_cost, freq_sum, +- info->node_within_scc ? ", scc" : "", ++ info->node_within_scc ++ ? (info->node_is_self_scc ? ", self_scc" : ", scc") : "", + info->node_calling_single_call ? ", single_call" : "", + evaluation, PARAM_VALUE (PARAM_IPA_CP_EVAL_THRESHOLD)); + +@@ -2674,25 +3099,25 @@ good_cloning_opportunity_p (struct cgrap + /* Return all context independent values from aggregate lattices in PLATS in a + vector. Return NULL if there are none. */ + +-static vec * +-context_independent_aggregate_values (struct ipcp_param_lattices *plats) ++static vec ++context_independent_aggregate_values (class ipcp_param_lattices *plats) + { +- vec *res = NULL; ++ vec res = vNULL; + + if (plats->aggs_bottom + || plats->aggs_contain_variable + || plats->aggs_count == 0) +- return NULL; ++ return vNULL; + + for (struct ipcp_agg_lattice *aglat = plats->aggs; + aglat; + aglat = aglat->next) + if (aglat->is_single_const ()) + { +- struct ipa_agg_jf_item item; ++ struct ipa_agg_value item; + item.offset = aglat->offset; + item.value = aglat->values->value; +- vec_safe_push (res, item); ++ res.safe_push (item); + } + return res; + } +@@ -2704,11 +3129,11 @@ context_independent_aggregate_values (st + it. 
*/ + + static bool +-gather_context_independent_values (struct ipa_node_params *info, ++gather_context_independent_values (class ipa_node_params *info, + vec *known_csts, + vec + *known_contexts, +- vec *known_aggs, ++ vec *known_aggs, + int *removable_params_cost) + { + int i, count = ipa_get_param_count (info); +@@ -2729,7 +3154,7 @@ gather_context_independent_values (struc + + for (i = 0; i < count; i++) + { +- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); ++ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + ipcp_lattice *lat = &plats->itself; + + if (lat->is_single_const ()) +@@ -2758,40 +3183,20 @@ gather_context_independent_values (struc + + if (known_aggs) + { +- vec *agg_items; +- struct ipa_agg_jump_function *ajf; ++ vec agg_items; ++ struct ipa_agg_value_set *agg; + + agg_items = context_independent_aggregate_values (plats); +- ajf = &(*known_aggs)[i]; +- ajf->items = agg_items; +- ajf->by_ref = plats->aggs_by_ref; +- ret |= agg_items != NULL; ++ agg = &(*known_aggs)[i]; ++ agg->items = agg_items; ++ agg->by_ref = plats->aggs_by_ref; ++ ret |= !agg_items.is_empty (); + } + } + + return ret; + } + +-/* The current interface in ipa-inline-analysis requires a pointer vector. +- Create it. +- +- FIXME: That interface should be re-worked, this is slightly silly. Still, +- I'd like to discuss how to change it first and this demonstrates the +- issue. */ +- +-static vec +-agg_jmp_p_vec_for_t_vec (vec known_aggs) +-{ +- vec ret; +- struct ipa_agg_jump_function *ajf; +- int i; +- +- ret.create (known_aggs.length ()); +- FOR_EACH_VEC_ELT (known_aggs, i, ajf) +- ret.quick_push (ajf); +- return ret; +-} +- + /* Perform time and size measurement of NODE with the context given in + KNOWN_CSTS, KNOWN_CONTEXTS and KNOWN_AGGS, calculate the benefit and cost + given BASE_TIME of the node without specialization, REMOVABLE_PARAMS_COST of +@@ -2801,7 +3206,7 @@ agg_jmp_p_vec_for_t_vec (vec known_csts, + vec known_contexts, +- vec known_aggs_ptrs, ++ vec known_aggs, + int removable_params_cost, + int est_move_cost, ipcp_value_base *val) + { +@@ -2810,7 +3215,7 @@ perform_estimation_of_a_value (cgraph_no + ipa_hints hints; + + estimate_ipcp_clone_size_and_time (node, known_csts, known_contexts, +- known_aggs_ptrs, &size, &time, ++ known_aggs, &size, &time, + &base_time, &hints); + base_time -= time; + if (base_time > 65535) +@@ -2824,7 +3229,7 @@ perform_estimation_of_a_value (cgraph_no + else + time_benefit = base_time.to_int () + + devirtualization_time_bonus (node, known_csts, known_contexts, +- known_aggs_ptrs) ++ known_aggs) + + hint_time_bonus (hints) + + removable_params_cost + est_move_cost; + +@@ -2846,12 +3251,11 @@ perform_estimation_of_a_value (cgraph_no + static void + estimate_local_effects (struct cgraph_node *node) + { +- struct ipa_node_params *info = IPA_NODE_REF (node); ++ class ipa_node_params *info = IPA_NODE_REF (node); + int i, count = ipa_get_param_count (info); + vec known_csts; + vec known_contexts; +- vec known_aggs; +- vec known_aggs_ptrs; ++ vec known_aggs; + bool always_const; + int removable_params_cost; + +@@ -2864,9 +3268,8 @@ estimate_local_effects (struct cgraph_no + always_const = gather_context_independent_values (info, &known_csts, + &known_contexts, &known_aggs, + &removable_params_cost); +- known_aggs_ptrs = agg_jmp_p_vec_for_t_vec (known_aggs); + int devirt_bonus = devirtualization_time_bonus (node, known_csts, +- known_contexts, known_aggs_ptrs); ++ known_contexts, known_aggs); + if (always_const || devirt_bonus + || 
(removable_params_cost && node->local.can_change_signature)) + { +@@ -2879,7 +3282,7 @@ estimate_local_effects (struct cgraph_no + node->call_for_symbol_thunks_and_aliases (gather_caller_stats, &stats, + false); + estimate_ipcp_clone_size_and_time (node, known_csts, known_contexts, +- known_aggs_ptrs, &size, &time, ++ known_aggs, &size, &time, + &base_time, &hints); + time -= devirt_bonus; + time -= hint_time_bonus (hints); +@@ -2926,7 +3329,7 @@ estimate_local_effects (struct cgraph_no + + for (i = 0; i < count; i++) + { +- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); ++ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + ipcp_lattice *lat = &plats->itself; + ipcp_value *val; + +@@ -2942,7 +3345,7 @@ estimate_local_effects (struct cgraph_no + + int emc = estimate_move_cost (TREE_TYPE (val->value), true); + perform_estimation_of_a_value (node, known_csts, known_contexts, +- known_aggs_ptrs, ++ known_aggs, + removable_params_cost, emc, val); + + if (dump_file && (dump_flags & TDF_DETAILS)) +@@ -2960,7 +3363,7 @@ estimate_local_effects (struct cgraph_no + + for (i = 0; i < count; i++) + { +- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); ++ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + + if (!plats->virt_call) + continue; +@@ -2977,7 +3380,7 @@ estimate_local_effects (struct cgraph_no + { + known_contexts[i] = val->value; + perform_estimation_of_a_value (node, known_csts, known_contexts, +- known_aggs_ptrs, ++ known_aggs, + removable_params_cost, 0, val); + + if (dump_file && (dump_flags & TDF_DETAILS)) +@@ -2995,14 +3398,14 @@ estimate_local_effects (struct cgraph_no + + for (i = 0; i < count; i++) + { +- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); +- struct ipa_agg_jump_function *ajf; ++ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); ++ struct ipa_agg_value_set *agg; + struct ipcp_agg_lattice *aglat; + + if (plats->aggs_bottom || !plats->aggs) + continue; + +- ajf = &known_aggs[i]; ++ agg = &known_aggs[i]; + for (aglat = plats->aggs; aglat; aglat = aglat->next) + { + ipcp_value *val; +@@ -3014,14 +3417,14 @@ estimate_local_effects (struct cgraph_no + + for (val = aglat->values; val; val = val->next) + { +- struct ipa_agg_jf_item item; ++ struct ipa_agg_value item; + + item.offset = aglat->offset; + item.value = val->value; +- vec_safe_push (ajf->items, item); ++ agg->items.safe_push (item); + + perform_estimation_of_a_value (node, known_csts, known_contexts, +- known_aggs_ptrs, ++ known_aggs, + removable_params_cost, 0, val); + + if (dump_file && (dump_flags & TDF_DETAILS)) +@@ -3037,18 +3440,14 @@ estimate_local_effects (struct cgraph_no + val->local_time_benefit, val->local_size_cost); + } + +- ajf->items->pop (); ++ agg->items.pop (); + } + } + } + +- for (i = 0; i < count; i++) +- vec_free (known_aggs[i].items); +- + known_csts.release (); + known_contexts.release (); +- known_aggs.release (); +- known_aggs_ptrs.release (); ++ ipa_release_agg_values (known_aggs); + } + + +@@ -3112,12 +3511,12 @@ value_topo_info::add_val (ipcp_ + static void + add_all_node_vals_to_toposort (cgraph_node *node, ipa_topo_info *topo) + { +- struct ipa_node_params *info = IPA_NODE_REF (node); ++ class ipa_node_params *info = IPA_NODE_REF (node); + int i, count = ipa_get_param_count (info); + + for (i = 0; i < count; i++) + { +- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); ++ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + 
ipcp_lattice *lat = &plats->itself; + struct ipcp_agg_lattice *aglat; + +@@ -3152,7 +3551,7 @@ add_all_node_vals_to_toposort (cgraph_no + connected components. */ + + static void +-propagate_constants_topo (struct ipa_topo_info *topo) ++propagate_constants_topo (class ipa_topo_info *topo) + { + int i; + +@@ -3166,20 +3565,46 @@ propagate_constants_topo (struct ipa_top + until all lattices stabilize. */ + FOR_EACH_VEC_ELT (cycle_nodes, j, v) + if (v->has_gimple_body_p ()) +- push_node_to_stack (topo, v); ++ { ++ if (opt_for_fn (v->decl, flag_ipa_cp)) ++ push_node_to_stack (topo, v); ++ /* When V is not optimized, we can not push it to stac, but ++ still we need to set all its callees lattices to bottom. */ ++ else ++ { ++ for (cgraph_edge *cs = v->callees; cs; cs = cs->next_callee) ++ propagate_constants_across_call (cs); ++ } ++ } + + v = pop_node_from_stack (topo); + while (v) + { + struct cgraph_edge *cs; ++ class ipa_node_params *info = NULL; ++ bool self_scc = true; + + for (cs = v->callees; cs; cs = cs->next_callee) + if (ipa_edge_within_scc (cs)) + { +- IPA_NODE_REF (v)->node_within_scc = true; ++ cgraph_node *callee = cs->callee->function_symbol (); ++ ++ if (v != callee) ++ self_scc = false; ++ ++ if (!info) ++ { ++ info = IPA_NODE_REF (v); ++ info->node_within_scc = true; ++ } ++ + if (propagate_constants_across_call (cs)) +- push_node_to_stack (topo, cs->callee->function_symbol ()); ++ push_node_to_stack (topo, callee); + } ++ ++ if (info) ++ info->node_is_self_scc = self_scc; ++ + v = pop_node_from_stack (topo); + } + +@@ -3187,7 +3612,8 @@ propagate_constants_topo (struct ipa_top + the local effects of the discovered constants and all valid values to + their topological sort. */ + FOR_EACH_VEC_ELT (cycle_nodes, j, v) +- if (v->has_gimple_body_p ()) ++ if (v->has_gimple_body_p () ++ && opt_for_fn (v->decl, flag_ipa_cp)) + { + struct cgraph_edge *cs; + +@@ -3255,7 +3681,7 @@ value_topo_info::propagate_effe + summaries interprocedurally. */ + + static void +-ipcp_propagate_stage (struct ipa_topo_info *topo) ++ipcp_propagate_stage (class ipa_topo_info *topo) + { + struct cgraph_node *node; + +@@ -3266,16 +3692,15 @@ ipcp_propagate_stage (struct ipa_topo_in + + FOR_EACH_DEFINED_FUNCTION (node) + { +- struct ipa_node_params *info = IPA_NODE_REF (node); +- +- determine_versionability (node, info); +- if (node->has_gimple_body_p ()) ++ if (node->has_gimple_body_p () && opt_for_fn (node->decl, flag_ipa_cp)) + { +- info->lattices = XCNEWVEC (struct ipcp_param_lattices, ++ class ipa_node_params *info = IPA_NODE_REF (node); ++ determine_versionability (node, info); ++ info->lattices = XCNEWVEC (class ipcp_param_lattices, + ipa_get_param_count (info)); + initialize_node_lattices (node); + } +- ipa_fn_summary *s = ipa_fn_summaries->get (node); ++ ipa_size_summary *s = ipa_size_summaries->get (node); + if (node->definition && !node->alias && s != NULL) + overall_size += s->self_size; + max_count = max_count.max (node->count.ipa ()); +@@ -3335,7 +3760,7 @@ ipcp_discover_new_direct_edges (struct c + + if (cs && !agg_contents && !polymorphic) + { +- struct ipa_node_params *info = IPA_NODE_REF (node); ++ class ipa_node_params *info = IPA_NODE_REF (node); + int c = ipa_get_controlled_uses (info, param_index); + if (c != IPA_UNDESCRIBED_USE) + { +@@ -3415,26 +3840,6 @@ edge_clone_summary_t::duplicate (cgraph_ + src_data->next_clone = dst_edge; + } + +-/* See if NODE is a clone with a known aggregate value at a given OFFSET of a +- parameter with the given INDEX. 
*/ +- +-static tree +-get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset, +- int index) +-{ +- struct ipa_agg_replacement_value *aggval; +- +- aggval = ipa_get_agg_replacements_for_node (node); +- while (aggval) +- { +- if (aggval->offset == offset +- && aggval->index == index) +- return aggval->value; +- aggval = aggval->next; +- } +- return NULL_TREE; +-} +- + /* Return true is NODE is DEST or its clone for all contexts. */ + + static bool +@@ -3443,7 +3848,7 @@ same_node_or_its_all_contexts_clone_p (c + if (node == dest) + return true; + +- struct ipa_node_params *info = IPA_NODE_REF (node); ++ class ipa_node_params *info = IPA_NODE_REF (node); + return info->is_all_contexts_clone && info->ipcp_orig_node == dest; + } + +@@ -3454,12 +3859,12 @@ static bool + cgraph_edge_brings_value_p (cgraph_edge *cs, ipcp_value_source *src, + cgraph_node *dest, ipcp_value *dest_val) + { +- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); ++ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + enum availability availability; + cgraph_node *real_dest = cs->callee->function_symbol (&availability); + +- if (!same_node_or_its_all_contexts_clone_p (real_dest, dest) +- || availability <= AVAIL_INTERPOSABLE ++ if (availability <= AVAIL_INTERPOSABLE ++ || !same_node_or_its_all_contexts_clone_p (real_dest, dest) + || caller_info->node_dead) + return false; + +@@ -3485,7 +3890,7 @@ cgraph_edge_brings_value_p (cgraph_edge + return true; + + struct ipcp_agg_lattice *aglat; +- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info, ++ class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info, + src->index); + if (src->offset == -1) + return (plats->itself.is_single_const () +@@ -3514,10 +3919,12 @@ cgraph_edge_brings_value_p (cgraph_edge + cgraph_node *dest, + ipcp_value *) + { +- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); +- cgraph_node *real_dest = cs->callee->function_symbol (); ++ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); ++ enum availability avail; ++ cgraph_node *real_dest = cs->callee->function_symbol (&avail); + +- if (!same_node_or_its_all_contexts_clone_p (real_dest, dest) ++ if (avail <= AVAIL_INTERPOSABLE ++ || !same_node_or_its_all_contexts_clone_p (real_dest, dest) + || caller_info->node_dead) + return false; + if (!src->val) +@@ -3528,7 +3935,7 @@ cgraph_edge_brings_value_p (cgraph_edge + && values_equal_for_ipcp_p (src->val->value, + caller_info->known_contexts[src->index]); + +- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info, ++ class ipcp_param_lattices *plats = ipa_get_parm_lattices (caller_info, + src->index); + return plats->ctxlat.is_single_const () + && values_equal_for_ipcp_p (src->val->value, +@@ -3575,6 +3982,9 @@ get_info_about_necessary_edges (ipcp_val + hot |= cs->maybe_hot_p (); + if (cs->caller != dest) + non_self_recursive = true; ++ else if (src->val) ++ gcc_assert (values_equal_for_ipcp_p (src->val->value, ++ val->value)); + } + cs = get_next_cgraph_edge_clone (cs); + } +@@ -3588,6 +3998,19 @@ get_info_about_necessary_edges (ipcp_val + *freq_sum = freq; + *count_sum = cnt; + *caller_count = count; ++ ++ if (!hot && IPA_NODE_REF (dest)->node_within_scc) ++ { ++ struct cgraph_edge *cs; ++ ++ /* Cold non-SCC source edge could trigger hot recursive execution of ++ function. Consider the case as hot and rely on following cost model ++ computation to further select right one. 
*/ ++ for (cs = dest->callers; cs; cs = cs->next_caller) ++ if (cs->caller == dest && cs->maybe_hot_p ()) ++ return true; ++ } ++ + return hot; + } + +@@ -3621,7 +4044,7 @@ gather_edges_for_value (ipcp_value callers) + { +- struct ipa_node_params *new_info, *info = IPA_NODE_REF (node); ++ class ipa_node_params *new_info, *info = IPA_NODE_REF (node); + vec *replace_trees = NULL; + struct ipa_agg_replacement_value *av; + struct cgraph_node *new_node; +@@ -3891,6 +4314,7 @@ create_specialized_node (struct cgraph_n + update_profiling_info (node, new_node); + new_info = IPA_NODE_REF (new_node); + new_info->ipcp_orig_node = node; ++ new_node->ipcp_clone = true; + new_info->known_csts = known_csts; + new_info->known_contexts = known_contexts; + +@@ -3924,7 +4348,7 @@ find_more_scalar_values_for_callers_subs + vec known_csts, + vec callers) + { +- struct ipa_node_params *info = IPA_NODE_REF (node); ++ class ipa_node_params *info = IPA_NODE_REF (node); + int i, count = ipa_get_param_count (info); + + for (i = 0; i < count; i++) +@@ -3946,7 +4370,8 @@ find_more_scalar_values_for_callers_subs + if (IPA_NODE_REF (cs->caller)->node_dead) + continue; + +- if (i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs)) ++ if (!IPA_EDGE_REF (cs) ++ || i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs)) + || (i == 0 + && call_passes_through_thunk_p (cs))) + { +@@ -4015,7 +4440,8 @@ find_more_contexts_for_caller_subset (cg + + FOR_EACH_VEC_ELT (callers, j, cs) + { +- if (i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs))) ++ if (!IPA_EDGE_REF (cs) ++ || i >= ipa_get_cs_argument_count (IPA_EDGE_REF (cs))) + return; + ipa_jump_func *jfunc = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), + i); +@@ -4056,10 +4482,10 @@ find_more_contexts_for_caller_subset (cg + /* Go through PLATS and create a vector of values consisting of values and + offsets (minus OFFSET) of lattices that contain only a single value. */ + +-static vec +-copy_plats_to_inter (struct ipcp_param_lattices *plats, HOST_WIDE_INT offset) ++static vec ++copy_plats_to_inter (class ipcp_param_lattices *plats, HOST_WIDE_INT offset) + { +- vec res = vNULL; ++ vec res = vNULL; + + if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom) + return vNULL; +@@ -4067,7 +4493,7 @@ copy_plats_to_inter (struct ipcp_param_l + for (struct ipcp_agg_lattice *aglat = plats->aggs; aglat; aglat = aglat->next) + if (aglat->is_single_const ()) + { +- struct ipa_agg_jf_item ti; ++ struct ipa_agg_value ti; + ti.offset = aglat->offset - offset; + ti.value = aglat->values->value; + res.safe_push (ti); +@@ -4079,12 +4505,12 @@ copy_plats_to_inter (struct ipcp_param_l + subtracting OFFSET). */ + + static void +-intersect_with_plats (struct ipcp_param_lattices *plats, +- vec *inter, ++intersect_with_plats (class ipcp_param_lattices *plats, ++ vec *inter, + HOST_WIDE_INT offset) + { + struct ipcp_agg_lattice *aglat; +- struct ipa_agg_jf_item *item; ++ struct ipa_agg_value *item; + int k; + + if (!plats->aggs || plats->aggs_contain_variable || plats->aggs_bottom) +@@ -4122,18 +4548,18 @@ intersect_with_plats (struct ipcp_param_ + /* Copy aggregate replacement values of NODE (which is an IPA-CP clone) to the + vector result while subtracting OFFSET from the individual value offsets. 
*/ + +-static vec ++static vec + agg_replacements_to_vector (struct cgraph_node *node, int index, + HOST_WIDE_INT offset) + { + struct ipa_agg_replacement_value *av; +- vec res = vNULL; ++ vec res = vNULL; + + for (av = ipa_get_agg_replacements_for_node (node); av; av = av->next) + if (av->index == index + && (av->offset - offset) >= 0) + { +- struct ipa_agg_jf_item item; ++ struct ipa_agg_value item; + gcc_checking_assert (av->value); + item.offset = av->offset - offset; + item.value = av->value; +@@ -4149,11 +4575,11 @@ agg_replacements_to_vector (struct cgrap + + static void + intersect_with_agg_replacements (struct cgraph_node *node, int index, +- vec *inter, ++ vec *inter, + HOST_WIDE_INT offset) + { + struct ipa_agg_replacement_value *srcvals; +- struct ipa_agg_jf_item *item; ++ struct ipa_agg_value *item; + int i; + + srcvals = ipa_get_agg_replacements_for_node (node); +@@ -4190,22 +4616,22 @@ intersect_with_agg_replacements (struct + copy all incoming values to it. If we determine we ended up with no values + whatsoever, return a released vector. */ + +-static vec ++static vec + intersect_aggregates_with_edge (struct cgraph_edge *cs, int index, +- vec inter) ++ vec inter) + { + struct ipa_jump_func *jfunc; + jfunc = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), index); + if (jfunc->type == IPA_JF_PASS_THROUGH + && ipa_get_jf_pass_through_operation (jfunc) == NOP_EXPR) + { +- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); ++ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + int src_idx = ipa_get_jf_pass_through_formal_id (jfunc); + + if (caller_info->ipcp_orig_node) + { + struct cgraph_node *orig_node = caller_info->ipcp_orig_node; +- struct ipcp_param_lattices *orig_plats; ++ class ipcp_param_lattices *orig_plats; + orig_plats = ipa_get_parm_lattices (IPA_NODE_REF (orig_node), + src_idx); + if (agg_pass_through_permissible_p (orig_plats, jfunc)) +@@ -4224,7 +4650,7 @@ intersect_aggregates_with_edge (struct c + } + else + { +- struct ipcp_param_lattices *src_plats; ++ class ipcp_param_lattices *src_plats; + src_plats = ipa_get_parm_lattices (caller_info, src_idx); + if (agg_pass_through_permissible_p (src_plats, jfunc)) + { +@@ -4246,9 +4672,9 @@ intersect_aggregates_with_edge (struct c + else if (jfunc->type == IPA_JF_ANCESTOR + && ipa_get_jf_ancestor_agg_preserved (jfunc)) + { +- struct ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); ++ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); + int src_idx = ipa_get_jf_ancestor_formal_id (jfunc); +- struct ipcp_param_lattices *src_plats; ++ class ipcp_param_lattices *src_plats; + HOST_WIDE_INT delta = ipa_get_jf_ancestor_offset (jfunc); + + if (caller_info->ipcp_orig_node) +@@ -4273,12 +4699,26 @@ intersect_aggregates_with_edge (struct c + } + else if (jfunc->agg.items) + { +- struct ipa_agg_jf_item *item; ++ class ipa_node_params *caller_info = IPA_NODE_REF (cs->caller); ++ struct ipa_agg_value *item; + int k; + + if (!inter.exists ()) + for (unsigned i = 0; i < jfunc->agg.items->length (); i++) +- inter.safe_push ((*jfunc->agg.items)[i]); ++ { ++ struct ipa_agg_jf_item *agg_item = &(*jfunc->agg.items)[i]; ++ tree value = ipa_agg_value_from_node (caller_info, cs->caller, ++ agg_item); ++ if (value) ++ { ++ struct ipa_agg_value agg_value; ++ ++ agg_value.offset = agg_item->offset; ++ agg_value.value = value; ++ ++ inter.safe_push (agg_value); ++ } ++ } + else + FOR_EACH_VEC_ELT (inter, k, item) + { +@@ -4296,9 +4736,10 @@ intersect_aggregates_with_edge (struct c + break; + if 
(ti->offset == item->offset) + { +- gcc_checking_assert (ti->value); +- if (values_equal_for_ipcp_p (item->value, +- ti->value)) ++ tree value = ipa_agg_value_from_node (caller_info, ++ cs->caller, ti); ++ if (value ++ && values_equal_for_ipcp_p (item->value, value)) + found = true; + break; + } +@@ -4311,7 +4752,7 @@ intersect_aggregates_with_edge (struct c + else + { + inter.release (); +- return vec(); ++ return vNULL; + } + return inter; + } +@@ -4323,7 +4764,7 @@ static struct ipa_agg_replacement_value + find_aggregate_values_for_callers_subset (struct cgraph_node *node, + vec callers) + { +- struct ipa_node_params *dest_info = IPA_NODE_REF (node); ++ class ipa_node_params *dest_info = IPA_NODE_REF (node); + struct ipa_agg_replacement_value *res; + struct ipa_agg_replacement_value **tail = &res; + struct cgraph_edge *cs; +@@ -4331,6 +4772,11 @@ find_aggregate_values_for_callers_subset + + FOR_EACH_VEC_ELT (callers, j, cs) + { ++ if (!IPA_EDGE_REF (cs)) ++ { ++ count = 0; ++ break; ++ } + int c = ipa_get_cs_argument_count (IPA_EDGE_REF (cs)); + if (c < count) + count = c; +@@ -4339,9 +4785,9 @@ find_aggregate_values_for_callers_subset + for (i = 0; i < count; i++) + { + struct cgraph_edge *cs; +- vec inter = vNULL; +- struct ipa_agg_jf_item *item; +- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i); ++ vec inter = vNULL; ++ struct ipa_agg_value *item; ++ class ipcp_param_lattices *plats = ipa_get_parm_lattices (dest_info, i); + int j; + + /* Among other things, the following check should deal with all by_ref +@@ -4394,10 +4840,10 @@ static bool + cgraph_edge_brings_all_scalars_for_node (struct cgraph_edge *cs, + struct cgraph_node *node) + { +- struct ipa_node_params *dest_info = IPA_NODE_REF (node); ++ class ipa_node_params *dest_info = IPA_NODE_REF (node); + int count = ipa_get_param_count (dest_info); +- struct ipa_node_params *caller_info; +- struct ipa_edge_args *args; ++ class ipa_node_params *caller_info; ++ class ipa_edge_args *args; + int i; + + caller_info = IPA_NODE_REF (cs->caller); +@@ -4428,8 +4874,7 @@ static bool + cgraph_edge_brings_all_agg_vals_for_node (struct cgraph_edge *cs, + struct cgraph_node *node) + { +- struct ipa_node_params *orig_caller_info = IPA_NODE_REF (cs->caller); +- struct ipa_node_params *orig_node_info; ++ class ipa_node_params *orig_node_info; + struct ipa_agg_replacement_value *aggval; + int i, ec, count; + +@@ -4445,12 +4890,10 @@ cgraph_edge_brings_all_agg_vals_for_node + return false; + + orig_node_info = IPA_NODE_REF (IPA_NODE_REF (node)->ipcp_orig_node); +- if (orig_caller_info->ipcp_orig_node) +- orig_caller_info = IPA_NODE_REF (orig_caller_info->ipcp_orig_node); + + for (i = 0; i < count; i++) + { +- struct ipcp_param_lattices *plats; ++ class ipcp_param_lattices *plats; + bool interesting = false; + for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next) + if (aggval->index == i) +@@ -4465,15 +4908,14 @@ cgraph_edge_brings_all_agg_vals_for_node + if (plats->aggs_bottom) + return false; + +- vec values +- = intersect_aggregates_with_edge (cs, i, vNULL); ++ vec values = intersect_aggregates_with_edge (cs, i, vNULL); + if (!values.exists ()) + return false; + + for (struct ipa_agg_replacement_value *av = aggval; av; av = av->next) + if (aggval->index == i) + { +- struct ipa_agg_jf_item *item; ++ struct ipa_agg_value *item; + int j; + bool found = false; + FOR_EACH_VEC_ELT (values, j, item) +@@ -4708,11 +5150,10 @@ decide_about_value (struct cgraph_node * + static bool + decide_whether_version_node 
(struct cgraph_node *node) + { +- struct ipa_node_params *info = IPA_NODE_REF (node); ++ class ipa_node_params *info = IPA_NODE_REF (node); + int i, count = ipa_get_param_count (info); + vec known_csts; + vec known_contexts; +- vec known_aggs = vNULL; + bool ret = false; + + if (count == 0) +@@ -4723,12 +5164,11 @@ decide_whether_version_node (struct cgra + node->dump_name ()); + + gather_context_independent_values (info, &known_csts, &known_contexts, +- info->do_clone_for_all_contexts ? &known_aggs +- : NULL, NULL); ++ NULL, NULL); + + for (i = 0; i < count;i++) + { +- struct ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); ++ class ipcp_param_lattices *plats = ipa_get_parm_lattices (info, i); + ipcp_lattice *lat = &plats->itself; + ipcp_lattice *ctxlat = &plats->ctxlat; + +@@ -4793,9 +5233,6 @@ decide_whether_version_node (struct cgra + info = IPA_NODE_REF (node); + info->do_clone_for_all_contexts = false; + IPA_NODE_REF (clone)->is_all_contexts_clone = true; +- for (i = 0; i < count; i++) +- vec_free (known_aggs[i].items); +- known_aggs.release (); + ret = true; + } + else +@@ -4818,7 +5255,7 @@ spread_undeadness (struct cgraph_node *n + if (ipa_edge_within_scc (cs)) + { + struct cgraph_node *callee; +- struct ipa_node_params *info; ++ class ipa_node_params *info; + + callee = cs->callee->function_symbol (NULL); + info = IPA_NODE_REF (callee); +@@ -4881,7 +5318,7 @@ identify_dead_nodes (struct cgraph_node + TOPO and make specialized clones if deemed beneficial. */ + + static void +-ipcp_decision_stage (struct ipa_topo_info *topo) ++ipcp_decision_stage (class ipa_topo_info *topo) + { + int i; + +@@ -4923,7 +5360,7 @@ ipcp_store_bits_results (void) + bool dumped_sth = false; + bool found_useful_result = false; + +- if (!opt_for_fn (node->decl, flag_ipa_bit_cp)) ++ if (!opt_for_fn (node->decl, flag_ipa_bit_cp) || !info) + { + if (dump_file) + fprintf (dump_file, "Not considering %s for ipa bitwise propagation " +@@ -5055,7 +5492,7 @@ ipcp_store_vr_results (void) + static unsigned int + ipcp_driver (void) + { +- struct ipa_topo_info topo; ++ class ipa_topo_info topo; + + if (edge_clone_summaries == NULL) + edge_clone_summaries = new edge_clone_summary_t (symtab); +diff -Nurp a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c +--- a/gcc/ipa-devirt.c 2020-04-30 15:14:04.624000000 +0800 ++++ b/gcc/ipa-devirt.c 2020-04-30 15:14:56.624000000 +0800 +@@ -172,6 +172,11 @@ struct default_hash_traits + } + }; + ++/* HACK alert: this is used to communicate with ipa-inline-transform that ++ thunk is being expanded and there is no need to clear the polymorphic ++ call target cache. */ ++bool thunk_expansion; ++ + static bool odr_types_equivalent_p (tree, tree, bool, bool *, + hash_set *, + location_t, location_t); +@@ -2557,7 +2562,7 @@ maybe_record_node (vec & + || target_node->definition) + && target_node->real_symbol_p ()) + { +- gcc_assert (!target_node->global.inlined_to); ++ gcc_assert (!target_node->inlined_to); + gcc_assert (target_node->real_symbol_p ()); + /* When sanitizing, do not assume that __cxa_pure_virtual is not called + by valid program. 
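A rough sketch of how the new thunk_expansion flag is meant to be used (inferred from the HACK comment above; the corresponding ipa-inline-transform.c hunk is not part of this excerpt):

    thunk_expansion = true;
    /* ... expand the thunk; this may remove or create cgraph nodes ... */
    thunk_expansion = false;

With the flag set, devirt_node_removal_hook in the next hunk skips freeing the polymorphic call target cache for node removals triggered by that expansion.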
*/ +@@ -2892,6 +2897,7 @@ static void + devirt_node_removal_hook (struct cgraph_node *n, void *d ATTRIBUTE_UNUSED) + { + if (cached_polymorphic_call_targets ++ && !thunk_expansion + && cached_polymorphic_call_targets->contains (n)) + free_polymorphic_call_targets_hash (); + } +diff -Nurp a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c +--- a/gcc/ipa-fnsummary.c 2020-04-30 15:14:04.568000000 +0800 ++++ b/gcc/ipa-fnsummary.c 2020-04-30 15:14:56.664000000 +0800 +@@ -86,6 +86,7 @@ along with GCC; see the file COPYING3. + + /* Summaries. */ + fast_function_summary *ipa_fn_summaries; ++fast_function_summary *ipa_size_summaries; + fast_call_summary *ipa_call_summaries; + + /* Edge predicates goes here. */ +@@ -207,7 +208,7 @@ ipa_fn_summary::account_size_time (int s + } + if (!found) + { +- struct size_time_entry new_entry; ++ class size_time_entry new_entry; + new_entry.size = size; + new_entry.time = time; + new_entry.exec_predicate = exec_pred; +@@ -236,7 +237,7 @@ redirect_to_unreachable (struct cgraph_e + e->make_direct (target); + else + e->redirect_callee (target); +- struct ipa_call_summary *es = ipa_call_summaries->get (e); ++ class ipa_call_summary *es = ipa_call_summaries->get (e); + e->inline_failed = CIF_UNREACHABLE; + e->count = profile_count::zero (); + es->call_stmt_size = 0; +@@ -261,7 +262,7 @@ edge_set_predicate (struct cgraph_edge * + && (!e->speculative || e->callee)) + e = redirect_to_unreachable (e); + +- struct ipa_call_summary *es = ipa_call_summaries->get (e); ++ class ipa_call_summary *es = ipa_call_summaries->get (e); + if (predicate && *predicate != true) + { + if (!es->predicate) +@@ -306,9 +307,9 @@ set_hint_predicate (predicate **p, predi + the fact that parameter is indeed a constant. + + KNOWN_VALS is partial mapping of parameters of NODE to constant values. +- KNOWN_AGGS is a vector of aggreggate jump functions for each parameter. +- Return clause of possible truths. When INLINE_P is true, assume that we are +- inlining. ++ KNOWN_AGGS is a vector of aggreggate known offset/value set for each ++ parameter. Return clause of possible truths. When INLINE_P is true, assume ++ that we are inlining. + + ERROR_MARK means compile time invariant. */ + +@@ -316,14 +317,13 @@ static void + evaluate_conditions_for_known_args (struct cgraph_node *node, + bool inline_p, + vec known_vals, +- vec +- known_aggs, ++ vec known_aggs, + clause_t *ret_clause, + clause_t *ret_nonspec_clause) + { + clause_t clause = inline_p ? 0 : 1 << predicate::not_inlined_condition; + clause_t nonspec_clause = 1 << predicate::not_inlined_condition; +- struct ipa_fn_summary *info = ipa_fn_summaries->get (node); ++ class ipa_fn_summary *info = ipa_fn_summaries->get (node); + int i; + struct condition *c; + +@@ -331,6 +331,8 @@ evaluate_conditions_for_known_args (stru + { + tree val; + tree res; ++ int j; ++ struct expr_eval_op *op; + + /* We allow call stmt to have fewer arguments than the callee function + (especially for K&R style programs). 
So bound check here (we assume +@@ -347,7 +349,7 @@ evaluate_conditions_for_known_args (stru + + if (c->agg_contents) + { +- struct ipa_agg_jump_function *agg; ++ struct ipa_agg_value_set *agg; + + if (c->code == predicate::changed + && !c->by_ref +@@ -356,7 +358,7 @@ evaluate_conditions_for_known_args (stru + + if (known_aggs.exists ()) + { +- agg = known_aggs[c->operand_num]; ++ agg = &known_aggs[c->operand_num]; + val = ipa_find_agg_cst_for_param (agg, known_vals[c->operand_num], + c->offset, c->by_ref); + } +@@ -382,7 +384,7 @@ evaluate_conditions_for_known_args (stru + continue; + } + +- if (tree_to_shwi (TYPE_SIZE (TREE_TYPE (val))) != c->size) ++ if (TYPE_SIZE (c->type) != TYPE_SIZE (TREE_TYPE (val))) + { + clause |= 1 << (i + predicate::first_dynamic_condition); + nonspec_clause |= 1 << (i + predicate::first_dynamic_condition); +@@ -394,7 +396,30 @@ evaluate_conditions_for_known_args (stru + continue; + } + +- val = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (c->val), val); ++ val = fold_unary (VIEW_CONVERT_EXPR, c->type, val); ++ for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++) ++ { ++ if (!val) ++ break; ++ if (!op->val[0]) ++ val = fold_unary (op->code, op->type, val); ++ else if (!op->val[1]) ++ val = fold_binary (op->code, op->type, ++ op->index ? op->val[0] : val, ++ op->index ? val : op->val[0]); ++ else if (op->index == 0) ++ val = fold_ternary (op->code, op->type, ++ val, op->val[0], op->val[1]); ++ else if (op->index == 1) ++ val = fold_ternary (op->code, op->type, ++ op->val[0], val, op->val[1]); ++ else if (op->index == 2) ++ val = fold_ternary (op->code, op->type, ++ op->val[0], op->val[1], val); ++ else ++ val = NULL_TREE; ++ } ++ + res = val + ? fold_binary_to_constant (c->code, boolean_type_node, val, c->val) + : NULL; +@@ -420,12 +445,13 @@ evaluate_properties_for_edge (struct cgr + vec *known_vals_ptr, + vec + *known_contexts_ptr, +- vec *known_aggs_ptr) ++ vec *known_aggs_ptr) + { + struct cgraph_node *callee = e->callee->ultimate_alias_target (); +- struct ipa_fn_summary *info = ipa_fn_summaries->get (callee); ++ class ipa_fn_summary *info = ipa_fn_summaries->get (callee); + vec known_vals = vNULL; +- vec known_aggs = vNULL; ++ vec known_aggs = vNULL; ++ class ipa_edge_args *args; + + if (clause_ptr) + *clause_ptr = inline_p ? 
0 : 1 << predicate::not_inlined_condition; +@@ -436,18 +462,20 @@ evaluate_properties_for_edge (struct cgr + + if (ipa_node_params_sum + && !e->call_stmt_cannot_inline_p +- && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr)) ++ && ((clause_ptr && info->conds) || known_vals_ptr || known_contexts_ptr) ++ && (args = IPA_EDGE_REF (e)) != NULL) + { +- struct ipa_node_params *caller_parms_info, *callee_pi; +- struct ipa_edge_args *args = IPA_EDGE_REF (e); +- struct ipa_call_summary *es = ipa_call_summaries->get (e); ++ struct cgraph_node *caller; ++ class ipa_node_params *caller_parms_info, *callee_pi; ++ class ipa_call_summary *es = ipa_call_summaries->get (e); + int i, count = ipa_get_cs_argument_count (args); + +- if (e->caller->global.inlined_to) +- caller_parms_info = IPA_NODE_REF (e->caller->global.inlined_to); ++ if (e->caller->inlined_to) ++ caller = e->caller->inlined_to; + else +- caller_parms_info = IPA_NODE_REF (e->caller); +- callee_pi = IPA_NODE_REF (e->callee); ++ caller = e->caller; ++ caller_parms_info = IPA_NODE_REF (caller); ++ callee_pi = IPA_NODE_REF (callee); + + if (count && (info->conds || known_vals_ptr)) + known_vals.safe_grow_cleared (count); +@@ -456,36 +484,38 @@ evaluate_properties_for_edge (struct cgr + if (count && known_contexts_ptr) + known_contexts_ptr->safe_grow_cleared (count); + +- for (i = 0; i < count; i++) +- { +- struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i); +- tree cst = ipa_value_from_jfunc (caller_parms_info, jf, +- ipa_get_type (callee_pi, i)); +- +- if (!cst && e->call_stmt +- && i < (int)gimple_call_num_args (e->call_stmt)) +- { +- cst = gimple_call_arg (e->call_stmt, i); +- if (!is_gimple_min_invariant (cst)) +- cst = NULL; +- } +- if (cst) +- { +- gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO); +- if (known_vals.exists ()) +- known_vals[i] = cst; +- } +- else if (inline_p && !es->param[i].change_prob) +- known_vals[i] = error_mark_node; +- +- if (known_contexts_ptr) +- (*known_contexts_ptr)[i] +- = ipa_context_from_jfunc (caller_parms_info, e, i, jf); +- /* TODO: When IPA-CP starts propagating and merging aggregate jump +- functions, use its knowledge of the caller too, just like the +- scalar case above. 
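As an illustration of the switch from aggregate jump functions to concrete value sets (example only, not from the patch): for a call such as

    struct S s = { 4, 8 };
    foo (&s);

the jump function merely describes the stores into *&s, while ipa_agg_value_set_from_jfunc, used just below, evaluates them in the caller's context and yields plain (offset, value) pairs, e.g. {0, 4} and {32, 8} for 32-bit ints, which is what the conditions in evaluate_conditions_for_known_args now consume.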
*/ +- known_aggs[i] = &jf->agg; +- } ++ if (callee_pi) ++ for (i = 0; i < count; i++) ++ { ++ struct ipa_jump_func *jf = ipa_get_ith_jump_func (args, i); ++ tree cst = ipa_value_from_jfunc (caller_parms_info, jf, ++ ipa_get_type (callee_pi, i)); ++ ++ if (!cst && e->call_stmt ++ && i < (int)gimple_call_num_args (e->call_stmt)) ++ { ++ cst = gimple_call_arg (e->call_stmt, i); ++ if (!is_gimple_min_invariant (cst)) ++ cst = NULL; ++ } ++ if (cst) ++ { ++ gcc_checking_assert (TREE_CODE (cst) != TREE_BINFO); ++ if (known_vals.exists ()) ++ known_vals[i] = cst; ++ } ++ else if (inline_p && !es->param[i].change_prob) ++ known_vals[i] = error_mark_node; ++ ++ if (known_contexts_ptr) ++ (*known_contexts_ptr)[i] ++ = ipa_context_from_jfunc (caller_parms_info, e, i, jf); ++ ++ known_aggs[i] = ipa_agg_value_set_from_jfunc (caller_parms_info, ++ caller, &jf->agg); ++ } ++ else ++ gcc_assert (callee->thunk.thunk_p); + } + else if (e->call_stmt && !e->call_stmt_cannot_inline_p + && ((clause_ptr && info->conds) || known_vals_ptr)) +@@ -516,7 +546,7 @@ evaluate_properties_for_edge (struct cgr + if (known_aggs_ptr) + *known_aggs_ptr = known_aggs; + else +- known_aggs.release (); ++ ipa_release_agg_values (known_aggs); + } + + +@@ -527,6 +557,8 @@ ipa_fn_summary_alloc (void) + { + gcc_checking_assert (!ipa_fn_summaries); + ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab); ++ ipa_size_summaries = new fast_function_summary ++ (symtab); + ipa_call_summaries = new ipa_call_summary_t (symtab); + } + +@@ -597,7 +629,7 @@ ipa_fn_summary_t::duplicate (cgraph_node + { + vec *entry = info->size_time_table; + /* Use SRC parm info since it may not be copied yet. */ +- struct ipa_node_params *parms_info = IPA_NODE_REF (src); ++ class ipa_node_params *parms_info = IPA_NODE_REF (src); + vec known_vals = vNULL; + int count = ipa_get_param_count (parms_info); + int i, j; +@@ -661,7 +693,7 @@ ipa_fn_summary_t::duplicate (cgraph_node + for (edge = dst->callees; edge; edge = next) + { + predicate new_predicate; +- struct ipa_call_summary *es = ipa_call_summaries->get_create (edge); ++ class ipa_call_summary *es = ipa_call_summaries->get_create (edge); + next = edge->next_callee; + + if (!edge->inline_failed) +@@ -680,7 +712,7 @@ ipa_fn_summary_t::duplicate (cgraph_node + for (edge = dst->indirect_calls; edge; edge = next) + { + predicate new_predicate; +- struct ipa_call_summary *es = ipa_call_summaries->get_create (edge); ++ class ipa_call_summary *es = ipa_call_summaries->get_create (edge); + next = edge->next_callee; + + gcc_checking_assert (edge->inline_failed); +@@ -719,7 +751,7 @@ ipa_fn_summary_t::duplicate (cgraph_node + set_hint_predicate (&info->loop_stride, p); + } + } +- if (!dst->global.inlined_to) ++ if (!dst->inlined_to) + ipa_update_overall_fn_summary (dst); + } + +@@ -729,8 +761,8 @@ ipa_fn_summary_t::duplicate (cgraph_node + void + ipa_call_summary_t::duplicate (struct cgraph_edge *src, + struct cgraph_edge *dst, +- struct ipa_call_summary *srcinfo, +- struct ipa_call_summary *info) ++ class ipa_call_summary *srcinfo, ++ class ipa_call_summary *info) + { + new (info) ipa_call_summary (*srcinfo); + info->predicate = NULL; +@@ -750,12 +782,12 @@ ipa_call_summary_t::duplicate (struct cg + + static void + dump_ipa_call_summary (FILE *f, int indent, struct cgraph_node *node, +- struct ipa_fn_summary *info) ++ class ipa_fn_summary *info) + { + struct cgraph_edge *edge; + for (edge = node->callees; edge; edge = edge->next_callee) + { +- struct ipa_call_summary *es = ipa_call_summaries->get (edge); ++ class 
ipa_call_summary *es = ipa_call_summaries->get (edge); + struct cgraph_node *callee = edge->callee->ultimate_alias_target (); + int i; + +@@ -768,9 +800,10 @@ dump_ipa_call_summary (FILE *f, int inde + es->call_stmt_size, es->call_stmt_time); + + ipa_fn_summary *s = ipa_fn_summaries->get (callee); ++ ipa_size_summary *ss = ipa_size_summaries->get (callee); + if (s != NULL) +- fprintf (f, "callee size:%2i stack:%2i", +- (int) (s->size / ipa_fn_summary::size_scale), ++ fprintf (f, " callee size:%2i stack:%2i", ++ (int) (ss->size / ipa_fn_summary::size_scale), + (int) s->estimated_stack_size); + + if (es->predicate) +@@ -794,19 +827,17 @@ dump_ipa_call_summary (FILE *f, int inde + } + if (!edge->inline_failed) + { +- ipa_fn_summary *s = ipa_fn_summaries->get (callee); +- fprintf (f, "%*sStack frame offset %i, callee self size %i," +- " callee size %i\n", ++ ipa_size_summary *ss = ipa_size_summaries->get (callee); ++ fprintf (f, "%*sStack frame offset %i, callee self size %i\n", + indent + 2, "", +- (int) s->stack_frame_offset, +- (int) s->estimated_self_stack_size, +- (int) s->estimated_stack_size); ++ (int) ipa_get_stack_frame_offset (callee), ++ (int) ss->estimated_self_stack_size); + dump_ipa_call_summary (f, indent + 2, callee, info); + } + } + for (edge = node->indirect_calls; edge; edge = edge->next_callee) + { +- struct ipa_call_summary *es = ipa_call_summaries->get (edge); ++ class ipa_call_summary *es = ipa_call_summaries->get (edge); + fprintf (f, "%*sindirect call loop depth:%2i freq:%4.2f size:%2i" + " time: %2i", + indent, "", +@@ -829,7 +860,8 @@ ipa_dump_fn_summary (FILE *f, struct cgr + { + if (node->definition) + { +- struct ipa_fn_summary *s = ipa_fn_summaries->get (node); ++ class ipa_fn_summary *s = ipa_fn_summaries->get (node); ++ class ipa_size_summary *ss = ipa_size_summaries->get (node); + if (s != NULL) + { + size_time_entry *e; +@@ -842,11 +874,11 @@ ipa_dump_fn_summary (FILE *f, struct cgr + if (s->fp_expressions) + fprintf (f, " fp_expression"); + fprintf (f, "\n global time: %f\n", s->time.to_double ()); +- fprintf (f, " self size: %i\n", s->self_size); +- fprintf (f, " global size: %i\n", s->size); ++ fprintf (f, " self size: %i\n", ss->self_size); ++ fprintf (f, " global size: %i\n", ss->size); + fprintf (f, " min size: %i\n", s->min_size); + fprintf (f, " self stack: %i\n", +- (int) s->estimated_self_stack_size); ++ (int) ss->estimated_self_stack_size); + fprintf (f, " global stack: %i\n", (int) s->estimated_stack_size); + if (s->growth) + fprintf (f, " estimated growth:%i\n", (int) s->growth); +@@ -900,7 +932,7 @@ ipa_dump_fn_summaries (FILE *f) + struct cgraph_node *node; + + FOR_EACH_DEFINED_FUNCTION (node) +- if (!node->global.inlined_to) ++ if (!node->inlined_to) + ipa_dump_fn_summary (f, node); + } + +@@ -922,7 +954,7 @@ mark_modified (ao_ref *ao ATTRIBUTE_UNUS + + static tree + unmodified_parm_1 (ipa_func_body_info *fbi, gimple *stmt, tree op, +- HOST_WIDE_INT *size_p) ++ poly_int64 *size_p) + { + /* SSA_NAME referring to parm default def? */ + if (TREE_CODE (op) == SSA_NAME +@@ -930,7 +962,7 @@ unmodified_parm_1 (ipa_func_body_info *f + && TREE_CODE (SSA_NAME_VAR (op)) == PARM_DECL) + { + if (size_p) +- *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op))); ++ *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op))); + return SSA_NAME_VAR (op); + } + /* Non-SSA parm reference? 
*/ +@@ -951,7 +983,7 @@ unmodified_parm_1 (ipa_func_body_info *f + if (!modified) + { + if (size_p) +- *size_p = tree_to_shwi (TYPE_SIZE (TREE_TYPE (op))); ++ *size_p = tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (op))); + return op; + } + } +@@ -965,7 +997,7 @@ unmodified_parm_1 (ipa_func_body_info *f + + static tree + unmodified_parm (ipa_func_body_info *fbi, gimple *stmt, tree op, +- HOST_WIDE_INT *size_p) ++ poly_int64 *size_p) + { + tree res = unmodified_parm_1 (fbi, stmt, op, size_p); + if (res) +@@ -990,7 +1022,7 @@ unmodified_parm (ipa_func_body_info *fbi + static bool + unmodified_parm_or_parm_agg_item (struct ipa_func_body_info *fbi, + gimple *stmt, tree op, int *index_p, +- HOST_WIDE_INT *size_p, ++ poly_int64 *size_p, + struct agg_position_info *aggpos) + { + tree res = unmodified_parm_1 (fbi, stmt, op, size_p); +@@ -1157,25 +1189,147 @@ eliminated_by_inlining_prob (ipa_func_bo + } + } + ++/* Analyze EXPR if it represents a series of simple operations performed on ++ a function parameter and return true if so. FBI, STMT, EXPR, INDEX_P and ++ AGGPOS have the same meaning like in unmodified_parm_or_parm_agg_item. ++ Type of the parameter or load from an aggregate via the parameter is ++ stored in *TYPE_P. Operations on the parameter are recorded to ++ PARAM_OPS_P if it is not NULL. */ ++ ++static bool ++decompose_param_expr (struct ipa_func_body_info *fbi, ++ gimple *stmt, tree expr, ++ int *index_p, tree *type_p, ++ struct agg_position_info *aggpos, ++ expr_eval_ops *param_ops_p = NULL) ++{ ++ int op_limit = PARAM_VALUE (PARAM_IPA_MAX_PARAM_EXPR_OPS); ++ int op_count = 0; ++ ++ if (param_ops_p) ++ *param_ops_p = NULL; ++ ++ while (true) ++ { ++ expr_eval_op eval_op; ++ unsigned rhs_count; ++ unsigned cst_count = 0; ++ ++ if (unmodified_parm_or_parm_agg_item (fbi, stmt, expr, index_p, NULL, ++ aggpos)) ++ { ++ tree type = TREE_TYPE (expr); ++ ++ if (aggpos->agg_contents) ++ { ++ /* Stop if containing bit-field. */ ++ if (TREE_CODE (expr) == BIT_FIELD_REF ++ || contains_bitfld_component_ref_p (expr)) ++ break; ++ } ++ ++ *type_p = type; ++ return true; ++ } ++ ++ if (TREE_CODE (expr) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (expr)) ++ break; ++ ++ if (!is_gimple_assign (stmt = SSA_NAME_DEF_STMT (expr))) ++ break; ++ ++ switch (gimple_assign_rhs_class (stmt)) ++ { ++ case GIMPLE_SINGLE_RHS: ++ expr = gimple_assign_rhs1 (stmt); ++ continue; ++ ++ case GIMPLE_UNARY_RHS: ++ rhs_count = 1; ++ break; ++ ++ case GIMPLE_BINARY_RHS: ++ rhs_count = 2; ++ break; ++ ++ case GIMPLE_TERNARY_RHS: ++ rhs_count = 3; ++ break; ++ ++ default: ++ goto fail; ++ } ++ ++ /* Stop if expression is too complex. */ ++ if (op_count++ == op_limit) ++ break; ++ ++ if (param_ops_p) ++ { ++ eval_op.code = gimple_assign_rhs_code (stmt); ++ eval_op.type = TREE_TYPE (gimple_assign_lhs (stmt)); ++ eval_op.val[0] = NULL_TREE; ++ eval_op.val[1] = NULL_TREE; ++ } ++ ++ expr = NULL_TREE; ++ for (unsigned i = 0; i < rhs_count; i++) ++ { ++ tree op = gimple_op (stmt, i + 1); ++ ++ gcc_assert (op && !TYPE_P (op)); ++ if (is_gimple_ip_invariant (op)) ++ { ++ if (++cst_count == rhs_count) ++ goto fail; ++ ++ eval_op.val[cst_count - 1] = op; ++ } ++ else if (!expr) ++ { ++ /* Found a non-constant operand, and record its index in rhs ++ operands. */ ++ eval_op.index = i; ++ expr = op; ++ } ++ else ++ { ++ /* Found more than one non-constant operands. */ ++ goto fail; ++ } ++ } ++ ++ if (param_ops_p) ++ vec_safe_insert (*param_ops_p, 0, eval_op); ++ } ++ ++ /* Failed to decompose, free resource and return. 
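A minimal example of the kind of expression decompose_param_expr handles (illustration only):

    int foo (int p)
    {
      int t = (p & 0xff) + 1;
      if (t == 5)
        ...
    }

Starting from the SSA name feeding the condition, the loop above walks back through the defining statements, records BIT_AND_EXPR 0xff and PLUS_EXPR 1 into *PARAM_OPS_P (at most PARAM_IPA_MAX_PARAM_EXPR_OPS of them), and succeeds once it reaches the unmodified parameter P; it bails out if a statement has more than one non-constant operand or the access involves a bit-field.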
*/ ++fail: ++ if (param_ops_p) ++ vec_free (*param_ops_p); ++ ++ return false; ++} + + /* If BB ends by a conditional we can turn into predicates, attach corresponding + predicates to the CFG edges. */ + + static void + set_cond_stmt_execution_predicate (struct ipa_func_body_info *fbi, +- struct ipa_fn_summary *summary, ++ class ipa_fn_summary *summary, ++ class ipa_node_params *params_summary, + basic_block bb) + { + gimple *last; +- tree op; ++ tree op, op2; + int index; +- HOST_WIDE_INT size; + struct agg_position_info aggpos; + enum tree_code code, inverted_code; + edge e; + edge_iterator ei; + gimple *set_stmt; +- tree op2; ++ tree param_type; ++ expr_eval_ops param_ops; + + last = last_stmt (bb); + if (!last || gimple_code (last) != GIMPLE_COND) +@@ -1183,10 +1337,9 @@ set_cond_stmt_execution_predicate (struc + if (!is_gimple_ip_invariant (gimple_cond_rhs (last))) + return; + op = gimple_cond_lhs (last); +- /* TODO: handle conditionals like +- var = op0 < 4; +- if (var != 0). */ +- if (unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos)) ++ ++ if (decompose_param_expr (fbi, last, op, &index, ¶m_type, &aggpos, ++ ¶m_ops)) + { + code = gimple_cond_code (last); + inverted_code = invert_tree_comparison (code, HONOR_NANS (op)); +@@ -1197,17 +1350,24 @@ set_cond_stmt_execution_predicate (struc + ? code : inverted_code); + /* invert_tree_comparison will return ERROR_MARK on FP + comparsions that are not EQ/NE instead of returning proper +- unordered one. Be sure it is not confused with NON_CONSTANT. */ +- if (this_code != ERROR_MARK) ++ unordered one. Be sure it is not confused with NON_CONSTANT. ++ ++ And if the edge's target is the final block of diamond CFG graph ++ of this conditional statement, we do not need to compute ++ predicate for the edge because the final block's predicate must ++ be at least as that of the first block of the statement. 
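A tiny example of the diamond-CFG shortcut described above (not from the patch):

    if (p > 2)     /* bb2: condition on tracked parameter P    */
      x = f ();    /* bb3: true edge carries predicate  P > 2  */
    use (x);       /* bb4: join block, post-dominates bb2      */

The false edge of bb2 goes straight to bb4; since bb4 post-dominates bb2, dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest) holds for that edge and no edge predicate is allocated for it -- bb4 simply inherits bb2's own predicate.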
*/ ++ if (this_code != ERROR_MARK ++ && !dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) + { + predicate p +- = add_condition (summary, index, size, &aggpos, this_code, +- unshare_expr_without_location +- (gimple_cond_rhs (last))); ++ = add_condition (summary, params_summary, index, ++ param_type, &aggpos, ++ this_code, gimple_cond_rhs (last), param_ops); + e->aux = edge_predicate_pool.allocate (); + *(predicate *) e->aux = p; + } + } ++ vec_free (param_ops); + } + + if (TREE_CODE (op) != SSA_NAME) +@@ -1230,12 +1390,12 @@ set_cond_stmt_execution_predicate (struc + || gimple_call_num_args (set_stmt) != 1) + return; + op2 = gimple_call_arg (set_stmt, 0); +- if (!unmodified_parm_or_parm_agg_item (fbi, set_stmt, op2, &index, &size, +- &aggpos)) ++ if (!decompose_param_expr (fbi, set_stmt, op2, &index, ¶m_type, &aggpos)) + return; + FOR_EACH_EDGE (e, ei, bb->succs) if (e->flags & EDGE_FALSE_VALUE) + { +- predicate p = add_condition (summary, index, size, &aggpos, ++ predicate p = add_condition (summary, params_summary, index, ++ param_type, &aggpos, + predicate::is_not_constant, NULL_TREE); + e->aux = edge_predicate_pool.allocate (); + *(predicate *) e->aux = p; +@@ -1248,63 +1408,200 @@ set_cond_stmt_execution_predicate (struc + + static void + set_switch_stmt_execution_predicate (struct ipa_func_body_info *fbi, +- struct ipa_fn_summary *summary, ++ class ipa_fn_summary *summary, ++ class ipa_node_params *params_summary, + basic_block bb) + { + gimple *lastg; + tree op; + int index; +- HOST_WIDE_INT size; + struct agg_position_info aggpos; + edge e; + edge_iterator ei; + size_t n; + size_t case_idx; ++ tree param_type; ++ expr_eval_ops param_ops; + + lastg = last_stmt (bb); + if (!lastg || gimple_code (lastg) != GIMPLE_SWITCH) + return; + gswitch *last = as_a (lastg); + op = gimple_switch_index (last); +- if (!unmodified_parm_or_parm_agg_item (fbi, last, op, &index, &size, &aggpos)) ++ if (!decompose_param_expr (fbi, last, op, &index, ¶m_type, &aggpos, ++ ¶m_ops)) + return; + ++ auto_vec > ranges; ++ tree type = TREE_TYPE (op); ++ int bound_limit = PARAM_VALUE (PARAM_IPA_MAX_SWITCH_PREDICATE_BOUNDS); ++ int bound_count = 0; ++ wide_int vr_wmin, vr_wmax; ++ value_range_kind vr_type = get_range_info (op, &vr_wmin, &vr_wmax); ++ + FOR_EACH_EDGE (e, ei, bb->succs) + { + e->aux = edge_predicate_pool.allocate (); + *(predicate *) e->aux = false; + } ++ ++ e = gimple_switch_edge (cfun, last, 0); ++ /* Set BOUND_COUNT to maximum count to bypass computing predicate for ++ default case if its target basic block is in convergence point of all ++ switch cases, which can be determined by checking whether it ++ post-dominates the switch statement. */ ++ if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) ++ bound_count = INT_MAX; ++ + n = gimple_switch_num_labels (last); +- for (case_idx = 0; case_idx < n; ++case_idx) ++ for (case_idx = 1; case_idx < n; ++case_idx) + { + tree cl = gimple_switch_label (last, case_idx); +- tree min, max; ++ tree min = CASE_LOW (cl); ++ tree max = CASE_HIGH (cl); + predicate p; + + e = gimple_switch_edge (cfun, last, case_idx); +- min = CASE_LOW (cl); +- max = CASE_HIGH (cl); + +- /* For default we might want to construct predicate that none +- of cases is met, but it is bit hard to do not having negations +- of conditionals handy. */ +- if (!min && !max) ++ /* The case value might not have same type as switch expression, ++ extend the value based on the expression type. 
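For orientation, the per-edge predicates this produces for a small switch (illustration only; GCC's case-range extension used for brevity):

    switch (p)           /* P is a tracked parameter              */
      {
      case 4:            /* edge predicate:  P == 4               */
        ...
      case 10 ... 20:    /* edge predicate:  P >= 10 && P <= 20   */
        ...
      default:           /* negation of all case ranges, built    */
        ...              /* from the `ranges' vector further on   */
      }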
*/ ++ if (TREE_TYPE (min) != type) ++ min = wide_int_to_tree (type, wi::to_wide (min)); ++ ++ if (!max) ++ max = min; ++ else if (TREE_TYPE (max) != type) ++ max = wide_int_to_tree (type, wi::to_wide (max)); ++ ++ /* The case's target basic block is in convergence point of all switch ++ cases, its predicate should be at least as that of the switch ++ statement. */ ++ if (dominated_by_p (CDI_POST_DOMINATORS, bb, e->dest)) + p = true; +- else if (!max) +- p = add_condition (summary, index, size, &aggpos, EQ_EXPR, +- unshare_expr_without_location (min)); ++ else if (min == max) ++ p = add_condition (summary, params_summary, index, param_type, ++ &aggpos, EQ_EXPR, min, param_ops); + else + { + predicate p1, p2; +- p1 = add_condition (summary, index, size, &aggpos, GE_EXPR, +- unshare_expr_without_location (min)); +- p2 = add_condition (summary, index, size, &aggpos, LE_EXPR, +- unshare_expr_without_location (max)); ++ p1 = add_condition (summary, params_summary, index, param_type, ++ &aggpos, GE_EXPR, min, param_ops); ++ p2 = add_condition (summary, params_summary,index, param_type, ++ &aggpos, LE_EXPR, max, param_ops); + p = p1 & p2; + } +- *(struct predicate *) e->aux +- = p.or_with (summary->conds, *(struct predicate *) e->aux); ++ *(class predicate *) e->aux ++ = p.or_with (summary->conds, *(class predicate *) e->aux); ++ ++ /* If there are too many disjoint case ranges, predicate for default ++ case might become too complicated. So add a limit here. */ ++ if (bound_count > bound_limit) ++ continue; ++ ++ bool new_range = true; ++ ++ if (!ranges.is_empty ()) ++ { ++ wide_int curr_wmin = wi::to_wide (min); ++ wide_int last_wmax = wi::to_wide (ranges.last ().second); ++ ++ /* Merge case ranges if they are continuous. */ ++ if (curr_wmin == last_wmax + 1) ++ new_range = false; ++ else if (vr_type == VR_ANTI_RANGE) ++ { ++ /* If two disjoint case ranges can be connected by anti-range ++ of switch index, combine them to one range. */ ++ if (wi::lt_p (vr_wmax, curr_wmin - 1, TYPE_SIGN (type))) ++ vr_type = VR_UNDEFINED; ++ else if (wi::le_p (vr_wmin, last_wmax + 1, TYPE_SIGN (type))) ++ new_range = false; ++ } ++ } ++ ++ /* Create/extend a case range. And we count endpoints of range set, ++ this number nearly equals to number of conditions that we will create ++ for predicate of default case. */ ++ if (new_range) ++ { ++ bound_count += (min == max) ? 1 : 2; ++ ranges.safe_push (std::make_pair (min, max)); ++ } ++ else ++ { ++ bound_count += (ranges.last ().first == ranges.last ().second); ++ ranges.last ().second = max; ++ } ++ } ++ ++ e = gimple_switch_edge (cfun, last, 0); ++ if (bound_count > bound_limit) ++ { ++ *(class predicate *) e->aux = true; ++ vec_free (param_ops); ++ return; ++ } ++ ++ predicate p_seg = true; ++ predicate p_all = false; ++ ++ if (vr_type != VR_RANGE) ++ { ++ vr_wmin = wi::to_wide (TYPE_MIN_VALUE (type)); ++ vr_wmax = wi::to_wide (TYPE_MAX_VALUE (type)); + } ++ ++ /* Construct predicate to represent default range set that is negation of ++ all case ranges. Case range is classified as containing single/non-single ++ values. Suppose a piece of case ranges in the following. ++ ++ [D1...D2] [S1] ... [Sn] [D3...D4] ++ ++ To represent default case's range sets between two non-single value ++ case ranges (From D2 to D3), we construct predicate as: ++ ++ D2 < x < D3 && x != S1 && ... 
&& x != Sn ++ */ ++ for (size_t i = 0; i < ranges.length (); i++) ++ { ++ tree min = ranges[i].first; ++ tree max = ranges[i].second; ++ ++ if (min == max) ++ p_seg &= add_condition (summary, params_summary, index, ++ param_type, &aggpos, NE_EXPR, ++ min, param_ops); ++ else ++ { ++ /* Do not create sub-predicate for range that is beyond low bound ++ of switch index. */ ++ if (wi::lt_p (vr_wmin, wi::to_wide (min), TYPE_SIGN (type))) ++ { ++ p_seg &= add_condition (summary, params_summary, index, ++ param_type, &aggpos, ++ LT_EXPR, min, param_ops); ++ p_all = p_all.or_with (summary->conds, p_seg); ++ } ++ ++ /* Do not create sub-predicate for range that is beyond up bound ++ of switch index. */ ++ if (wi::le_p (vr_wmax, wi::to_wide (max), TYPE_SIGN (type))) ++ { ++ p_seg = false; ++ break; ++ } ++ ++ p_seg = add_condition (summary, params_summary, index, ++ param_type, &aggpos, GT_EXPR, ++ max, param_ops); ++ } ++ } ++ ++ p_all = p_all.or_with (summary->conds, p_seg); ++ *(class predicate *) e->aux ++ = p_all.or_with (summary->conds, *(class predicate *) e->aux); ++ ++ vec_free (param_ops); + } + + +@@ -1314,7 +1611,8 @@ set_switch_stmt_execution_predicate (str + static void + compute_bb_predicates (struct ipa_func_body_info *fbi, + struct cgraph_node *node, +- struct ipa_fn_summary *summary) ++ class ipa_fn_summary *summary, ++ class ipa_node_params *params_summary) + { + struct function *my_function = DECL_STRUCT_FUNCTION (node->decl); + bool done = false; +@@ -1322,8 +1620,8 @@ compute_bb_predicates (struct ipa_func_b + + FOR_EACH_BB_FN (bb, my_function) + { +- set_cond_stmt_execution_predicate (fbi, summary, bb); +- set_switch_stmt_execution_predicate (fbi, summary, bb); ++ set_cond_stmt_execution_predicate (fbi, summary, params_summary, bb); ++ set_switch_stmt_execution_predicate (fbi, summary, params_summary, bb); + } + + /* Entry block is always executable. */ +@@ -1348,16 +1646,16 @@ compute_bb_predicates (struct ipa_func_b + predicate this_bb_predicate + = *(predicate *) e->src->aux; + if (e->aux) +- this_bb_predicate &= (*(struct predicate *) e->aux); ++ this_bb_predicate &= (*(class predicate *) e->aux); + p = p.or_with (summary->conds, this_bb_predicate); + if (p == true) + break; + } + } +- if (p == false) +- gcc_checking_assert (!bb->aux); +- else ++ if (p != false) + { ++ basic_block pdom_bb; ++ + if (!bb->aux) + { + done = false; +@@ -1376,6 +1674,34 @@ compute_bb_predicates (struct ipa_func_b + *((predicate *) bb->aux) = p; + } + } ++ ++ /* For switch/if statement, we can OR-combine predicates of all ++ its cases/branches to get predicate for basic block in their ++ convergence point, but sometimes this will generate very ++ complicated predicate. Actually, we can get simplified ++ predicate in another way by using the fact that predicate ++ for a basic block must also hold true for its post dominators. ++ To be specific, basic block in convergence point of ++ conditional statement should include predicate of the ++ statement. 
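A short example of why the post-dominator propagation above pays off (illustration only):

    if (a) f1 ();     /* three independent conditions on parameters */
    if (b) f2 ();
    if (c) f3 ();
    g ();             /* post-dominates all three condition blocks  */

OR-combining the predicates of every path reaching g () would accumulate conditions on A, B and C for no gain; propagating each condition block's own predicate to its immediate post-dominator lets the predicate of g ()'s block collapse back to that of the enclosing block.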
*/ ++ pdom_bb = get_immediate_dominator (CDI_POST_DOMINATORS, bb); ++ if (pdom_bb == EXIT_BLOCK_PTR_FOR_FN (my_function) || !pdom_bb) ++ ; ++ else if (!pdom_bb->aux) ++ { ++ done = false; ++ pdom_bb->aux = edge_predicate_pool.allocate (); ++ *((predicate *) pdom_bb->aux) = p; ++ } ++ else if (p != *(predicate *) pdom_bb->aux) ++ { ++ p = p.or_with (summary->conds, *(predicate *)pdom_bb->aux); ++ if (p != *(predicate *) pdom_bb->aux) ++ { ++ done = false; ++ *((predicate *) pdom_bb->aux) = p; ++ } ++ } + } + } + } +@@ -1387,21 +1713,21 @@ compute_bb_predicates (struct ipa_func_b + + static predicate + will_be_nonconstant_expr_predicate (ipa_func_body_info *fbi, +- struct ipa_fn_summary *summary, ++ class ipa_fn_summary *summary, ++ class ipa_node_params *params_summary, + tree expr, + vec nonconstant_names) + { + tree parm; + int index; +- HOST_WIDE_INT size; + + while (UNARY_CLASS_P (expr)) + expr = TREE_OPERAND (expr, 0); + +- parm = unmodified_parm (fbi, NULL, expr, &size); ++ parm = unmodified_parm (fbi, NULL, expr, NULL); + if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0) +- return add_condition (summary, index, size, NULL, predicate::changed, +- NULL_TREE); ++ return add_condition (summary, params_summary, index, TREE_TYPE (parm), NULL, ++ predicate::changed, NULL_TREE); + if (is_gimple_min_invariant (expr)) + return false; + if (TREE_CODE (expr) == SSA_NAME) +@@ -1410,6 +1736,7 @@ will_be_nonconstant_expr_predicate (ipa_ + { + predicate p1 + = will_be_nonconstant_expr_predicate (fbi, summary, ++ params_summary, + TREE_OPERAND (expr, 0), + nonconstant_names); + if (p1 == true) +@@ -1417,6 +1744,7 @@ will_be_nonconstant_expr_predicate (ipa_ + + predicate p2 + = will_be_nonconstant_expr_predicate (fbi, summary, ++ params_summary, + TREE_OPERAND (expr, 1), + nonconstant_names); + return p1.or_with (summary->conds, p2); +@@ -1425,6 +1753,7 @@ will_be_nonconstant_expr_predicate (ipa_ + { + predicate p1 + = will_be_nonconstant_expr_predicate (fbi, summary, ++ params_summary, + TREE_OPERAND (expr, 0), + nonconstant_names); + if (p1 == true) +@@ -1432,12 +1761,14 @@ will_be_nonconstant_expr_predicate (ipa_ + + predicate p2 + = will_be_nonconstant_expr_predicate (fbi, summary, ++ params_summary, + TREE_OPERAND (expr, 1), + nonconstant_names); + if (p2 == true) + return p2; + p1 = p1.or_with (summary->conds, p2); + p2 = will_be_nonconstant_expr_predicate (fbi, summary, ++ params_summary, + TREE_OPERAND (expr, 2), + nonconstant_names); + return p2.or_with (summary->conds, p1); +@@ -1458,17 +1789,18 @@ will_be_nonconstant_expr_predicate (ipa_ + + static predicate + will_be_nonconstant_predicate (struct ipa_func_body_info *fbi, +- struct ipa_fn_summary *summary, ++ class ipa_fn_summary *summary, ++ class ipa_node_params *params_summary, + gimple *stmt, + vec nonconstant_names) + { + predicate p = true; + ssa_op_iter iter; + tree use; ++ tree param_type = NULL_TREE; + predicate op_non_const; + bool is_load; + int base_index; +- HOST_WIDE_INT size; + struct agg_position_info aggpos; + + /* What statments might be optimized away +@@ -1489,11 +1821,9 @@ will_be_nonconstant_predicate (struct ip + /* Loads can be optimized when the value is known. 
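A sketch of the load case handled just below (example only):

    struct S { int n; };
    static int f (struct S *s) { return s->n + 1; }

When IPA-CP knows the (offset, value) pair stored at s->n for a particular context, the load folds away in the specialized clone, so the statement is accounted for under a `changed' predicate on that aggregate slot (via decompose_param_expr and add_condition) instead of being counted unconditionally.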
*/ + if (is_load) + { +- tree op; +- gcc_assert (gimple_assign_single_p (stmt)); +- op = gimple_assign_rhs1 (stmt); +- if (!unmodified_parm_or_parm_agg_item (fbi, stmt, op, &base_index, &size, +- &aggpos)) ++ tree op = gimple_assign_rhs1 (stmt); ++ if (!decompose_param_expr (fbi, stmt, op, &base_index, ¶m_type, ++ &aggpos)) + return p; + } + else +@@ -1518,21 +1848,22 @@ will_be_nonconstant_predicate (struct ip + + if (is_load) + op_non_const = +- add_condition (summary, base_index, size, &aggpos, predicate::changed, +- NULL); ++ add_condition (summary, params_summary, ++ base_index, param_type, &aggpos, ++ predicate::changed, NULL_TREE); + else + op_non_const = false; + FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE) + { +- HOST_WIDE_INT size; +- tree parm = unmodified_parm (fbi, stmt, use, &size); ++ tree parm = unmodified_parm (fbi, stmt, use, NULL); + int index; + + if (parm && (index = ipa_get_param_decl_index (fbi->info, parm)) >= 0) + { + if (index != base_index) +- p = add_condition (summary, index, size, NULL, predicate::changed, +- NULL_TREE); ++ p = add_condition (summary, params_summary, index, ++ TREE_TYPE (parm), NULL, ++ predicate::changed, NULL_TREE); + else + continue; + } +@@ -1566,7 +1897,7 @@ struct record_modified_bb_info + static basic_block + get_minimal_bb (basic_block init_bb, basic_block use_bb) + { +- struct loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father); ++ class loop *l = find_common_loop (init_bb->loop_father, use_bb->loop_father); + if (l && l->header->count < init_bb->count) + return l->header; + return init_bb; +@@ -1664,7 +1995,7 @@ param_change_prob (ipa_func_body_info *f + return REG_BR_PROB_BASE; + if (dump_file) + { +- fprintf (dump_file, " Analyzing param change probablity of "); ++ fprintf (dump_file, " Analyzing param change probability of "); + print_generic_expr (dump_file, op, TDF_SLIM); + fprintf (dump_file, "\n"); + } +@@ -1718,7 +2049,9 @@ param_change_prob (ipa_func_body_info *f + + static bool + phi_result_unknown_predicate (ipa_func_body_info *fbi, +- ipa_fn_summary *summary, basic_block bb, ++ ipa_fn_summary *summary, ++ class ipa_node_params *params_summary, ++ basic_block bb, + predicate *p, + vec nonconstant_names) + { +@@ -1762,7 +2095,7 @@ phi_result_unknown_predicate (ipa_func_b + || !is_gimple_ip_invariant (gimple_cond_rhs (stmt))) + return false; + +- *p = will_be_nonconstant_expr_predicate (fbi, summary, ++ *p = will_be_nonconstant_expr_predicate (fbi, summary, params_summary, + gimple_cond_lhs (stmt), + nonconstant_names); + if (*p == true) +@@ -1777,7 +2110,7 @@ phi_result_unknown_predicate (ipa_func_b + NONCONSTANT_NAMES, if possible. */ + + static void +-predicate_for_phi_result (struct ipa_fn_summary *summary, gphi *phi, ++predicate_for_phi_result (class ipa_fn_summary *summary, gphi *phi, + predicate *p, + vec nonconstant_names) + { +@@ -1954,7 +2287,8 @@ analyze_function_body (struct cgraph_nod + basic_block bb; + struct function *my_function = DECL_STRUCT_FUNCTION (node->decl); + sreal freq; +- struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node); ++ class ipa_fn_summary *info = ipa_fn_summaries->get_create (node); ++ class ipa_node_params *params_summary = early ? 
NULL : IPA_NODE_REF (node); + predicate bb_predicate; + struct ipa_func_body_info fbi; + vec nonconstant_names = vNULL; +@@ -1980,6 +2314,7 @@ analyze_function_body (struct cgraph_nod + if (opt_for_fn (node->decl, optimize)) + { + calculate_dominance_info (CDI_DOMINATORS); ++ calculate_dominance_info (CDI_POST_DOMINATORS); + if (!early) + loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); + else +@@ -2019,7 +2354,7 @@ analyze_function_body (struct cgraph_nod + bb_predicate); + + if (fbi.info) +- compute_bb_predicates (&fbi, node, info); ++ compute_bb_predicates (&fbi, node, info, params_summary); + order = XNEWVEC (int, n_basic_blocks_for_fn (cfun)); + nblocks = pre_and_rev_post_order_compute (NULL, order, false); + for (n = 0; n < nblocks; n++) +@@ -2061,7 +2396,9 @@ analyze_function_body (struct cgraph_nod + gsi_next (&bsi)) + { + if (first_phi +- && !phi_result_unknown_predicate (&fbi, info, bb, ++ && !phi_result_unknown_predicate (&fbi, info, ++ params_summary, ++ bb, + &phi_predicate, + nonconstant_names)) + break; +@@ -2159,7 +2496,7 @@ analyze_function_body (struct cgraph_nod + just maximum of the possible paths. */ + if (fbi.info) + will_be_nonconstant +- = will_be_nonconstant_predicate (&fbi, info, ++ = will_be_nonconstant_predicate (&fbi, info, params_summary, + stmt, nonconstant_names); + else + will_be_nonconstant = true; +@@ -2174,7 +2511,7 @@ analyze_function_body (struct cgraph_nod + if (prob == 2 && dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\t\tWill be eliminated by inlining\n"); + +- struct predicate p = bb_predicate & will_be_nonconstant; ++ class predicate p = bb_predicate & will_be_nonconstant; + + /* We can ignore statement when we proved it is never going + to happen, but we cannot do that for call statements +@@ -2226,7 +2563,8 @@ analyze_function_body (struct cgraph_nod + predicate p = bb_predicate; + if (fbi.info) + p = p & will_be_nonconstant_expr_predicate +- (&fbi, info, TREE_OPERAND (op, 1), ++ (&fbi, info, params_summary, ++ TREE_OPERAND (op, 1), + nonconstant_names); + if (p != false) + { +@@ -2249,7 +2587,7 @@ analyze_function_body (struct cgraph_nod + + if (nonconstant_names.exists () && !early) + { +- struct loop *loop; ++ class loop *loop; + predicate loop_iterations = true; + predicate loop_stride = true; + +@@ -2261,7 +2599,7 @@ analyze_function_body (struct cgraph_nod + vec exits; + edge ex; + unsigned int j; +- struct tree_niter_desc niter_desc; ++ class tree_niter_desc niter_desc; + bb_predicate = *(predicate *) loop->header->aux; + + exits = get_loop_exit_edges (loop); +@@ -2271,6 +2609,7 @@ analyze_function_body (struct cgraph_nod + { + predicate will_be_nonconstant + = will_be_nonconstant_expr_predicate (&fbi, info, ++ params_summary, + niter_desc.niter, + nonconstant_names); + if (will_be_nonconstant != true) +@@ -2315,7 +2654,9 @@ analyze_function_body (struct cgraph_nod + continue; + + predicate will_be_nonconstant +- = will_be_nonconstant_expr_predicate (&fbi, info, iv.step, ++ = will_be_nonconstant_expr_predicate (&fbi, info, ++ params_summary, ++ iv.step, + nonconstant_names); + if (will_be_nonconstant != true) + will_be_nonconstant = bb_predicate & will_be_nonconstant; +@@ -2349,8 +2690,9 @@ analyze_function_body (struct cgraph_nod + } + } + ipa_fn_summary *s = ipa_fn_summaries->get (node); ++ ipa_size_summary *ss = ipa_size_summaries->get (node); + s->time = time; +- s->self_size = size; ++ ss->self_size = size; + nonconstant_names.release (); + ipa_release_body_info (&fbi); + if (opt_for_fn (node->decl, 
optimize)) +@@ -2360,6 +2702,7 @@ analyze_function_body (struct cgraph_nod + else if (!ipa_edge_args_sum) + ipa_free_all_node_params (); + free_dominance_info (CDI_DOMINATORS); ++ free_dominance_info (CDI_POST_DOMINATORS); + } + if (dump_file) + { +@@ -2377,9 +2720,8 @@ compute_fn_summary (struct cgraph_node * + { + HOST_WIDE_INT self_stack_size; + struct cgraph_edge *e; +- struct ipa_fn_summary *info; + +- gcc_assert (!node->global.inlined_to); ++ gcc_assert (!node->inlined_to); + + if (!ipa_fn_summaries) + ipa_fn_summary_alloc (); +@@ -2387,14 +2729,14 @@ compute_fn_summary (struct cgraph_node * + /* Create a new ipa_fn_summary. */ + ((ipa_fn_summary_t *)ipa_fn_summaries)->remove_callees (node); + ipa_fn_summaries->remove (node); +- info = ipa_fn_summaries->get_create (node); ++ class ipa_fn_summary *info = ipa_fn_summaries->get_create (node); ++ class ipa_size_summary *size_info = ipa_size_summaries->get_create (node); + + /* Estimate the stack size for the function if we're optimizing. */ + self_stack_size = optimize && !node->thunk.thunk_p + ? estimated_stack_frame_size (node) : 0; +- info->estimated_self_stack_size = self_stack_size; ++ size_info->estimated_self_stack_size = self_stack_size; + info->estimated_stack_size = self_stack_size; +- info->stack_frame_offset = 0; + + if (node->thunk.thunk_p) + { +@@ -2412,7 +2754,7 @@ compute_fn_summary (struct cgraph_node * + t = predicate::not_inlined (); + info->account_size_time (2 * ipa_fn_summary::size_scale, 0, t, t); + ipa_update_overall_fn_summary (node); +- info->self_size = info->size; ++ size_info->self_size = size_info->size; + if (stdarg_p (TREE_TYPE (node->decl))) + { + info->inlinable = false; +@@ -2468,16 +2810,15 @@ compute_fn_summary (struct cgraph_node * + node->calls_comdat_local = (e != NULL); + + /* Inlining characteristics are maintained by the cgraph_mark_inline. */ +- info->size = info->self_size; +- info->stack_frame_offset = 0; +- info->estimated_stack_size = info->estimated_self_stack_size; ++ size_info->size = size_info->self_size; ++ info->estimated_stack_size = size_info->estimated_self_stack_size; + + /* Code above should compute exactly the same result as + ipa_update_overall_fn_summary but because computation happens in + different order the roundoff errors result in slight changes. */ + ipa_update_overall_fn_summary (node); + /* In LTO mode we may have speculative edges set. 
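For orientation (inferred from the uses in these hunks, not from the actual declarations): the purely size-related fields now live in the new ipa_size_summary, while timing and predicate data stay in ipa_fn_summary, roughly:

    /* ipa_size_summaries->get (node):  estimated_self_stack_size,
       self_size, size                                             */
    /* ipa_fn_summaries->get (node):    time, min_size, conds,
       size_time_table, estimated_stack_size, loop hints, ...      */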
*/ +- gcc_assert (in_lto_p || info->size == info->self_size); ++ gcc_assert (in_lto_p || size_info->size == size_info->self_size); + } + + +@@ -2499,11 +2840,11 @@ estimate_edge_devirt_benefit (struct cgr + int *size, int *time, + vec known_vals, + vec known_contexts, +- vec known_aggs) ++ vec known_aggs) + { + tree target; + struct cgraph_node *callee; +- struct ipa_fn_summary *isummary; ++ class ipa_fn_summary *isummary; + enum availability avail; + bool speculative; + +@@ -2548,10 +2889,10 @@ estimate_edge_size_and_time (struct cgra + int prob, + vec known_vals, + vec known_contexts, +- vec known_aggs, ++ vec known_aggs, + ipa_hints *hints) + { +- struct ipa_call_summary *es = ipa_call_summaries->get (e); ++ class ipa_call_summary *es = ipa_call_summaries->get (e); + int call_size = es->call_stmt_size; + int call_time = es->call_stmt_time; + int cur_size; +@@ -2583,12 +2924,12 @@ estimate_calls_size_and_time (struct cgr + clause_t possible_truths, + vec known_vals, + vec known_contexts, +- vec known_aggs) ++ vec known_aggs) + { + struct cgraph_edge *e; + for (e = node->callees; e; e = e->next_callee) + { +- struct ipa_call_summary *es = ipa_call_summaries->get_create (e); ++ class ipa_call_summary *es = ipa_call_summaries->get_create (e); + + /* Do not care about zero sized builtins. */ + if (e->inline_failed && !es->call_stmt_size) +@@ -2619,7 +2960,7 @@ estimate_calls_size_and_time (struct cgr + } + for (e = node->indirect_calls; e; e = e->next_callee) + { +- struct ipa_call_summary *es = ipa_call_summaries->get_create (e); ++ class ipa_call_summary *es = ipa_call_summaries->get_create (e); + if (!es->predicate + || es->predicate->evaluate (possible_truths)) + estimate_edge_size_and_time (e, size, +@@ -2630,31 +2971,250 @@ estimate_calls_size_and_time (struct cgr + } + } + ++/* Default constructor for ipa call context. ++ Memory alloction of known_vals, known_contexts ++ and known_aggs vectors is owned by the caller, but can ++ be release by ipa_call_context::release. ++ ++ inline_param_summary is owned by the caller. */ ++ipa_call_context::ipa_call_context (cgraph_node *node, ++ clause_t possible_truths, ++ clause_t nonspec_possible_truths, ++ vec known_vals, ++ vec ++ known_contexts, ++ vec known_aggs, ++ vec ++ inline_param_summary) ++: m_node (node), m_possible_truths (possible_truths), ++ m_nonspec_possible_truths (nonspec_possible_truths), ++ m_inline_param_summary (inline_param_summary), ++ m_known_vals (known_vals), ++ m_known_contexts (known_contexts), ++ m_known_aggs (known_aggs) ++{ ++} ++ ++/* Set THIS to be a duplicate of CTX. Copy all relevant info. */ ++ ++void ++ipa_call_context::duplicate_from (const ipa_call_context &ctx) ++{ ++ m_node = ctx.m_node; ++ m_possible_truths = ctx.m_possible_truths; ++ m_nonspec_possible_truths = ctx.m_nonspec_possible_truths; ++ class ipa_node_params *params_summary = IPA_NODE_REF (m_node); ++ unsigned int nargs = params_summary ++ ? ipa_get_param_count (params_summary) : 0; ++ ++ m_inline_param_summary = vNULL; ++ /* Copy the info only if there is at least one useful entry. 
*/ ++ if (ctx.m_inline_param_summary.exists ()) ++ { ++ unsigned int n = MIN (ctx.m_inline_param_summary.length (), nargs); ++ ++ for (unsigned int i = 0; i < n; i++) ++ if (ipa_is_param_used_by_ipa_predicates (params_summary, i) ++ && !ctx.m_inline_param_summary[i].useless_p ()) ++ { ++ m_inline_param_summary ++ = ctx.m_inline_param_summary.copy (); ++ break; ++ } ++ } ++ m_known_vals = vNULL; ++ if (ctx.m_known_vals.exists ()) ++ { ++ unsigned int n = MIN (ctx.m_known_vals.length (), nargs); ++ ++ for (unsigned int i = 0; i < n; i++) ++ if (ipa_is_param_used_by_indirect_call (params_summary, i) ++ && ctx.m_known_vals[i]) ++ { ++ m_known_vals = ctx.m_known_vals.copy (); ++ break; ++ } ++ } ++ ++ m_known_contexts = vNULL; ++ if (ctx.m_known_contexts.exists ()) ++ { ++ unsigned int n = MIN (ctx.m_known_contexts.length (), nargs); ++ ++ for (unsigned int i = 0; i < n; i++) ++ if (ipa_is_param_used_by_polymorphic_call (params_summary, i) ++ && !ctx.m_known_contexts[i].useless_p ()) ++ { ++ m_known_contexts = ctx.m_known_contexts.copy (); ++ break; ++ } ++ } ++ ++ m_known_aggs = vNULL; ++ if (ctx.m_known_aggs.exists ()) ++ { ++ unsigned int n = MIN (ctx.m_known_aggs.length (), nargs); ++ ++ for (unsigned int i = 0; i < n; i++) ++ if (ipa_is_param_used_by_indirect_call (params_summary, i) ++ && !ctx.m_known_aggs[i].is_empty ()) ++ { ++ m_known_aggs = ipa_copy_agg_values (ctx.m_known_aggs); ++ break; ++ } ++ } ++} ++ ++/* Release memory used by known_vals/contexts/aggs vectors. ++ If ALL is true release also inline_param_summary. ++ This happens when context was previously duplciated to be stored ++ into cache. */ ++ ++void ++ipa_call_context::release (bool all) ++{ ++ /* See if context is initialized at first place. */ ++ if (!m_node) ++ return; ++ m_known_vals.release (); ++ m_known_contexts.release (); ++ ipa_release_agg_values (m_known_aggs); ++ if (all) ++ m_inline_param_summary.release (); ++} ++ ++/* Return true if CTX describes the same call context as THIS. */ ++ ++bool ++ipa_call_context::equal_to (const ipa_call_context &ctx) ++{ ++ if (m_node != ctx.m_node ++ || m_possible_truths != ctx.m_possible_truths ++ || m_nonspec_possible_truths != ctx.m_nonspec_possible_truths) ++ return false; ++ ++ class ipa_node_params *params_summary = IPA_NODE_REF (m_node); ++ unsigned int nargs = params_summary ++ ? 
ipa_get_param_count (params_summary) : 0; ++ ++ if (m_inline_param_summary.exists () || ctx.m_inline_param_summary.exists ()) ++ { ++ for (unsigned int i = 0; i < nargs; i++) ++ { ++ if (!ipa_is_param_used_by_ipa_predicates (params_summary, i)) ++ continue; ++ if (i >= m_inline_param_summary.length () ++ || m_inline_param_summary[i].useless_p ()) ++ { ++ if (i < ctx.m_inline_param_summary.length () ++ && !ctx.m_inline_param_summary[i].useless_p ()) ++ return false; ++ continue; ++ } ++ if (i >= ctx.m_inline_param_summary.length () ++ || ctx.m_inline_param_summary[i].useless_p ()) ++ { ++ if (i < m_inline_param_summary.length () ++ && !m_inline_param_summary[i].useless_p ()) ++ return false; ++ continue; ++ } ++ if (!m_inline_param_summary[i].equal_to ++ (ctx.m_inline_param_summary[i])) ++ return false; ++ } ++ } ++ if (m_known_vals.exists () || ctx.m_known_vals.exists ()) ++ { ++ for (unsigned int i = 0; i < nargs; i++) ++ { ++ if (!ipa_is_param_used_by_indirect_call (params_summary, i)) ++ continue; ++ if (i >= m_known_vals.length () || !m_known_vals[i]) ++ { ++ if (i < ctx.m_known_vals.length () && ctx.m_known_vals[i]) ++ return false; ++ continue; ++ } ++ if (i >= ctx.m_known_vals.length () || !ctx.m_known_vals[i]) ++ { ++ if (i < m_known_vals.length () && m_known_vals[i]) ++ return false; ++ continue; ++ } ++ if (m_known_vals[i] != ctx.m_known_vals[i]) ++ return false; ++ } ++ } ++ if (m_known_contexts.exists () || ctx.m_known_contexts.exists ()) ++ { ++ for (unsigned int i = 0; i < nargs; i++) ++ { ++ if (!ipa_is_param_used_by_polymorphic_call (params_summary, i)) ++ continue; ++ if (i >= m_known_contexts.length () ++ || m_known_contexts[i].useless_p ()) ++ { ++ if (i < ctx.m_known_contexts.length () ++ && !ctx.m_known_contexts[i].useless_p ()) ++ return false; ++ continue; ++ } ++ if (i >= ctx.m_known_contexts.length () ++ || ctx.m_known_contexts[i].useless_p ()) ++ { ++ if (i < m_known_contexts.length () ++ && !m_known_contexts[i].useless_p ()) ++ return false; ++ continue; ++ } ++ if (!m_known_contexts[i].equal_to ++ (ctx.m_known_contexts[i])) ++ return false; ++ } ++ } ++ if (m_known_aggs.exists () || ctx.m_known_aggs.exists ()) ++ { ++ for (unsigned int i = 0; i < nargs; i++) ++ { ++ if (!ipa_is_param_used_by_indirect_call (params_summary, i)) ++ continue; ++ if (i >= m_known_aggs.length () || m_known_aggs[i].is_empty ()) ++ { ++ if (i < ctx.m_known_aggs.length () ++ && !ctx.m_known_aggs[i].is_empty ()) ++ return false; ++ continue; ++ } ++ if (i >= ctx.m_known_aggs.length () ++ || ctx.m_known_aggs[i].is_empty ()) ++ { ++ if (i < m_known_aggs.length () ++ && !m_known_aggs[i].is_empty ()) ++ return false; ++ continue; ++ } ++ if (!m_known_aggs[i].equal_to (ctx.m_known_aggs[i])) ++ return false; ++ } ++ } ++ return true; ++} + +-/* Estimate size and time needed to execute NODE assuming +- POSSIBLE_TRUTHS clause, and KNOWN_VALS, KNOWN_AGGS and KNOWN_CONTEXTS +- information about NODE's arguments. If non-NULL use also probability +- information present in INLINE_PARAM_SUMMARY vector. ++/* Estimate size and time needed to execute call in the given context. + Additionally detemine hints determined by the context. Finally compute + minimal size needed for the call that is independent on the call context and + can be used for fast estimates. Return the values in RET_SIZE, + RET_MIN_SIZE, RET_TIME and RET_HINTS. 
*/ + + void +-estimate_node_size_and_time (struct cgraph_node *node, +- clause_t possible_truths, +- clause_t nonspec_possible_truths, +- vec known_vals, +- vec known_contexts, +- vec known_aggs, +- int *ret_size, int *ret_min_size, +- sreal *ret_time, +- sreal *ret_nonspecialized_time, +- ipa_hints *ret_hints, +- vec +- inline_param_summary) ++ipa_call_context::estimate_size_and_time (int *ret_size, ++ int *ret_min_size, ++ sreal *ret_time, ++ sreal *ret_nonspecialized_time, ++ ipa_hints *ret_hints) + { +- struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node); ++ class ipa_fn_summary *info = ipa_fn_summaries->get_create (m_node); + size_time_entry *e; + int size = 0; + sreal time = 0; +@@ -2666,13 +3226,13 @@ estimate_node_size_and_time (struct cgra + { + bool found = false; + fprintf (dump_file, " Estimating body: %s/%i\n" +- " Known to be false: ", node->name (), +- node->order); ++ " Known to be false: ", m_node->name (), ++ m_node->order); + + for (i = predicate::not_inlined_condition; + i < (predicate::first_dynamic_condition + + (int) vec_safe_length (info->conds)); i++) +- if (!(possible_truths & (1 << i))) ++ if (!(m_possible_truths & (1 << i))) + { + if (found) + fprintf (dump_file, ", "); +@@ -2681,19 +3241,19 @@ estimate_node_size_and_time (struct cgra + } + } + +- estimate_calls_size_and_time (node, &size, &min_size, &time, &hints, possible_truths, +- known_vals, known_contexts, known_aggs); ++ estimate_calls_size_and_time (m_node, &size, &min_size, &time, &hints, m_possible_truths, ++ m_known_vals, m_known_contexts, m_known_aggs); + sreal nonspecialized_time = time; + + for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) + { +- bool exec = e->exec_predicate.evaluate (nonspec_possible_truths); ++ bool exec = e->exec_predicate.evaluate (m_nonspec_possible_truths); + + /* Because predicates are conservative, it can happen that nonconst is 1 + but exec is 0. 
*/ + if (exec) + { +- bool nonconst = e->nonconst_predicate.evaluate (possible_truths); ++ bool nonconst = e->nonconst_predicate.evaluate (m_possible_truths); + + gcc_checking_assert (e->time >= 0); + gcc_checking_assert (time >= 0); +@@ -2709,7 +3269,7 @@ estimate_node_size_and_time (struct cgra + nonspecialized_time += e->time; + if (!nonconst) + ; +- else if (!inline_param_summary.exists ()) ++ else if (!m_inline_param_summary.exists ()) + { + if (nonconst) + time += e->time; +@@ -2717,8 +3277,8 @@ estimate_node_size_and_time (struct cgra + else + { + int prob = e->nonconst_predicate.probability +- (info->conds, possible_truths, +- inline_param_summary); ++ (info->conds, m_possible_truths, ++ m_inline_param_summary); + gcc_checking_assert (prob >= 0); + gcc_checking_assert (prob <= REG_BR_PROB_BASE); + time += e->time * prob / REG_BR_PROB_BASE; +@@ -2742,14 +3302,14 @@ estimate_node_size_and_time (struct cgra + time = nonspecialized_time; + + if (info->loop_iterations +- && !info->loop_iterations->evaluate (possible_truths)) ++ && !info->loop_iterations->evaluate (m_possible_truths)) + hints |= INLINE_HINT_loop_iterations; + if (info->loop_stride +- && !info->loop_stride->evaluate (possible_truths)) ++ && !info->loop_stride->evaluate (m_possible_truths)) + hints |= INLINE_HINT_loop_stride; + if (info->scc_no) + hints |= INLINE_HINT_in_scc; +- if (DECL_DECLARED_INLINE_P (node->decl)) ++ if (DECL_DECLARED_INLINE_P (m_node->decl)) + hints |= INLINE_HINT_declared_inline; + + size = RDIV (size, ipa_fn_summary::size_scale); +@@ -2782,7 +3342,7 @@ estimate_ipcp_clone_size_and_time (struc + vec known_vals, + vec + known_contexts, +- vec known_aggs, ++ vec known_aggs, + int *ret_size, sreal *ret_time, + sreal *ret_nonspec_time, + ipa_hints *hints) +@@ -2791,10 +3351,31 @@ estimate_ipcp_clone_size_and_time (struc + + evaluate_conditions_for_known_args (node, false, known_vals, known_aggs, + &clause, &nonspec_clause); +- estimate_node_size_and_time (node, clause, nonspec_clause, +- known_vals, known_contexts, +- known_aggs, ret_size, NULL, ret_time, +- ret_nonspec_time, hints, vNULL); ++ ipa_call_context ctx (node, clause, nonspec_clause, ++ known_vals, known_contexts, ++ known_aggs, vNULL); ++ ctx.estimate_size_and_time (ret_size, NULL, ret_time, ++ ret_nonspec_time, hints); ++} ++ ++/* Return stack frame offset where frame of NODE is supposed to start inside ++ of the function it is inlined to. ++ Return 0 for functions that are not inlined. 
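A small worked example of the offset computation implemented just below (numbers are made up): if C is inlined into B and B into A, with estimated_self_stack_size of 48 for A and 32 for B, then

    ipa_get_stack_frame_offset (C) == 32 + 48 == 80   /* B's frame, then A's */
    ipa_get_stack_frame_offset (B) == 48              /* A's frame only      */
    ipa_get_stack_frame_offset (A) == 0               /* not inlined at all  */

i.e. the loop walks node->callers->caller until it reaches a node that is not inlined, summing the self stack sizes of the enclosing frames.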
*/ ++ ++HOST_WIDE_INT ++ipa_get_stack_frame_offset (struct cgraph_node *node) ++{ ++ HOST_WIDE_INT offset = 0; ++ if (!node->inlined_to) ++ return 0; ++ node = node->callers->caller; ++ while (true) ++ { ++ offset += ipa_size_summaries->get (node)->estimated_self_stack_size; ++ if (!node->inlined_to) ++ return offset; ++ node = node->callers->caller; ++ } + } + + +@@ -2805,19 +3386,7 @@ static void + inline_update_callee_summaries (struct cgraph_node *node, int depth) + { + struct cgraph_edge *e; +- ipa_fn_summary *callee_info = ipa_fn_summaries->get (node); +- ipa_fn_summary *caller_info = ipa_fn_summaries->get (node->callers->caller); +- HOST_WIDE_INT peak; +- +- callee_info->stack_frame_offset +- = caller_info->stack_frame_offset +- + caller_info->estimated_self_stack_size; +- peak = callee_info->stack_frame_offset +- + callee_info->estimated_self_stack_size; +- +- ipa_fn_summary *s = ipa_fn_summaries->get (node->global.inlined_to); +- if (s->estimated_stack_size < peak) +- s->estimated_stack_size = peak; ++ + ipa_propagate_frequency (node); + for (e = node->callees; e; e = e->next_callee) + { +@@ -2830,7 +3399,7 @@ inline_update_callee_summaries (struct c + } + + /* Update change_prob of EDGE after INLINED_EDGE has been inlined. +- When functoin A is inlined in B and A calls C with parameter that ++ When function A is inlined in B and A calls C with parameter that + changes with probability PROB1 and C is known to be passthroug + of argument if B that change with probability PROB2, the probability + of change is now PROB1*PROB2. */ +@@ -2842,9 +3411,11 @@ remap_edge_change_prob (struct cgraph_ed + if (ipa_node_params_sum) + { + int i; +- struct ipa_edge_args *args = IPA_EDGE_REF (edge); +- struct ipa_call_summary *es = ipa_call_summaries->get (edge); +- struct ipa_call_summary *inlined_es ++ class ipa_edge_args *args = IPA_EDGE_REF (edge); ++ if (!args) ++ return; ++ class ipa_call_summary *es = ipa_call_summaries->get (edge); ++ class ipa_call_summary *inlined_es + = ipa_call_summaries->get (inlined_edge); + + if (es->param.length () == 0) +@@ -2885,8 +3456,9 @@ remap_edge_change_prob (struct cgraph_ed + static void + remap_edge_summaries (struct cgraph_edge *inlined_edge, + struct cgraph_node *node, +- struct ipa_fn_summary *info, +- struct ipa_fn_summary *callee_info, ++ class ipa_fn_summary *info, ++ class ipa_node_params *params_summary, ++ class ipa_fn_summary *callee_info, + vec operand_map, + vec offset_map, + clause_t possible_truths, +@@ -2895,18 +3467,19 @@ remap_edge_summaries (struct cgraph_edge + struct cgraph_edge *e, *next; + for (e = node->callees; e; e = next) + { +- struct ipa_call_summary *es = ipa_call_summaries->get (e); + predicate p; + next = e->next_callee; + + if (e->inline_failed) + { ++ class ipa_call_summary *es = ipa_call_summaries->get (e); + remap_edge_change_prob (inlined_edge, e); + + if (es->predicate) + { + p = es->predicate->remap_after_inlining +- (info, callee_info, operand_map, ++ (info, params_summary, ++ callee_info, operand_map, + offset_map, possible_truths, + *toplev_predicate); + edge_set_predicate (e, &p); +@@ -2915,13 +3488,14 @@ remap_edge_summaries (struct cgraph_edge + edge_set_predicate (e, toplev_predicate); + } + else +- remap_edge_summaries (inlined_edge, e->callee, info, callee_info, ++ remap_edge_summaries (inlined_edge, e->callee, info, ++ params_summary, callee_info, + operand_map, offset_map, possible_truths, + toplev_predicate); + } + for (e = node->indirect_calls; e; e = next) + { +- struct ipa_call_summary *es = 
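ipa_get_stack_frame_offset above replaces the cached stack_frame_offset field: the offset is recomputed on demand by walking from the inline clone up its caller chain and summing the callers' self stack sizes until the offline function is reached. A stand-alone sketch of that walk; the node type is a stand-in, not the real cgraph_node:

#include <cstdio>

/* Hypothetical stand-in for the bits of cgraph_node used here: an inline
   clone records the function it was inlined into and its caller.  */
struct node
{
  node *inlined_to;     /* NULL for an offline (non-inlined) function.  */
  node *caller;         /* Caller of the inline clone.  */
  long self_stack_size; /* Stand-in for estimated_self_stack_size.  */
};

/* Frame offset of N inside the function it ends up inlined into: the sum
   of the self stack sizes of all callers on the inline chain.  */
static long stack_frame_offset (node *n)
{
  long offset = 0;
  if (!n->inlined_to)
    return 0;
  for (n = n->caller; ; n = n->caller)
    {
      offset += n->self_stack_size;
      if (!n->inlined_to)
        return offset;
    }
}

int main ()
{
  node top  = { 0, 0, 64 };        /* offline function, 64-byte frame */
  node mid  = { &top, &top, 32 };  /* inlined into top */
  node leaf = { &top, &mid, 16 };  /* inlined into top via mid */
  printf ("%ld\n", stack_frame_offset (&leaf)); /* 64 + 32 = 96 */
  return 0;
}
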
ipa_call_summaries->get (e); ++ class ipa_call_summary *es = ipa_call_summaries->get (e); + predicate p; + next = e->next_callee; + +@@ -2929,7 +3503,8 @@ remap_edge_summaries (struct cgraph_edge + if (es->predicate) + { + p = es->predicate->remap_after_inlining +- (info, callee_info, operand_map, offset_map, ++ (info, params_summary, ++ callee_info, operand_map, offset_map, + possible_truths, *toplev_predicate); + edge_set_predicate (e, &p); + } +@@ -2941,8 +3516,9 @@ remap_edge_summaries (struct cgraph_edge + /* Same as remap_predicate, but set result into hint *HINT. */ + + static void +-remap_hint_predicate (struct ipa_fn_summary *info, +- struct ipa_fn_summary *callee_info, ++remap_hint_predicate (class ipa_fn_summary *info, ++ class ipa_node_params *params_summary, ++ class ipa_fn_summary *callee_info, + predicate **hint, + vec operand_map, + vec offset_map, +@@ -2954,7 +3530,7 @@ remap_hint_predicate (struct ipa_fn_summ + if (!*hint) + return; + p = (*hint)->remap_after_inlining +- (info, callee_info, ++ (info, params_summary, callee_info, + operand_map, offset_map, + possible_truths, *toplev_predicate); + if (p != false && p != true) +@@ -2972,17 +3548,18 @@ void + ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge) + { + ipa_fn_summary *callee_info = ipa_fn_summaries->get (edge->callee); +- struct cgraph_node *to = (edge->caller->global.inlined_to +- ? edge->caller->global.inlined_to : edge->caller); +- struct ipa_fn_summary *info = ipa_fn_summaries->get (to); ++ struct cgraph_node *to = (edge->caller->inlined_to ++ ? edge->caller->inlined_to : edge->caller); ++ class ipa_fn_summary *info = ipa_fn_summaries->get (to); + clause_t clause = 0; /* not_inline is known to be false. */ + size_time_entry *e; +- vec operand_map = vNULL; +- vec offset_map = vNULL; ++ auto_vec operand_map; ++ auto_vec offset_map; + int i; + predicate toplev_predicate; +- predicate true_p = true; +- struct ipa_call_summary *es = ipa_call_summaries->get (edge); ++ class ipa_call_summary *es = ipa_call_summaries->get (edge); ++ class ipa_node_params *params_summary = (ipa_node_params_sum ++ ? IPA_NODE_REF (to) : NULL); + + if (es->predicate) + toplev_predicate = *es->predicate; +@@ -2995,8 +3572,8 @@ ipa_merge_fn_summary_after_inlining (str + evaluate_properties_for_edge (edge, true, &clause, NULL, NULL, NULL, NULL); + if (ipa_node_params_sum && callee_info->conds) + { +- struct ipa_edge_args *args = IPA_EDGE_REF (edge); +- int count = ipa_get_cs_argument_count (args); ++ class ipa_edge_args *args = IPA_EDGE_REF (edge); ++ int count = args ? 
ipa_get_cs_argument_count (args) : 0; + int i; + + if (count) +@@ -3029,19 +3606,21 @@ ipa_merge_fn_summary_after_inlining (str + } + } + operand_map[i] = map; +- gcc_assert (map < ipa_get_param_count (IPA_NODE_REF (to))); ++ gcc_assert (map < ipa_get_param_count (params_summary)); + } + } + for (i = 0; vec_safe_iterate (callee_info->size_time_table, i, &e); i++) + { + predicate p; + p = e->exec_predicate.remap_after_inlining +- (info, callee_info, operand_map, ++ (info, params_summary, ++ callee_info, operand_map, + offset_map, clause, + toplev_predicate); + predicate nonconstp; + nonconstp = e->nonconst_predicate.remap_after_inlining +- (info, callee_info, operand_map, ++ (info, params_summary, ++ callee_info, operand_map, + offset_map, clause, + toplev_predicate); + if (p != false && nonconstp != false) +@@ -3059,48 +3638,53 @@ ipa_merge_fn_summary_after_inlining (str + info->account_size_time (e->size, add_time, p, nonconstp); + } + } +- remap_edge_summaries (edge, edge->callee, info, callee_info, operand_map, ++ remap_edge_summaries (edge, edge->callee, info, params_summary, ++ callee_info, operand_map, + offset_map, clause, &toplev_predicate); +- remap_hint_predicate (info, callee_info, ++ remap_hint_predicate (info, params_summary, callee_info, + &callee_info->loop_iterations, + operand_map, offset_map, clause, &toplev_predicate); +- remap_hint_predicate (info, callee_info, ++ remap_hint_predicate (info, params_summary, callee_info, + &callee_info->loop_stride, + operand_map, offset_map, clause, &toplev_predicate); + +- ipa_call_summary *s = ipa_call_summaries->get (edge); +- inline_update_callee_summaries (edge->callee, s->loop_depth); ++ HOST_WIDE_INT stack_frame_offset = ipa_get_stack_frame_offset (edge->callee); ++ HOST_WIDE_INT peak = stack_frame_offset + callee_info->estimated_stack_size; + +- /* We do not maintain predicates of inlined edges, free it. */ +- edge_set_predicate (edge, &true_p); +- /* Similarly remove param summaries. */ +- es->param.release (); +- operand_map.release (); +- offset_map.release (); ++ if (info->estimated_stack_size < peak) ++ info->estimated_stack_size = peak; ++ ++ inline_update_callee_summaries (edge->callee, es->loop_depth); ++ ++ /* Free summaries that are not maintained for inline clones/edges. */ ++ ipa_call_summaries->remove (edge); ++ ipa_fn_summaries->remove (edge->callee); + } + +-/* For performance reasons ipa_merge_fn_summary_after_inlining is not updating overall size +- and time. Recompute it. */ ++/* For performance reasons ipa_merge_fn_summary_after_inlining is not updating ++ overall size and time. Recompute it. 
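After merging the callee summary, the hunk above raises the caller's estimated_stack_size to the peak reached by the newly inlined callee, i.e. its frame offset plus its own estimated stack. A tiny sketch of that update with plain longs standing in for HOST_WIDE_INT:

#include <cstdio>

/* Raise the caller's estimated stack size to the peak reached by an
   inlined callee whose frame starts at FRAME_OFFSET.  */
static void account_inlined_stack (long *caller_estimated_stack,
                                   long frame_offset,
                                   long callee_estimated_stack)
{
  long peak = frame_offset + callee_estimated_stack;
  if (*caller_estimated_stack < peak)
    *caller_estimated_stack = peak;
}

int main ()
{
  long caller = 128;
  account_inlined_stack (&caller, 96, 48);  /* peak 144 > 128 */
  printf ("%ld\n", caller);                 /* prints 144 */
  return 0;
}
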
*/ + + void + ipa_update_overall_fn_summary (struct cgraph_node *node) + { +- struct ipa_fn_summary *info = ipa_fn_summaries->get_create (node); ++ class ipa_fn_summary *info = ipa_fn_summaries->get_create (node); ++ class ipa_size_summary *size_info = ipa_size_summaries->get_create (node); + size_time_entry *e; + int i; + +- info->size = 0; ++ size_info->size = 0; + info->time = 0; + for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) + { +- info->size += e->size; ++ size_info->size += e->size; + info->time += e->time; + } +- estimate_calls_size_and_time (node, &info->size, &info->min_size, ++ estimate_calls_size_and_time (node, &size_info->size, &info->min_size, + &info->time, NULL, + ~(clause_t) (1 << predicate::false_condition), + vNULL, vNULL, vNULL); +- info->size = (info->size + ipa_fn_summary::size_scale / 2) / ipa_fn_summary::size_scale; ++ size_info->size = (size_info->size + ipa_fn_summary::size_scale / 2) ++ / ipa_fn_summary::size_scale; + } + + +@@ -3181,10 +3765,10 @@ ipa_fn_summary_generate (void) + /* Write inline summary for edge E to OB. */ + + static void +-read_ipa_call_summary (struct lto_input_block *ib, struct cgraph_edge *e, ++read_ipa_call_summary (class lto_input_block *ib, struct cgraph_edge *e, + bool prevails) + { +- struct ipa_call_summary *es = prevails ++ class ipa_call_summary *es = prevails + ? ipa_call_summaries->get_create (e) : NULL; + predicate p; + int length, i; +@@ -3235,7 +3819,7 @@ inline_read_section (struct lto_file_dec + const int cfg_offset = sizeof (struct lto_function_header); + const int main_offset = cfg_offset + header->cfg_size; + const int string_offset = main_offset + header->main_size; +- struct data_in *data_in; ++ class data_in *data_in; + unsigned int i, count2, j; + unsigned int f_count; + +@@ -3250,7 +3834,9 @@ inline_read_section (struct lto_file_dec + { + unsigned int index; + struct cgraph_node *node; +- struct ipa_fn_summary *info; ++ class ipa_fn_summary *info; ++ class ipa_node_params *params_summary; ++ class ipa_size_summary *size_info; + lto_symtab_encoder_t encoder; + struct bitpack_d bp; + struct cgraph_edge *e; +@@ -3261,6 +3847,9 @@ inline_read_section (struct lto_file_dec + node = dyn_cast (lto_symtab_encoder_deref (encoder, + index)); + info = node->prevailing_p () ? ipa_fn_summaries->get_create (node) : NULL; ++ params_summary = node->prevailing_p () ? IPA_NODE_REF (node) : NULL; ++ size_info = node->prevailing_p () ++ ? 
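The last statement of ipa_update_overall_fn_summary converts the accumulated size back from the scaled internal units with round-to-nearest integer division rather than truncation. A one-function sketch of the idiom; the scale value of 2 is an assumption here, only the rounding matters:

#include <cstdio>

/* Round-to-nearest integer division, as used when converting the summed
   size back from size_scale units; 2 is an assumed scale.  */
const int size_scale = 2;

static int unscale (int scaled_size)
{
  return (scaled_size + size_scale / 2) / size_scale;
}

int main ()
{
  printf ("%d %d %d\n", unscale (4), unscale (5), unscale (6)); /* 2 3 3 */
  return 0;
}
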
ipa_size_summaries->get_create (node) : NULL; + + int stack_size = streamer_read_uhwi (&ib); + int size = streamer_read_uhwi (&ib); +@@ -3269,8 +3858,8 @@ inline_read_section (struct lto_file_dec + if (info) + { + info->estimated_stack_size +- = info->estimated_self_stack_size = stack_size; +- info->size = info->self_size = size; ++ = size_info->estimated_self_stack_size = stack_size; ++ size_info->size = size_info->self_size = size; + info->time = time; + } + +@@ -3288,26 +3877,70 @@ inline_read_section (struct lto_file_dec + + count2 = streamer_read_uhwi (&ib); + gcc_assert (!info || !info->conds); ++ if (info) ++ vec_safe_reserve_exact (info->conds, count2); + for (j = 0; j < count2; j++) + { + struct condition c; ++ unsigned int k, count3; + c.operand_num = streamer_read_uhwi (&ib); +- c.size = streamer_read_uhwi (&ib); + c.code = (enum tree_code) streamer_read_uhwi (&ib); ++ c.type = stream_read_tree (&ib, data_in); + c.val = stream_read_tree (&ib, data_in); + bp = streamer_read_bitpack (&ib); + c.agg_contents = bp_unpack_value (&bp, 1); + c.by_ref = bp_unpack_value (&bp, 1); + if (c.agg_contents) + c.offset = streamer_read_uhwi (&ib); ++ count3 = streamer_read_uhwi (&ib); ++ c.param_ops = NULL; + if (info) +- vec_safe_push (info->conds, c); ++ vec_safe_reserve_exact (c.param_ops, count3); ++ if (params_summary) ++ ipa_set_param_used_by_ipa_predicates ++ (params_summary, c.operand_num, true); ++ for (k = 0; k < count3; k++) ++ { ++ struct expr_eval_op op; ++ enum gimple_rhs_class rhs_class; ++ op.code = (enum tree_code) streamer_read_uhwi (&ib); ++ op.type = stream_read_tree (&ib, data_in); ++ switch (rhs_class = get_gimple_rhs_class (op.code)) ++ { ++ case GIMPLE_UNARY_RHS: ++ op.index = 0; ++ op.val[0] = NULL_TREE; ++ op.val[1] = NULL_TREE; ++ break; ++ ++ case GIMPLE_BINARY_RHS: ++ case GIMPLE_TERNARY_RHS: ++ bp = streamer_read_bitpack (&ib); ++ op.index = bp_unpack_value (&bp, 2); ++ op.val[0] = stream_read_tree (&ib, data_in); ++ if (rhs_class == GIMPLE_BINARY_RHS) ++ op.val[1] = NULL_TREE; ++ else ++ op.val[1] = stream_read_tree (&ib, data_in); ++ break; ++ ++ default: ++ fatal_error (UNKNOWN_LOCATION, ++ "invalid fnsummary in LTO stream"); ++ } ++ if (info) ++ c.param_ops->quick_push (op); ++ } ++ if (info) ++ info->conds->quick_push (c); + } + count2 = streamer_read_uhwi (&ib); + gcc_assert (!info || !info->size_time_table); ++ if (info && count2) ++ vec_safe_reserve_exact (info->size_time_table, count2); + for (j = 0; j < count2; j++) + { +- struct size_time_entry e; ++ class size_time_entry e; + + e.size = streamer_read_uhwi (&ib); + e.time = sreal::stream_in (&ib); +@@ -3315,7 +3948,7 @@ inline_read_section (struct lto_file_dec + e.nonconst_predicate.stream_in (&ib); + + if (info) +- vec_safe_push (info->size_time_table, e); ++ info->size_time_table->quick_push (e); + } + + p.stream_in (&ib); +@@ -3378,7 +4011,7 @@ ipa_fn_summary_read (void) + static void + write_ipa_call_summary (struct output_block *ob, struct cgraph_edge *e) + { +- struct ipa_call_summary *es = ipa_call_summaries->get (e); ++ class ipa_call_summary *es = ipa_call_summaries->get (e); + int i; + + streamer_write_uhwi (ob, es->call_stmt_size); +@@ -3426,7 +4059,8 @@ ipa_fn_summary_write (void) + cgraph_node *cnode = lsei_cgraph_node (lsei); + if (cnode->definition && !cnode->alias) + { +- struct ipa_fn_summary *info = ipa_fn_summaries->get (cnode); ++ class ipa_fn_summary *info = ipa_fn_summaries->get (cnode); ++ class ipa_size_summary *size_info = ipa_size_summaries->get (cnode); + struct bitpack_d bp; + 
struct cgraph_edge *edge; + int i; +@@ -3434,8 +4068,8 @@ ipa_fn_summary_write (void) + struct condition *c; + + streamer_write_uhwi (ob, lto_symtab_encoder_encode (encoder, cnode)); +- streamer_write_hwi (ob, info->estimated_self_stack_size); +- streamer_write_hwi (ob, info->self_size); ++ streamer_write_hwi (ob, size_info->estimated_self_stack_size); ++ streamer_write_hwi (ob, size_info->self_size); + info->time.stream_out (ob); + bp = bitpack_create (ob->main_stream); + bp_pack_value (&bp, info->inlinable, 1); +@@ -3445,9 +4079,12 @@ ipa_fn_summary_write (void) + streamer_write_uhwi (ob, vec_safe_length (info->conds)); + for (i = 0; vec_safe_iterate (info->conds, i, &c); i++) + { ++ int j; ++ struct expr_eval_op *op; ++ + streamer_write_uhwi (ob, c->operand_num); +- streamer_write_uhwi (ob, c->size); + streamer_write_uhwi (ob, c->code); ++ stream_write_tree (ob, c->type, true); + stream_write_tree (ob, c->val, true); + bp = bitpack_create (ob->main_stream); + bp_pack_value (&bp, c->agg_contents, 1); +@@ -3455,6 +4092,21 @@ ipa_fn_summary_write (void) + streamer_write_bitpack (&bp); + if (c->agg_contents) + streamer_write_uhwi (ob, c->offset); ++ streamer_write_uhwi (ob, vec_safe_length (c->param_ops)); ++ for (j = 0; vec_safe_iterate (c->param_ops, j, &op); j++) ++ { ++ streamer_write_uhwi (ob, op->code); ++ stream_write_tree (ob, op->type, true); ++ if (op->val[0]) ++ { ++ bp = bitpack_create (ob->main_stream); ++ bp_pack_value (&bp, op->index, 2); ++ streamer_write_bitpack (&bp); ++ stream_write_tree (ob, op->val[0], true); ++ if (op->val[1]) ++ stream_write_tree (ob, op->val[1], true); ++ } ++ } + } + streamer_write_uhwi (ob, vec_safe_length (info->size_time_table)); + for (i = 0; vec_safe_iterate (info->size_time_table, i, &e); i++) +@@ -3487,23 +4139,33 @@ ipa_fn_summary_write (void) + } + + +-/* Release inline summary. */ ++/* Release function summary. */ + + void + ipa_free_fn_summary (void) + { +- struct cgraph_node *node; + if (!ipa_call_summaries) + return; +- FOR_EACH_DEFINED_FUNCTION (node) +- if (!node->alias) +- ipa_fn_summaries->remove (node); + ipa_fn_summaries->release (); + ipa_fn_summaries = NULL; + ipa_call_summaries->release (); + delete ipa_call_summaries; + ipa_call_summaries = NULL; + edge_predicate_pool.release (); ++ /* During IPA this is one of largest datastructures to release. */ ++ if (flag_wpa) ++ ggc_trim (); ++} ++ ++/* Release function summary. */ ++ ++void ++ipa_free_size_summary (void) ++{ ++ if (!ipa_size_summaries) ++ return; ++ ipa_size_summaries->release (); ++ ipa_size_summaries = NULL; + } + + namespace { +@@ -3578,10 +4240,12 @@ public: + gcc_assert (n == 0); + small_p = param; + } +- virtual bool gate (function *) { return small_p || !flag_wpa; } ++ virtual bool gate (function *) { return true; } + virtual unsigned int execute (function *) + { + ipa_free_fn_summary (); ++ if (!flag_wpa) ++ ipa_free_size_summary (); + return 0; + } + +diff -Nurp a/gcc/ipa-fnsummary.h b/gcc/ipa-fnsummary.h +--- a/gcc/ipa-fnsummary.h 2020-04-30 15:14:04.588000000 +0800 ++++ b/gcc/ipa-fnsummary.h 2020-04-30 15:14:56.664000000 +0800 +@@ -81,16 +81,40 @@ struct GTY(()) size_time_entry + sreal GTY((skip)) time; + }; + ++/* Summary about function and stack frame sizes. We keep this info ++ for inline clones and also for WPA streaming. For this reason this is not ++ part of ipa_fn_summary which exists only for offline functions. */ ++class ipa_size_summary ++{ ++public: ++ /* Estimated stack frame consumption by the function. 
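The condition writer above and the reader in inline_read_section have to stay in lock-step: the same counts, fields and bitpacks in the same order, with the reader reserving exactly the announced number of elements. A reduced stand-alone sketch of that discipline over a plain integer stream; none of the real LTO streamer types are used:

#include <cassert>
#include <cstdio>
#include <vector>

/* A toy stream standing in for the LTO output/input blocks.  */
struct stream
{
  std::vector<unsigned> data;
  size_t pos = 0;
  void write_uhwi (unsigned v) { data.push_back (v); }
  unsigned read_uhwi () { return data[pos++]; }
};

struct op { unsigned code; unsigned index; };

int main ()
{
  stream s;

  /* Writer: a count followed by one record per operation.  */
  std::vector<op> ops = { { 7, 0 }, { 42, 1 } };
  s.write_uhwi (ops.size ());
  for (const op &o : ops)
    {
      s.write_uhwi (o.code);
      s.write_uhwi (o.index);
    }

  /* Reader: reserve exactly the announced count and read the same fields
     back in the same order.  */
  std::vector<op> read_back;
  unsigned count = s.read_uhwi ();
  read_back.reserve (count);
  for (unsigned i = 0; i < count; i++)
    {
      op o;
      o.code = s.read_uhwi ();
      o.index = s.read_uhwi ();
      read_back.push_back (o);
    }

  assert (read_back.size () == ops.size ()
          && read_back[1].code == 42 && read_back[1].index == 1);
  printf ("round-trip ok: %zu ops\n", read_back.size ());
  return 0;
}
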
*/ ++ HOST_WIDE_INT estimated_self_stack_size; ++ /* Size of the function body. */ ++ int self_size; ++ /* Estimated size of the function after inlining. */ ++ int size; ++ ++ ipa_size_summary () ++ : estimated_self_stack_size (0), self_size (0), size (0) ++ { ++ } ++ /* Copy constructor. */ ++ ipa_size_summary (const ipa_size_summary &s) ++ : estimated_self_stack_size (0), self_size (s.self_size), size (s.size) ++ { ++ } ++}; ++ + /* Function inlining information. */ + struct GTY(()) ipa_fn_summary + { + /* Keep all field empty so summary dumping works during its computation. + This is useful for debugging. */ + ipa_fn_summary () +- : estimated_self_stack_size (0), self_size (0), min_size (0), ++ : min_size (0), + inlinable (false), single_caller (false), + fp_expressions (false), estimated_stack_size (false), +- stack_frame_offset (false), time (0), size (0), conds (NULL), ++ time (0), conds (NULL), + size_time_table (NULL), loop_iterations (NULL), loop_stride (NULL), + growth (0), scc_no (0) + { +@@ -98,13 +122,11 @@ struct GTY(()) ipa_fn_summary + + /* Copy constructor. */ + ipa_fn_summary (const ipa_fn_summary &s) +- : estimated_self_stack_size (s.estimated_self_stack_size), +- self_size (s.self_size), min_size (s.min_size), ++ : min_size (s.min_size), + inlinable (s.inlinable), single_caller (s.single_caller), + fp_expressions (s.fp_expressions), + estimated_stack_size (s.estimated_stack_size), +- stack_frame_offset (s.stack_frame_offset), time (s.time), size (s.size), +- conds (s.conds), size_time_table (s.size_time_table), ++ time (s.time), conds (s.conds), size_time_table (s.size_time_table), + loop_iterations (s.loop_iterations), loop_stride (s.loop_stride), + growth (s.growth), scc_no (s.scc_no) + {} +@@ -114,10 +136,6 @@ struct GTY(()) ipa_fn_summary + + /* Information about the function body itself. */ + +- /* Estimated stack frame consumption by the function. */ +- HOST_WIDE_INT estimated_self_stack_size; +- /* Size of the function body. */ +- int self_size; + /* Minimal size increase after inlining. */ + int min_size; + +@@ -135,11 +153,8 @@ struct GTY(()) ipa_fn_summary + + /* Estimated stack frame consumption by the function. */ + HOST_WIDE_INT estimated_stack_size; +- /* Expected offset of the stack frame of function. */ +- HOST_WIDE_INT stack_frame_offset; +- /* Estimated size of the function after inlining. */ ++ /* Estimated runtime of function after inlining. */ + sreal GTY((skip)) time; +- int size; + + /* Conditional size/time information. The summaries are being + merged during inlining. */ +@@ -177,7 +192,7 @@ public: + + static ipa_fn_summary_t *create_ggc (symbol_table *symtab) + { +- struct ipa_fn_summary_t *summary = new (ggc_alloc ()) ++ class ipa_fn_summary_t *summary = new (ggc_alloc ()) + ipa_fn_summary_t (symtab); + summary->disable_insertion_hook (); + return summary; +@@ -199,6 +214,24 @@ public: + extern GTY(()) fast_function_summary + *ipa_fn_summaries; + ++class ipa_size_summary_t: ++ public fast_function_summary ++{ ++public: ++ ipa_size_summary_t (symbol_table *symtab): ++ fast_function_summary (symtab) {} ++ ++ static ipa_size_summary_t *create_ggc (symbol_table *symtab) ++ { ++ class ipa_size_summary_t *summary = new (ggc_alloc ()) ++ ipa_size_summary_t (symtab); ++ summary->disable_insertion_hook (); ++ return summary; ++ } ++}; ++extern fast_function_summary ++ *ipa_size_summaries; ++ + /* Information kept about callgraph edges. 
*/ + struct ipa_call_summary + { +@@ -245,6 +278,57 @@ public: + ipa_call_summary *dst_data); + }; + ++/* This object describe a context of call. That is a summary of known ++ information about its parameters. Main purpose of this context is ++ to give more realistic esitmations of function runtime, size and ++ inline hints. */ ++class ipa_call_context ++{ ++public: ++ ipa_call_context (cgraph_node *node, ++ clause_t possible_truths, ++ clause_t nonspec_possible_truths, ++ vec known_vals, ++ vec known_contexts, ++ vec known_aggs, ++ vec m_inline_param_summary); ++ ipa_call_context () ++ : m_node(NULL) ++ { ++ } ++ void estimate_size_and_time (int *ret_size, int *ret_min_size, ++ sreal *ret_time, ++ sreal *ret_nonspecialized_time, ++ ipa_hints *ret_hints); ++ void duplicate_from (const ipa_call_context &ctx); ++ void release (bool all = false); ++ bool equal_to (const ipa_call_context &); ++ bool exists_p () ++ { ++ return m_node != NULL; ++ } ++private: ++ /* Called function. */ ++ cgraph_node *m_node; ++ /* Clause describing what predicate conditionals can be satisfied ++ in this context if function is inlined/specialised. */ ++ clause_t m_possible_truths; ++ /* Clause describing what predicate conditionals can be satisfied ++ in this context if function is kept offline. */ ++ clause_t m_nonspec_possible_truths; ++ /* Inline summary maintains info about change probabilities. */ ++ vec m_inline_param_summary; ++ ++ /* The following is used only to resolve indirect calls. */ ++ ++ /* Vector describing known values of parameters. */ ++ vec m_known_vals; ++ /* Vector describing known polymorphic call contexts. */ ++ vec m_known_contexts; ++ /* Vector describing known aggregate values. */ ++ vec m_known_aggs; ++}; ++ + extern fast_call_summary *ipa_call_summaries; + + /* In ipa-fnsummary.c */ +@@ -253,11 +337,12 @@ void ipa_dump_fn_summaries (FILE *f); + void ipa_dump_fn_summary (FILE *f, struct cgraph_node *node); + void ipa_dump_hints (FILE *f, ipa_hints); + void ipa_free_fn_summary (void); ++void ipa_free_size_summary (void); + void inline_analyze_function (struct cgraph_node *node); + void estimate_ipcp_clone_size_and_time (struct cgraph_node *, + vec, + vec, +- vec, ++ vec, + int *, sreal *, sreal *, + ipa_hints *); + void ipa_merge_fn_summary_after_inlining (struct cgraph_edge *edge); +@@ -265,26 +350,16 @@ void ipa_update_overall_fn_summary (stru + void compute_fn_summary (struct cgraph_node *, bool); + + +-void evaluate_properties_for_edge (struct cgraph_edge *e, bool inline_p, ++void evaluate_properties_for_edge (struct cgraph_edge *e, ++ bool inline_p, + clause_t *clause_ptr, + clause_t *nonspec_clause_ptr, + vec *known_vals_ptr, + vec + *known_contexts_ptr, +- vec *); +-void estimate_node_size_and_time (struct cgraph_node *node, +- clause_t possible_truths, +- clause_t nonspec_possible_truths, +- vec known_vals, +- vec, +- vec known_aggs, +- int *ret_size, int *ret_min_size, +- sreal *ret_time, +- sreal *ret_nonspecialized_time, +- ipa_hints *ret_hints, +- vec +- inline_param_summary); ++ vec *); + + void ipa_fnsummary_c_finalize (void); ++HOST_WIDE_INT ipa_get_stack_frame_offset (struct cgraph_node *node); + + #endif /* GCC_IPA_FNSUMMARY_H */ +diff -Nurp a/gcc/ipa-icf.c b/gcc/ipa-icf.c +--- a/gcc/ipa-icf.c 2020-04-30 15:14:04.596000000 +0800 ++++ b/gcc/ipa-icf.c 2020-04-30 15:14:56.632000000 +0800 +@@ -491,7 +491,7 @@ sem_function::param_used_p (unsigned int + + struct ipa_node_params *parms_info = IPA_NODE_REF (get_node ()); + +- if (vec_safe_length (parms_info->descriptors) <= i) 
++ if (!parms_info || vec_safe_length (parms_info->descriptors) <= i) + return true; + + return ipa_is_param_used (IPA_NODE_REF (get_node ()), i); +@@ -1149,8 +1149,8 @@ sem_function::merge (sem_item *alias_ite + "cannot create wrapper of stdarg function.\n"); + } + else if (ipa_fn_summaries +- && ipa_fn_summaries->get (alias) != NULL +- && ipa_fn_summaries->get (alias)->self_size <= 2) ++ && ipa_size_summaries->get (alias) != NULL ++ && ipa_size_summaries->get (alias)->self_size <= 2) + { + if (dump_file) + fprintf (dump_file, "Wrapper creation is not " +@@ -1268,6 +1268,7 @@ sem_function::merge (sem_item *alias_ite + + /* Remove the function's body. */ + ipa_merge_profiles (original, alias); ++ symtab->call_cgraph_removal_hooks (alias); + alias->release_body (true); + alias->reset (); + /* Notice global symbol possibly produced RTL. */ +@@ -1288,11 +1289,13 @@ sem_function::merge (sem_item *alias_ite + { + gcc_assert (!create_alias); + alias->icf_merged = true; ++ symtab->call_cgraph_removal_hooks (alias); + local_original->icf_merged = true; + + /* FIXME update local_original counts. */ + ipa_merge_profiles (original, alias, true); + alias->create_wrapper (local_original); ++ symtab->call_cgraph_insertion_hooks (alias); + + if (dump_file) + fprintf (dump_file, "Unified; Wrapper has been created.\n\n"); +diff -Nurp a/gcc/ipa-inline-analysis.c b/gcc/ipa-inline-analysis.c +--- a/gcc/ipa-inline-analysis.c 2020-04-30 15:14:04.556000000 +0800 ++++ b/gcc/ipa-inline-analysis.c 2020-04-30 15:14:56.680000000 +0800 +@@ -53,6 +53,48 @@ along with GCC; see the file COPYING3. + /* Cached node/edge growths. */ + call_summary *edge_growth_cache = NULL; + ++/* The context cache remembers estimated time/size and hints for given ++ ipa_call_context of a call. */ ++class node_context_cache_entry ++{ ++public: ++ ipa_call_context ctx; ++ sreal time, nonspec_time; ++ int size; ++ ipa_hints hints; ++ ++ node_context_cache_entry () ++ : ctx () ++ { ++ } ++ ~node_context_cache_entry () ++ { ++ ctx.release (); ++ } ++}; ++ ++/* At the moment we implement primitive single entry LRU cache. */ ++class node_context_summary ++{ ++public: ++ node_context_cache_entry entry; ++ ++ node_context_summary () ++ : entry () ++ { ++ } ++ ~node_context_summary () ++ { ++ } ++}; ++ ++/* Summary holding the context cache. */ ++static fast_function_summary ++ *node_context_cache = NULL; ++/* Statistics about the context cache effectivity. */ ++static long node_context_cache_hit, node_context_cache_miss, ++ node_context_cache_clear; ++ + /* Give initial reasons why inlining would fail on EDGE. This gets either + nullified or usually overwritten by more precise reasons later. */ + +@@ -77,6 +119,16 @@ initialize_inline_failed (struct cgraph_ + == CIF_FINAL_ERROR); + } + ++/* Allocate edge growth caches. */ ++ ++void ++initialize_growth_caches () ++{ ++ edge_growth_cache ++ = new call_summary (symtab, false); ++ node_context_cache ++ = new fast_function_summary (symtab); ++} + + /* Free growth caches. */ + +@@ -84,7 +136,17 @@ void + free_growth_caches (void) + { + delete edge_growth_cache; ++ delete node_context_cache; + edge_growth_cache = NULL; ++ node_context_cache = NULL; ++ if (dump_file) ++ fprintf (dump_file, "node context cache: %li hits, %li misses," ++ " %li initializations\n", ++ node_context_cache_hit, node_context_cache_miss, ++ node_context_cache_clear); ++ node_context_cache_hit = 0; ++ node_context_cache_miss = 0; ++ node_context_cache_clear = 0; + } + + /* Return hints derrived from EDGE. 
*/ +@@ -93,8 +155,8 @@ int + simple_edge_hints (struct cgraph_edge *edge) + { + int hints = 0; +- struct cgraph_node *to = (edge->caller->global.inlined_to +- ? edge->caller->global.inlined_to : edge->caller); ++ struct cgraph_node *to = (edge->caller->inlined_to ++ ? edge->caller->inlined_to : edge->caller); + struct cgraph_node *callee = edge->callee->ultimate_alias_target (); + int to_scc_no = ipa_fn_summaries->get (to)->scc_no; + int callee_scc_no = ipa_fn_summaries->get (callee)->scc_no; +@@ -127,9 +189,9 @@ do_estimate_edge_time (struct cgraph_edg + clause_t clause, nonspec_clause; + vec known_vals; + vec known_contexts; +- vec known_aggs; +- struct ipa_call_summary *es = ipa_call_summaries->get (edge); +- int min_size; ++ vec known_aggs; ++ class ipa_call_summary *es = ipa_call_summaries->get (edge); ++ int min_size = -1; + + callee = edge->callee->ultimate_alias_target (); + +@@ -137,9 +199,53 @@ do_estimate_edge_time (struct cgraph_edg + evaluate_properties_for_edge (edge, true, + &clause, &nonspec_clause, &known_vals, + &known_contexts, &known_aggs); +- estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals, +- known_contexts, known_aggs, &size, &min_size, +- &time, &nonspec_time, &hints, es->param); ++ ipa_call_context ctx (callee, clause, nonspec_clause, known_vals, ++ known_contexts, known_aggs, es->param); ++ if (node_context_cache != NULL) ++ { ++ node_context_summary *e = node_context_cache->get_create (callee); ++ if (e->entry.ctx.equal_to (ctx)) ++ { ++ node_context_cache_hit++; ++ size = e->entry.size; ++ time = e->entry.time; ++ nonspec_time = e->entry.nonspec_time; ++ hints = e->entry.hints; ++ if (flag_checking ++ && !callee->count.ipa_p ()) ++ { ++ sreal chk_time, chk_nonspec_time; ++ int chk_size, chk_min_size; ++ ++ ipa_hints chk_hints; ++ ctx.estimate_size_and_time (&chk_size, &chk_min_size, ++ &chk_time, &chk_nonspec_time, ++ &chk_hints); ++ gcc_assert (chk_size == size && chk_time == time ++ && chk_nonspec_time == nonspec_time ++ && chk_hints == hints); ++ } ++ } ++ else ++ { ++ if (e->entry.ctx.exists_p ()) ++ node_context_cache_miss++; ++ else ++ node_context_cache_clear++; ++ e->entry.ctx.release (true); ++ e->entry.ctx = ctx; ++ ctx.estimate_size_and_time (&size, &min_size, ++ &time, &nonspec_time, &hints); ++ e->entry.size = size; ++ e->entry.time = time; ++ e->entry.nonspec_time = nonspec_time; ++ e->entry.hints = hints; ++ e->entry.ctx.duplicate_from (ctx); ++ } ++ } ++ else ++ ctx.estimate_size_and_time (&size, &min_size, ++ &time, &nonspec_time, &hints); + + /* When we have profile feedback, we can quite safely identify hot + edges and for those we disable size limits. Don't do that when +@@ -147,21 +253,21 @@ do_estimate_edge_time (struct cgraph_edg + may hurt optimization of the caller's hot path. */ + if (edge->count.ipa ().initialized_p () && edge->maybe_hot_p () + && (edge->count.ipa ().apply_scale (2, 1) +- > (edge->caller->global.inlined_to +- ? edge->caller->global.inlined_to->count.ipa () ++ > (edge->caller->inlined_to ++ ? edge->caller->inlined_to->count.ipa () + : edge->caller->count.ipa ()))) + hints |= INLINE_HINT_known_hot; + +- known_vals.release (); +- known_contexts.release (); +- known_aggs.release (); ++ ctx.release (); + gcc_checking_assert (size >= 0); + gcc_checking_assert (time >= 0); + + /* When caching, update the cache entry. 
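do_estimate_edge_time now consults a one-entry per-node cache: when the freshly built context compares equal to the stored one, the cached size/time/hints are reused; otherwise the entry is replaced and the estimate recomputed, with separate counters for misses and first-time initializations. A reduced sketch of that policy in which the context key and the expensive estimate are stand-ins:

#include <cstdio>

/* Stand-in for ipa_call_context: the whole context is one integer key;
   equal keys mean the cached estimate can be reused.  */
struct context
{
  int key;
  bool valid;
  bool equal_to (const context &o) const { return valid && key == o.key; }
};

struct cache_entry
{
  context ctx = { 0, false };
  long size = 0;
};

static long hits, misses, clears;

/* Pretend expensive estimate.  */
static long estimate (const context &c) { return c.key * 10; }

static long cached_estimate (cache_entry &e, const context &c)
{
  if (e.ctx.equal_to (c))
    {
      hits++;
      return e.size;
    }
  /* Distinguish a first use from a replaced entry, mirroring the miss
     vs. initialization counters in the dump output.  */
  if (e.ctx.valid)
    misses++;
  else
    clears++;
  e.ctx = c;
  e.size = estimate (c);
  return e.size;
}

int main ()
{
  cache_entry e;
  context a = { 3, true }, b = { 5, true };
  cached_estimate (e, a);   /* initialization */
  cached_estimate (e, a);   /* hit */
  cached_estimate (e, b);   /* miss, entry replaced */
  printf ("hits=%ld misses=%ld initializations=%ld\n", hits, misses, clears);
  return 0;
}
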
*/ + if (edge_growth_cache != NULL) + { +- ipa_fn_summaries->get_create (edge->callee)->min_size = min_size; ++ if (min_size >= 0) ++ ipa_fn_summaries->get (edge->callee->function_symbol ())->min_size ++ = min_size; + edge_growth_cache_entry *entry + = edge_growth_cache->get_create (edge); + entry->time = time; +@@ -174,6 +280,14 @@ do_estimate_edge_time (struct cgraph_edg + return time; + } + ++/* Reset cache for NODE. ++ This must be done each time NODE body is modified. */ ++void ++reset_node_cache (struct cgraph_node *node) ++{ ++ if (node_context_cache) ++ node_context_cache->remove (node); ++} + + /* Return estimated callee growth after inlining EDGE. + Only to be called via estimate_edge_size. */ +@@ -186,7 +300,7 @@ do_estimate_edge_size (struct cgraph_edg + clause_t clause, nonspec_clause; + vec known_vals; + vec known_contexts; +- vec known_aggs; ++ vec known_aggs; + + /* When we do caching, use do_estimate_edge_time to populate the entry. */ + +@@ -206,12 +320,10 @@ do_estimate_edge_size (struct cgraph_edg + &clause, &nonspec_clause, + &known_vals, &known_contexts, + &known_aggs); +- estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals, +- known_contexts, known_aggs, &size, NULL, NULL, +- NULL, NULL, vNULL); +- known_vals.release (); +- known_contexts.release (); +- known_aggs.release (); ++ ipa_call_context ctx (callee, clause, nonspec_clause, known_vals, ++ known_contexts, known_aggs, vNULL); ++ ctx.estimate_size_and_time (&size, NULL, NULL, NULL, NULL); ++ ctx.release (); + return size; + } + +@@ -227,7 +339,7 @@ do_estimate_edge_hints (struct cgraph_ed + clause_t clause, nonspec_clause; + vec known_vals; + vec known_contexts; +- vec known_aggs; ++ vec known_aggs; + + /* When we do caching, use do_estimate_edge_time to populate the entry. 
*/ + +@@ -247,12 +359,10 @@ do_estimate_edge_hints (struct cgraph_ed + &clause, &nonspec_clause, + &known_vals, &known_contexts, + &known_aggs); +- estimate_node_size_and_time (callee, clause, nonspec_clause, known_vals, +- known_contexts, known_aggs, NULL, NULL, +- NULL, NULL, &hints, vNULL); +- known_vals.release (); +- known_contexts.release (); +- known_aggs.release (); ++ ipa_call_context ctx (callee, clause, nonspec_clause, known_vals, ++ known_contexts, known_aggs, vNULL); ++ ctx.estimate_size_and_time (NULL, NULL, NULL, NULL, &hints); ++ ctx.release (); + hints |= simple_edge_hints (edge); + return hints; + } +@@ -264,8 +374,8 @@ int + estimate_size_after_inlining (struct cgraph_node *node, + struct cgraph_edge *edge) + { +- struct ipa_call_summary *es = ipa_call_summaries->get (edge); +- ipa_fn_summary *s = ipa_fn_summaries->get (node); ++ class ipa_call_summary *es = ipa_call_summaries->get (edge); ++ ipa_size_summary *s = ipa_size_summaries->get (node); + if (!es->predicate || *es->predicate != false) + { + int size = s->size + estimate_edge_growth (edge); +@@ -321,7 +431,7 @@ int + estimate_growth (struct cgraph_node *node) + { + struct growth_data d = { node, false, false, 0 }; +- struct ipa_fn_summary *info = ipa_fn_summaries->get (node); ++ class ipa_size_summary *info = ipa_size_summaries->get (node); + + node->call_for_symbol_and_aliases (do_estimate_growth_1, &d, true); + +@@ -396,7 +506,7 @@ growth_likely_positive (struct cgraph_no + || node->address_taken) + return true; + +- max_callers = ipa_fn_summaries->get (node)->size * 4 / edge_growth + 2; ++ max_callers = ipa_size_summaries->get (node)->size * 4 / edge_growth + 2; + + for (e = node->callers; e; e = e->next_caller) + { +diff -Nurp a/gcc/ipa-inline.c b/gcc/ipa-inline.c +--- a/gcc/ipa-inline.c 2020-04-30 15:14:04.652000000 +0800 ++++ b/gcc/ipa-inline.c 2020-04-30 15:14:56.684000000 +0800 +@@ -150,8 +150,7 @@ caller_growth_limits (struct cgraph_edge + int newsize; + int limit = 0; + HOST_WIDE_INT stack_size_limit = 0, inlined_stack; +- ipa_fn_summary *info, *what_info; +- ipa_fn_summary *outer_info = ipa_fn_summaries->get (to); ++ ipa_size_summary *outer_info = ipa_size_summaries->get (to); + + /* Look for function e->caller is inlined to. While doing + so work out the largest function body on the way. As +@@ -163,28 +162,29 @@ caller_growth_limits (struct cgraph_edge + too much in order to prevent compiler from exploding". */ + while (true) + { +- info = ipa_fn_summaries->get (to); +- if (limit < info->self_size) +- limit = info->self_size; +- if (stack_size_limit < info->estimated_self_stack_size) +- stack_size_limit = info->estimated_self_stack_size; +- if (to->global.inlined_to) ++ ipa_size_summary *size_info = ipa_size_summaries->get (to); ++ if (limit < size_info->self_size) ++ limit = size_info->self_size; ++ if (stack_size_limit < size_info->estimated_self_stack_size) ++ stack_size_limit = size_info->estimated_self_stack_size; ++ if (to->inlined_to) + to = to->callers->caller; + else + break; + } + +- what_info = ipa_fn_summaries->get (what); ++ ipa_fn_summary *what_info = ipa_fn_summaries->get (what); ++ ipa_size_summary *what_size_info = ipa_size_summaries->get (what); + +- if (limit < what_info->self_size) +- limit = what_info->self_size; ++ if (limit < what_size_info->self_size) ++ limit = what_size_info->self_size; + + limit += limit * PARAM_VALUE (PARAM_LARGE_FUNCTION_GROWTH) / 100; + + /* Check the size after inlining against the function limits. 
But allow + the function to shrink if it went over the limits by forced inlining. */ + newsize = estimate_size_after_inlining (to, e); +- if (newsize >= info->size ++ if (newsize >= ipa_size_summaries->get (what)->size + && newsize > PARAM_VALUE (PARAM_LARGE_FUNCTION_INSNS) + && newsize > limit) + { +@@ -203,7 +203,7 @@ caller_growth_limits (struct cgraph_edge + stack_size_limit += ((gcov_type)stack_size_limit + * PARAM_VALUE (PARAM_STACK_FRAME_GROWTH) / 100); + +- inlined_stack = (outer_info->stack_frame_offset ++ inlined_stack = (ipa_get_stack_frame_offset (to) + + outer_info->estimated_self_stack_size + + what_info->estimated_stack_size); + /* Check new stack consumption with stack consumption at the place +@@ -213,7 +213,7 @@ caller_growth_limits (struct cgraph_edge + inline call, we can inline, too. + This bit overoptimistically assume that we are good at stack + packing. */ +- && inlined_stack > info->estimated_stack_size ++ && inlined_stack > ipa_fn_summaries->get (to)->estimated_stack_size + && inlined_stack > PARAM_VALUE (PARAM_LARGE_STACK_FRAME)) + { + e->inline_failed = CIF_LARGE_STACK_FRAME_GROWTH_LIMIT; +@@ -321,8 +321,8 @@ can_inline_edge_p (struct cgraph_edge *e + + bool inlinable = true; + enum availability avail; +- cgraph_node *caller = e->caller->global.inlined_to +- ? e->caller->global.inlined_to : e->caller; ++ cgraph_node *caller = (e->caller->inlined_to ++ ? e->caller->inlined_to : e->caller); + cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller); + + if (!callee->definition) +@@ -414,8 +414,8 @@ can_inline_edge_by_limits_p (struct cgra + + bool inlinable = true; + enum availability avail; +- cgraph_node *caller = e->caller->global.inlined_to +- ? e->caller->global.inlined_to : e->caller; ++ cgraph_node *caller = (e->caller->inlined_to ++ ? e->caller->inlined_to : e->caller); + cgraph_node *callee = e->callee->ultimate_alias_target (&avail, caller); + tree caller_tree = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (caller->decl); + tree callee_tree +@@ -687,8 +687,8 @@ inline sreal + compute_uninlined_call_time (struct cgraph_edge *edge, + sreal uninlined_call_time) + { +- cgraph_node *caller = (edge->caller->global.inlined_to +- ? edge->caller->global.inlined_to ++ cgraph_node *caller = (edge->caller->inlined_to ++ ? edge->caller->inlined_to + : edge->caller); + + sreal freq = edge->sreal_frequency (); +@@ -708,8 +708,8 @@ inline sreal + compute_inlined_call_time (struct cgraph_edge *edge, + sreal time) + { +- cgraph_node *caller = (edge->caller->global.inlined_to +- ? edge->caller->global.inlined_to ++ cgraph_node *caller = (edge->caller->inlined_to ++ ? edge->caller->inlined_to + : edge->caller); + sreal caller_time = ipa_fn_summaries->get (caller)->time; + +@@ -895,7 +895,7 @@ want_inline_self_recursive_call_p (struc + reason = "--param max-inline-recursive-depth exceeded."; + want_inline = false; + } +- else if (outer_node->global.inlined_to ++ else if (outer_node->inlined_to + && (caller_freq = outer_node->callers->sreal_frequency ()) == 0) + { + reason = "caller frequency is 0"; +@@ -1005,7 +1005,7 @@ want_inline_function_to_all_callers_p (s + if (node->alias) + return false; + /* Already inlined? */ +- if (node->global.inlined_to) ++ if (node->inlined_to) + return false; + /* Does it have callers? 
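caller_growth_limits now reads the sizes from ipa_size_summaries, but the arithmetic is unchanged: both the body-size limit and the stack limit start from the largest value seen on the inline chain and are enlarged by a percentage parameter before being compared with the post-inlining estimate. A worked sketch with assumed parameter values:

#include <cstdio>

/* Assumed values for the sketch; the real ones come from
   --param large-function-growth and --param stack-frame-growth.  */
const int large_function_growth = 100;  /* percent */
const int stack_frame_growth = 1000;    /* percent */

int main ()
{
  /* Largest self size / self stack seen while walking the inline chain.  */
  int limit = 900;
  long stack_size_limit = 40;

  /* Allow the containing function to grow by the configured percentage.  */
  limit += limit * large_function_growth / 100;                    /* 1800 */
  stack_size_limit += stack_size_limit * stack_frame_growth / 100; /* 440 */

  /* A candidate is rejected (together with the other thresholds in the
     hunk) when the post-inlining size or the combined stack, i.e. frame
     offset plus caller and callee frames, exceeds these limits.  */
  int newsize = 2100;
  long inlined_stack = 96 + 40 + 48;
  printf ("size limit %d (newsize %d %s), stack limit %ld (inlined %ld %s)\n",
          limit, newsize, newsize > limit ? "rejected" : "ok",
          stack_size_limit, inlined_stack,
          inlined_stack > stack_size_limit ? "rejected" : "ok");
  return 0;
}
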
*/ + if (!node->call_for_symbol_and_aliases (has_caller_p, NULL, true)) +@@ -1037,8 +1037,8 @@ edge_badness (struct cgraph_edge *edge, + struct cgraph_node *callee = edge->callee->ultimate_alias_target (); + struct ipa_fn_summary *callee_info = ipa_fn_summaries->get (callee); + ipa_hints hints; +- cgraph_node *caller = (edge->caller->global.inlined_to +- ? edge->caller->global.inlined_to ++ cgraph_node *caller = (edge->caller->inlined_to ++ ? edge->caller->inlined_to + : edge->caller); + + growth = estimate_edge_growth (edge); +@@ -1051,7 +1051,7 @@ edge_badness (struct cgraph_edge *edge, + gcc_checking_assert ((edge_time * 100 + - callee_info->time * 101).to_int () <= 0 + || callee->count.ipa ().initialized_p ()); +- gcc_checking_assert (growth <= callee_info->size); ++ gcc_checking_assert (growth <= ipa_size_summaries->get (callee)->size); + + if (dump) + { +@@ -1122,7 +1122,7 @@ edge_badness (struct cgraph_edge *edge, + if (need_more_work) + noninline_callee (); + } +- Withhout panilizing this case, we usually inline noninline_callee ++ Withhout penalizing this case, we usually inline noninline_callee + into the inline_caller because overall_growth is small preventing + further inlining of inline_caller. + +@@ -1132,7 +1132,7 @@ edge_badness (struct cgraph_edge *edge, + if (growth > overall_growth + /* ... and having only one caller which is not inlined ... */ + && callee_info->single_caller +- && !edge->caller->global.inlined_to ++ && !edge->caller->inlined_to + /* ... and edges executed only conditionally ... */ + && edge->sreal_frequency () < 1 + /* ... consider case where callee is not inline but caller is ... */ +@@ -1155,7 +1155,7 @@ edge_badness (struct cgraph_edge *edge, + and it is not called once and. */ + if (!caller_info->single_caller && overall_growth < caller_growth + && caller_info->inlinable +- && caller_info->size ++ && ipa_size_summaries->get (caller)->size + < (DECL_DECLARED_INLINE_P (caller->decl) + ? 
MAX_INLINE_INSNS_SINGLE : MAX_INLINE_INSNS_AUTO)) + { +@@ -1178,7 +1178,7 @@ edge_badness (struct cgraph_edge *edge, + overall_growth += 256 * 256 - 256; + denominator *= overall_growth; + } +- denominator *= ipa_fn_summaries->get (caller)->self_size + growth; ++ denominator *= ipa_size_summaries->get (caller)->size + growth; + + badness = - numerator / denominator; + +@@ -1300,8 +1300,10 @@ reset_edge_caches (struct cgraph_node *n + struct cgraph_node *where = node; + struct ipa_ref *ref; + +- if (where->global.inlined_to) +- where = where->global.inlined_to; ++ if (where->inlined_to) ++ where = where->inlined_to; ++ ++ reset_node_cache (where); + + if (edge_growth_cache != NULL) + for (edge = where->callers; edge; edge = edge->next_caller) +@@ -1351,7 +1353,7 @@ update_caller_keys (edge_heap_t *heap, s + struct ipa_ref *ref; + + if ((!node->alias && !ipa_fn_summaries->get (node)->inlinable) +- || node->global.inlined_to) ++ || node->inlined_to) + return; + if (!bitmap_set_bit (updated_nodes, node->get_uid ())) + return; +@@ -1479,8 +1481,8 @@ recursive_inlining (struct cgraph_edge * + int n = 0; + + node = edge->caller; +- if (node->global.inlined_to) +- node = node->global.inlined_to; ++ if (node->inlined_to) ++ node = node->inlined_to; + + if (DECL_DECLARED_INLINE_P (node->decl)) + limit = PARAM_VALUE (PARAM_MAX_INLINE_INSNS_RECURSIVE); +@@ -1528,7 +1530,7 @@ recursive_inlining (struct cgraph_edge * + + depth = 1; + for (cnode = curr->caller; +- cnode->global.inlined_to; cnode = cnode->callers->caller) ++ cnode->inlined_to; cnode = cnode->callers->caller) + if (node->decl + == curr->callee->ultimate_alias_target ()->decl) + depth++; +@@ -1567,6 +1569,7 @@ recursive_inlining (struct cgraph_edge * + } + + inline_call (curr, false, new_edges, &overall_size, true); ++ reset_node_cache (node); + lookup_recursive_calls (node, curr->callee, &heap); + n++; + } +@@ -1581,8 +1584,8 @@ recursive_inlining (struct cgraph_edge * + dump_printf_loc (MSG_NOTE, edge->call_stmt, + "\n Inlined %i times, " + "body grown from size %i to %i, time %f to %f\n", n, +- ipa_fn_summaries->get (master_clone)->size, +- ipa_fn_summaries->get (node)->size, ++ ipa_size_summaries->get (master_clone)->size, ++ ipa_size_summaries->get (node)->size, + ipa_fn_summaries->get (master_clone)->time.to_double (), + ipa_fn_summaries->get (node)->time.to_double ()); + +@@ -1593,7 +1596,7 @@ recursive_inlining (struct cgraph_edge * + node = next) + { + next = symtab->next_function (node); +- if (node->global.inlined_to == master_clone) ++ if (node->inlined_to == master_clone) + node->remove (); + } + master_clone->remove (); +@@ -1707,8 +1710,8 @@ resolve_noninline_speculation (edge_heap + if (edge->speculative && !speculation_useful_p (edge, false)) + { + struct cgraph_node *node = edge->caller; +- struct cgraph_node *where = node->global.inlined_to +- ? node->global.inlined_to : node; ++ struct cgraph_node *where = node->inlined_to ++ ? node->inlined_to : node; + auto_bitmap updated_nodes; + + if (edge->count.ipa ().initialized_p ()) +@@ -1749,6 +1752,16 @@ sum_callers (struct cgraph_node *node, v + return false; + } + ++/* We only propagate across edges with non-interposable callee. 
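The badness denominator above now uses the caller size from ipa_size_summaries plus the expected growth; because badness is the negated ratio and the heap is ordered by increasing badness, a larger caller or larger growth moves an edge towards the back of the queue. A toy calculation illustrating only that monotonicity; the numerator here is a placeholder, not the real profit term:

#include <cstdio>

/* Toy badness: negated numerator over a denominator that scales with the
   caller size plus the edge growth.  */
static double badness (double numerator, int caller_size, int growth)
{
  double denominator = caller_size + growth;
  return -numerator / denominator;
}

int main ()
{
  /* Same profit, bigger caller/growth -> badness closer to zero, i.e. the
     edge sorts later in a heap ordered by increasing badness.  */
  printf ("%f %f\n", badness (100.0, 50, 10), badness (100.0, 500, 100));
  return 0;
}
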
*/ ++ ++inline bool ++ignore_edge_p (struct cgraph_edge *e) ++{ ++ enum availability avail; ++ e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); ++ return (avail <= AVAIL_INTERPOSABLE); ++} ++ + /* We use greedy algorithm for inlining of small functions: + All inline candidates are put into prioritized heap ordered in + increasing badness. +@@ -1776,11 +1789,11 @@ inline_small_functions (void) + metrics. */ + + max_count = profile_count::uninitialized (); +- ipa_reduced_postorder (order, true, NULL); ++ ipa_reduced_postorder (order, true, ignore_edge_p); + free (order); + + FOR_EACH_DEFINED_FUNCTION (node) +- if (!node->global.inlined_to) ++ if (!node->inlined_to) + { + if (!node->alias && node->analyzed + && (node->has_gimple_body_p () || node->thunk.thunk_p) +@@ -1792,7 +1805,7 @@ inline_small_functions (void) + /* Do not account external functions, they will be optimized out + if not inlined. Also only count the non-cold portion of program. */ + if (inline_account_function_p (node)) +- initial_size += info->size; ++ initial_size += ipa_size_summaries->get (node)->size; + info->growth = estimate_growth (node); + + int num_calls = 0; +@@ -1808,7 +1821,8 @@ inline_small_functions (void) + n2 = ((struct ipa_dfs_info *) n2->aux)->next_cycle) + if (opt_for_fn (n2->decl, optimize)) + { +- ipa_fn_summary *info2 = ipa_fn_summaries->get (n2); ++ ipa_fn_summary *info2 = ipa_fn_summaries->get ++ (n2->inlined_to ? n2->inlined_to : n2); + if (info2->scc_no) + break; + info2->scc_no = id; +@@ -1820,8 +1834,7 @@ inline_small_functions (void) + max_count = max_count.max (edge->count.ipa ()); + } + ipa_free_postorder_info (); +- edge_growth_cache +- = new call_summary (symtab, false); ++ initialize_growth_caches (); + + if (dump_file) + fprintf (dump_file, +@@ -1872,8 +1885,8 @@ inline_small_functions (void) + } + if (update) + { +- struct cgraph_node *where = node->global.inlined_to +- ? node->global.inlined_to : node; ++ struct cgraph_node *where = node->inlined_to ++ ? node->inlined_to : node; + ipa_update_overall_fn_summary (where); + reset_edge_caches (where); + update_caller_keys (&edge_heap, where, +@@ -1902,11 +1915,10 @@ inline_small_functions (void) + if (!edge->inline_failed || !edge->callee->analyzed) + continue; + +-#if CHECKING_P + /* Be sure that caches are maintained consistent. + This check is affected by scaling roundoff errors when compiling for + IPA this we skip it in that case. */ +- if (!edge->callee->count.ipa_p () ++ if (flag_checking && !edge->callee->count.ipa_p () + && (!max_count.initialized_p () || !max_count.nonzero_p ())) + { + sreal cached_badness = edge_badness (edge, false); +@@ -1917,6 +1929,9 @@ inline_small_functions (void) + + if (edge_growth_cache != NULL) + edge_growth_cache->remove (edge); ++ reset_node_cache (edge->caller->inlined_to ++ ? 
edge->caller->inlined_to ++ : edge->caller); + gcc_assert (old_size_est == estimate_edge_size (edge)); + gcc_assert (old_time_est == estimate_edge_time (edge)); + /* FIXME: +@@ -1941,9 +1956,6 @@ inline_small_functions (void) + } + else + current_badness = edge_badness (edge, false); +-#else +- current_badness = edge_badness (edge, false); +-#endif + if (current_badness != badness) + { + if (edge_heap.min () && current_badness > edge_heap.min_key ()) +@@ -1969,7 +1981,7 @@ inline_small_functions (void) + fprintf (dump_file, + "\nConsidering %s with %i size\n", + callee->dump_name (), +- ipa_fn_summaries->get (callee)->size); ++ ipa_size_summaries->get (callee)->size); + fprintf (dump_file, + " to be inlined into %s in %s:%i\n" + " Estimated badness is %f, frequency %.2f.\n", +@@ -2017,8 +2029,8 @@ inline_small_functions (void) + if (edge->recursive_p ()) + { + where = edge->caller; +- if (where->global.inlined_to) +- where = where->global.inlined_to; ++ if (where->inlined_to) ++ where = where->inlined_to; + if (!recursive_inlining (edge, + opt_for_fn (edge->caller->decl, + flag_indirect_inlining) +@@ -2048,7 +2060,7 @@ inline_small_functions (void) + selective. */ + + where = edge->caller; +- while (where->global.inlined_to) ++ while (where->inlined_to) + { + if (where->decl == callee->decl) + outer_node = where, depth++; +@@ -2067,17 +2079,16 @@ inline_small_functions (void) + else if (depth && dump_file) + fprintf (dump_file, " Peeling recursion with depth %i\n", depth); + +- gcc_checking_assert (!callee->global.inlined_to); ++ gcc_checking_assert (!callee->inlined_to); + inline_call (edge, true, &new_indirect_edges, &overall_size, true); +- add_new_edges_to_heap (&edge_heap, new_indirect_edges); +- + reset_edge_caches (edge->callee); ++ add_new_edges_to_heap (&edge_heap, new_indirect_edges); + + update_callee_keys (&edge_heap, where, updated_nodes); + } + where = edge->caller; +- if (where->global.inlined_to) +- where = where->global.inlined_to; ++ if (where->inlined_to) ++ where = where->inlined_to; + + /* Our profitability metric can depend on local properties + such as number of inlinable calls and size of the function body. +@@ -2095,7 +2106,7 @@ inline_small_functions (void) + + if (dump_enabled_p ()) + { +- ipa_fn_summary *s = ipa_fn_summaries->get (edge->caller); ++ ipa_fn_summary *s = ipa_fn_summaries->get (where); + + /* dump_printf can't handle %+i. */ + char buf_net_change[100]; +@@ -2106,7 +2117,9 @@ inline_small_functions (void) + " Inlined %C into %C which now has time %f and " + "size %i, net change of %s.\n", + edge->callee, edge->caller, +- s->time.to_double (), s->size, buf_net_change); ++ s->time.to_double (), ++ ipa_size_summaries->get (edge->caller)->size, ++ buf_net_change); + } + if (min_size > overall_size) + { +@@ -2208,8 +2221,8 @@ flatten_function (struct cgraph_node *no + + node->aux = NULL; + if (update) +- ipa_update_overall_fn_summary (node->global.inlined_to +- ? node->global.inlined_to : node); ++ ipa_update_overall_fn_summary (node->inlined_to ++ ? node->inlined_to : node); + } + + /* Inline NODE to all callers. Worker for cgraph_for_node_and_aliases. 
+@@ -2223,7 +2236,7 @@ inline_to_all_callers_1 (struct cgraph_n + int *num_calls = (int *)data; + bool callee_removed = false; + +- while (node->callers && !node->global.inlined_to) ++ while (node->callers && !node->inlined_to) + { + struct cgraph_node *caller = node->callers->caller; + +@@ -2243,11 +2256,11 @@ inline_to_all_callers_1 (struct cgraph_n + fprintf (dump_file, + "\nInlining %s size %i.\n", + ultimate->name (), +- ipa_fn_summaries->get (ultimate)->size); ++ ipa_size_summaries->get (ultimate)->size); + fprintf (dump_file, + " Called once from %s %i insns.\n", + node->callers->caller->name (), +- ipa_fn_summaries->get (node->callers->caller)->size); ++ ipa_size_summaries->get (node->callers->caller)->size); + } + + /* Remember which callers we inlined to, delaying updating the +@@ -2258,7 +2271,7 @@ inline_to_all_callers_1 (struct cgraph_n + fprintf (dump_file, + " Inlined into %s which now has %i size\n", + caller->name (), +- ipa_fn_summaries->get (caller)->size); ++ ipa_size_summaries->get (caller)->size); + if (!(*num_calls)--) + { + if (dump_file) +@@ -2296,7 +2309,7 @@ dump_overall_stats (void) + struct cgraph_node *node; + + FOR_EACH_DEFINED_FUNCTION (node) +- if (!node->global.inlined_to ++ if (!node->inlined_to + && !node->alias) + { + ipa_fn_summary *s = ipa_fn_summaries->get (node); +@@ -2482,8 +2495,9 @@ ipa_inline (void) + for (i = nnodes - 1, j = i; i >= 0; i--) + { + node = order[i]; +- if (lookup_attribute ("flatten", +- DECL_ATTRIBUTES (node->decl)) != NULL) ++ if (node->definition ++ && lookup_attribute ("flatten", ++ DECL_ATTRIBUTES (node->decl)) != NULL) + order[j--] = order[i]; + } + +@@ -2588,8 +2602,8 @@ ipa_inline (void) + } + if (update) + { +- struct cgraph_node *where = node->global.inlined_to +- ? node->global.inlined_to : node; ++ struct cgraph_node *where = node->inlined_to ++ ? node->inlined_to : node; + reset_edge_caches (where); + ipa_update_overall_fn_summary (where); + } +diff -Nurp a/gcc/ipa-inline.h b/gcc/ipa-inline.h +--- a/gcc/ipa-inline.h 2020-04-30 15:14:04.608000000 +0800 ++++ b/gcc/ipa-inline.h 2020-04-30 15:14:56.608000000 +0800 +@@ -47,6 +47,8 @@ bool growth_likely_positive (struct cgra + int do_estimate_edge_size (struct cgraph_edge *edge); + sreal do_estimate_edge_time (struct cgraph_edge *edge); + ipa_hints do_estimate_edge_hints (struct cgraph_edge *edge); ++void reset_node_cache (struct cgraph_node *node); ++void initialize_growth_caches (); + void free_growth_caches (void); + + /* In ipa-inline.c */ +diff -Nurp a/gcc/ipa-inline-transform.c b/gcc/ipa-inline-transform.c +--- a/gcc/ipa-inline-transform.c 2020-04-30 15:14:04.568000000 +0800 ++++ b/gcc/ipa-inline-transform.c 2020-04-30 15:14:56.624000000 +0800 +@@ -47,6 +47,7 @@ along with GCC; see the file COPYING3. + #include "function.h" + #include "cfg.h" + #include "basic-block.h" ++#include "ipa-utils.h" + + int ncalls_inlined; + int nfunctions_inlined; +@@ -166,8 +167,8 @@ clone_inlined_nodes (struct cgraph_edge + struct cgraph_node *inlining_into; + struct cgraph_edge *next; + +- if (e->caller->global.inlined_to) +- inlining_into = e->caller->global.inlined_to; ++ if (e->caller->inlined_to) ++ inlining_into = e->caller->inlined_to; + else + inlining_into = e->caller; + +@@ -193,14 +194,14 @@ clone_inlined_nodes (struct cgraph_edge + + For now we keep the ohter functions in the group in program until + cgraph_remove_unreachable_functions gets rid of them. 
*/ +- gcc_assert (!e->callee->global.inlined_to); ++ gcc_assert (!e->callee->inlined_to); + e->callee->remove_from_same_comdat_group (); + if (e->callee->definition + && inline_account_function_p (e->callee)) + { + gcc_assert (!e->callee->alias); + if (overall_size) +- *overall_size -= ipa_fn_summaries->get (e->callee)->size; ++ *overall_size -= ipa_size_summaries->get (e->callee)->size; + nfunctions_inlined++; + } + duplicate = false; +@@ -226,7 +227,7 @@ clone_inlined_nodes (struct cgraph_edge + else + e->callee->remove_from_same_comdat_group (); + +- e->callee->global.inlined_to = inlining_into; ++ e->callee->inlined_to = inlining_into; + + /* Recursively clone all bodies. */ + for (e = e->callee->callees; e; e = next) +@@ -310,20 +311,24 @@ inline_call (struct cgraph_edge *e, bool + /* Don't inline inlined edges. */ + gcc_assert (e->inline_failed); + /* Don't even think of inlining inline clone. */ +- gcc_assert (!callee->global.inlined_to); ++ gcc_assert (!callee->inlined_to); + + to = e->caller; +- if (to->global.inlined_to) +- to = to->global.inlined_to; ++ if (to->inlined_to) ++ to = to->inlined_to; + if (to->thunk.thunk_p) + { + struct cgraph_node *target = to->callees->callee; ++ thunk_expansion = true; ++ symtab->call_cgraph_removal_hooks (to); + if (in_lto_p) + to->get_untransformed_body (); + to->expand_thunk (false, true); + /* When thunk is instrumented we may have multiple callees. */ + for (e = to->callees; e && e->callee != target; e = e->next_callee) + ; ++ symtab->call_cgraph_insertion_hooks (to); ++ thunk_expansion = false; + gcc_assert (e); + } + +@@ -442,9 +447,9 @@ inline_call (struct cgraph_edge *e, bool + + clone_inlined_nodes (e, true, update_original, overall_size); + +- gcc_assert (curr->callee->global.inlined_to == to); ++ gcc_assert (curr->callee->inlined_to == to); + +- old_size = ipa_fn_summaries->get (to)->size; ++ old_size = ipa_size_summaries->get (to)->size; + ipa_merge_fn_summary_after_inlining (e); + if (e->in_polymorphic_cdtor) + mark_all_inlined_calls_cdtor (e->callee); +@@ -458,8 +463,8 @@ inline_call (struct cgraph_edge *e, bool + work for further inlining into this function. Before inlining + the function we inlined to again we expect the caller to update + the overall summary. */ +- ipa_fn_summaries->get (to)->size += estimated_growth; +- new_size = ipa_fn_summaries->get (to)->size; ++ ipa_size_summaries->get (to)->size += estimated_growth; ++ new_size = ipa_size_summaries->get (to)->size; + + if (callee->calls_comdat_local) + to->calls_comdat_local = true; +diff -Nurp a/gcc/ipa-predicate.c b/gcc/ipa-predicate.c +--- a/gcc/ipa-predicate.c 2020-04-30 15:14:04.620000000 +0800 ++++ b/gcc/ipa-predicate.c 2020-04-30 15:14:56.620000000 +0800 +@@ -33,9 +33,36 @@ along with GCC; see the file COPYING3. + #include "fold-const.h" + #include "tree-pretty-print.h" + #include "gimple.h" ++#include "gimplify.h" + #include "data-streamer.h" + + ++/* Check whether two set of operations have same effects. 
*/ ++static bool ++expr_eval_ops_equal_p (expr_eval_ops ops1, expr_eval_ops ops2) ++{ ++ if (ops1) ++ { ++ if (!ops2 || ops1->length () != ops2->length ()) ++ return false; ++ ++ for (unsigned i = 0; i < ops1->length (); i++) ++ { ++ expr_eval_op &op1 = (*ops1)[i]; ++ expr_eval_op &op2 = (*ops2)[i]; ++ ++ if (op1.code != op2.code ++ || op1.index != op2.index ++ || !vrp_operand_equal_p (op1.val[0], op2.val[0]) ++ || !vrp_operand_equal_p (op1.val[1], op2.val[1]) ++ || !types_compatible_p (op1.type, op2.type)) ++ return false; ++ } ++ return true; ++ } ++ return !ops2; ++} ++ + /* Add clause CLAUSE into the predicate P. + When CONDITIONS is NULL do not perform checking whether NEW_CLAUSE + is obviously true. This is useful only when NEW_CLAUSE is known to be +@@ -110,14 +137,16 @@ predicate::add_clause (conditions condit + for (c2 = c1 + 1; c2 < num_conditions; c2++) + if (new_clause & (1 << c2)) + { +- condition *cc1 = +- &(*conditions)[c1 - predicate::first_dynamic_condition]; + condition *cc2 = + &(*conditions)[c2 - predicate::first_dynamic_condition]; + if (cc1->operand_num == cc2->operand_num +- && cc1->val == cc2->val ++ && vrp_operand_equal_p (cc1->val, cc2->val) + && cc2->code != is_not_constant +- && cc2->code != predicate::changed ++ && cc2->code != changed ++ && expr_eval_ops_equal_p (cc1->param_ops, cc2->param_ops) ++ && cc2->agg_contents == cc1->agg_contents ++ && cc2->by_ref == cc1->by_ref ++ && types_compatible_p (cc2->type, cc1->type) + && cc1->code == invert_tree_comparison (cc2->code, + HONOR_NANS (cc1->val))) + return; +@@ -300,6 +329,83 @@ dump_condition (FILE *f, conditions cond + if (c->agg_contents) + fprintf (f, "[%soffset: " HOST_WIDE_INT_PRINT_DEC "]", + c->by_ref ? "ref " : "", c->offset); ++ ++ for (unsigned i = 0; i < vec_safe_length (c->param_ops); i++) ++ { ++ expr_eval_op &op = (*(c->param_ops))[i]; ++ const char *op_name = op_symbol_code (op.code); ++ ++ if (op_name == op_symbol_code (ERROR_MARK)) ++ op_name = get_tree_code_name (op.code); ++ ++ fprintf (f, ",("); ++ ++ if (!op.val[0]) ++ { ++ switch (op.code) ++ { ++ case FLOAT_EXPR: ++ case FIX_TRUNC_EXPR: ++ case FIXED_CONVERT_EXPR: ++ case VIEW_CONVERT_EXPR: ++ CASE_CONVERT: ++ if (op.code == VIEW_CONVERT_EXPR) ++ fprintf (f, "VCE"); ++ fprintf (f, "("); ++ print_generic_expr (f, op.type); ++ fprintf (f, ")" ); ++ break; ++ ++ default: ++ fprintf (f, "%s", op_name); ++ } ++ fprintf (f, " #"); ++ } ++ else if (!op.val[1]) ++ { ++ if (op.index) ++ { ++ print_generic_expr (f, op.val[0]); ++ fprintf (f, " %s #", op_name); ++ } ++ else ++ { ++ fprintf (f, "# %s ", op_name); ++ print_generic_expr (f, op.val[0]); ++ } ++ } ++ else ++ { ++ fprintf (f, "%s ", op_name); ++ switch (op.index) ++ { ++ case 0: ++ fprintf (f, "#, "); ++ print_generic_expr (f, op.val[0]); ++ fprintf (f, ", "); ++ print_generic_expr (f, op.val[1]); ++ break; ++ ++ case 1: ++ print_generic_expr (f, op.val[0]); ++ fprintf (f, ", #, "); ++ print_generic_expr (f, op.val[1]); ++ break; ++ ++ case 2: ++ print_generic_expr (f, op.val[0]); ++ fprintf (f, ", "); ++ print_generic_expr (f, op.val[1]); ++ fprintf (f, ", #"); ++ break; ++ ++ default: ++ fprintf (f, "*, *, *"); ++ } ++ } ++ fprintf (f, ")"); ++ } ++ + if (c->code == predicate::is_not_constant) + { + fprintf (f, " not constant"); +@@ -398,8 +504,9 @@ predicate::remap_after_duplication (clau + for other purposes). 
*/ + + predicate +-predicate::remap_after_inlining (struct ipa_fn_summary *info, +- struct ipa_fn_summary *callee_info, ++predicate::remap_after_inlining (class ipa_fn_summary *info, ++ class ipa_node_params *params_summary, ++ class ipa_fn_summary *callee_info, + vec operand_map, + vec offset_map, + clause_t possible_truths, +@@ -460,10 +567,10 @@ predicate::remap_after_inlining (struct + ap.offset = c->offset + offset_delta; + ap.agg_contents = c->agg_contents; + ap.by_ref = c->by_ref; +- cond_predicate = add_condition (info, ++ cond_predicate = add_condition (info, params_summary, + operand_map[c->operand_num], +- c->size, &ap, c->code, +- c->val); ++ c->type, &ap, c->code, ++ c->val, c->param_ops); + } + } + /* Fixed conditions remains same, construct single +@@ -483,7 +590,7 @@ predicate::remap_after_inlining (struct + /* Read predicate from IB. */ + + void +-predicate::stream_in (struct lto_input_block *ib) ++predicate::stream_in (class lto_input_block *ib) + { + clause_t clause; + int k = 0; +@@ -516,21 +623,28 @@ predicate::stream_out (struct output_blo + } + + +-/* Add condition to condition list SUMMARY. OPERAND_NUM, SIZE, CODE and VAL +- correspond to fields of condition structure. AGGPOS describes whether the +- used operand is loaded from an aggregate and where in the aggregate it is. +- It can be NULL, which means this not a load from an aggregate. */ ++/* Add condition to condition list SUMMARY. OPERAND_NUM, TYPE, CODE, VAL and ++ PARAM_OPS correspond to fields of condition structure. AGGPOS describes ++ whether the used operand is loaded from an aggregate and where in the ++ aggregate it is. It can be NULL, which means this not a load from an ++ aggregate. */ + + predicate +-add_condition (struct ipa_fn_summary *summary, int operand_num, +- HOST_WIDE_INT size, struct agg_position_info *aggpos, +- enum tree_code code, tree val) ++add_condition (class ipa_fn_summary *summary, ++ class ipa_node_params *params_summary, ++ int operand_num, ++ tree type, struct agg_position_info *aggpos, ++ enum tree_code code, tree val, expr_eval_ops param_ops) + { +- int i; ++ int i, j; + struct condition *c; + struct condition new_cond; + HOST_WIDE_INT offset; + bool agg_contents, by_ref; ++ expr_eval_op *op; ++ ++ if (params_summary) ++ ipa_set_param_used_by_ipa_predicates (params_summary, operand_num, true); + + if (aggpos) + { +@@ -549,10 +663,11 @@ add_condition (struct ipa_fn_summary *su + for (i = 0; vec_safe_iterate (summary->conds, i, &c); i++) + { + if (c->operand_num == operand_num +- && c->size == size + && c->code == code +- && c->val == val ++ && types_compatible_p (c->type, type) ++ && vrp_operand_equal_p (c->val, val) + && c->agg_contents == agg_contents ++ && expr_eval_ops_equal_p (c->param_ops, param_ops) + && (!agg_contents || (c->offset == offset && c->by_ref == by_ref))) + return predicate::predicate_testing_cond (i); + } +@@ -562,11 +677,21 @@ add_condition (struct ipa_fn_summary *su + + new_cond.operand_num = operand_num; + new_cond.code = code; +- new_cond.val = val; ++ new_cond.type = unshare_expr_without_location (type); ++ new_cond.val = val ? 
unshare_expr_without_location (val) : val; + new_cond.agg_contents = agg_contents; + new_cond.by_ref = by_ref; + new_cond.offset = offset; +- new_cond.size = size; ++ new_cond.param_ops = vec_safe_copy (param_ops); ++ ++ for (j = 0; vec_safe_iterate (new_cond.param_ops, j, &op); j++) ++ { ++ if (op->val[0]) ++ op->val[0] = unshare_expr_without_location (op->val[0]); ++ if (op->val[1]) ++ op->val[1] = unshare_expr_without_location (op->val[1]); ++ } ++ + vec_safe_push (summary->conds, new_cond); + + return predicate::predicate_testing_cond (i); +diff -Nurp a/gcc/ipa-predicate.h b/gcc/ipa-predicate.h +--- a/gcc/ipa-predicate.h 2020-04-30 15:14:04.612000000 +0800 ++++ b/gcc/ipa-predicate.h 2020-04-30 15:14:56.620000000 +0800 +@@ -22,16 +22,36 @@ along with GCC; see the file COPYING3. + inlined into (i.e. known constant values of function parameters. + + Conditions that are interesting for function body are collected into CONDS +- vector. They are of simple for function_param OP VAL, where VAL is +- IPA invariant. The conditions are then referred by predicates. */ ++ vector. They are of simple as kind of a mathematical transformation on ++ function parameter, T(function_param), in which the parameter occurs only ++ once, and other operands are IPA invariant. The conditions are then ++ referred by predicates. */ ++ ++ ++/* A simplified representation of tree node, for unary, binary and ternary ++ operation. Computations on parameter are decomposed to a series of this ++ kind of structure. */ ++struct GTY(()) expr_eval_op ++{ ++ /* Result type of expression. */ ++ tree type; ++ /* Constant operands in expression, there are at most two. */ ++ tree val[2]; ++ /* Index of parameter operand in expression. */ ++ unsigned index : 2; ++ /* Operation code of expression. */ ++ ENUM_BITFIELD(tree_code) code : 16; ++}; ++ ++typedef vec *expr_eval_ops; + + struct GTY(()) condition + { + /* If agg_contents is set, this is the offset from which the used data was + loaded. */ + HOST_WIDE_INT offset; +- /* Size of the access reading the data (or the PARM_DECL SSA_NAME). */ +- HOST_WIDE_INT size; ++ /* Type of the access reading the data (or the PARM_DECL SSA_NAME). */ ++ tree type; + tree val; + int operand_num; + ENUM_BITFIELD(tree_code) code : 16; +@@ -41,6 +61,9 @@ struct GTY(()) condition + /* If agg_contents is set, this differentiates between loads from data + passed by reference and by value. */ + unsigned by_ref : 1; ++ /* A set of sequential operations on the parameter, which can be seen as ++ a mathmatical function on the parameter. */ ++ expr_eval_ops param_ops; + }; + + /* Information kept about parameter of call site. */ +@@ -54,6 +77,14 @@ struct inline_param_summary + + Value 0 is reserved for compile time invariants. */ + int change_prob; ++ bool equal_to (const inline_param_summary &other) const ++ { ++ return change_prob == other.change_prob; ++ } ++ bool useless_p (void) const ++ { ++ return change_prob == REG_BR_PROB_BASE; ++ } + }; + + typedef vec *conditions; +@@ -205,11 +236,12 @@ public: + predicate remap_after_duplication (clause_t); + + /* Return predicate equal to THIS after inlining. 
*/ +- predicate remap_after_inlining (struct ipa_fn_summary *, +- struct ipa_fn_summary *, ++ predicate remap_after_inlining (class ipa_fn_summary *, ++ class ipa_node_params *params_summary, ++ class ipa_fn_summary *, + vec, vec, clause_t, const predicate &); + +- void stream_in (struct lto_input_block *); ++ void stream_in (class lto_input_block *); + void stream_out (struct output_block *); + + private: +@@ -227,6 +259,9 @@ private: + }; + + void dump_condition (FILE *f, conditions conditions, int cond); +-predicate add_condition (struct ipa_fn_summary *summary, int operand_num, +- HOST_WIDE_INT size, struct agg_position_info *aggpos, +- enum tree_code code, tree val); ++predicate add_condition (class ipa_fn_summary *summary, ++ class ipa_node_params *params_summary, ++ int operand_num, ++ tree type, struct agg_position_info *aggpos, ++ enum tree_code code, tree val, ++ expr_eval_ops param_ops = NULL); +diff -Nurp a/gcc/ipa-profile.c b/gcc/ipa-profile.c +--- a/gcc/ipa-profile.c 2020-04-30 15:14:04.632000000 +0800 ++++ b/gcc/ipa-profile.c 2020-04-30 15:14:56.652000000 +0800 +@@ -326,8 +326,8 @@ ipa_propagate_frequency_1 (struct cgraph + if (profile_info + && !(edge->callee->count.ipa () == profile_count::zero ()) + && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED +- || (edge->caller->global.inlined_to +- && edge->caller->global.inlined_to->frequency ++ || (edge->caller->inlined_to ++ && edge->caller->inlined_to->frequency + != NODE_FREQUENCY_UNLIKELY_EXECUTED))) + d->maybe_unlikely_executed = false; + if (edge->count.ipa ().initialized_p () +@@ -477,6 +477,29 @@ ipa_propagate_frequency (struct cgraph_n + return changed; + } + ++/* Check that number of arguments of N agrees with E. ++ Be conservative when summaries are not present. */ ++ ++static bool ++check_argument_count (struct cgraph_node *n, struct cgraph_edge *e) ++{ ++ if (!ipa_node_params_sum || !ipa_edge_args_sum) ++ return true; ++ class ipa_node_params *info = IPA_NODE_REF (n->function_symbol ()); ++ if (!info) ++ return true; ++ if (!info->descriptors) ++ return true; ++ ipa_edge_args *e_info = IPA_EDGE_REF (e); ++ if (!e) ++ return true; ++ if (ipa_get_param_count (info) != ipa_get_cs_argument_count (e_info) ++ && (ipa_get_param_count (info) >= ipa_get_cs_argument_count (e_info) ++ || !stdarg_p (TREE_TYPE (n->decl)))) ++ return false; ++ return true; ++} ++ + /* Simple ipa profile pass propagating frequencies across the callgraph. 
*/ + + static unsigned int +@@ -600,14 +623,7 @@ ipa_profile (void) + "Not speculating: target is overwritable " + "and can be discarded.\n"); + } +- else if (ipa_node_params_sum && ipa_edge_args_sum +- && (!vec_safe_is_empty +- (IPA_NODE_REF (n2)->descriptors)) +- && ipa_get_param_count (IPA_NODE_REF (n2)) +- != ipa_get_cs_argument_count (IPA_EDGE_REF (e)) +- && (ipa_get_param_count (IPA_NODE_REF (n2)) +- >= ipa_get_cs_argument_count (IPA_EDGE_REF (e)) +- || !stdarg_p (TREE_TYPE (n2->decl)))) ++ else if (!check_argument_count (n2, e)) + { + nmismatch++; + if (dump_file) +diff -Nurp a/gcc/ipa-prop.c b/gcc/ipa-prop.c +--- a/gcc/ipa-prop.c 2020-04-30 15:14:04.616000000 +0800 ++++ b/gcc/ipa-prop.c 2020-04-30 15:14:56.676000000 +0800 +@@ -203,7 +203,7 @@ ipa_get_param_decl_index_1 (vecdescriptors, ptree); + } +@@ -227,8 +227,10 @@ ipa_populate_param_decls (struct cgraph_ + for (parm = fnargs; parm; parm = DECL_CHAIN (parm)) + { + descriptors[param_num].decl_or_type = parm; +- descriptors[param_num].move_cost = estimate_move_cost (TREE_TYPE (parm), +- true); ++ unsigned int cost = estimate_move_cost (TREE_TYPE (parm), true); ++ descriptors[param_num].move_cost = cost; ++ /* Watch overflow, move_cost is a bitfield. */ ++ gcc_checking_assert (cost == descriptors[param_num].move_cost); + param_num++; + } + } +@@ -253,7 +255,7 @@ count_formal_params (tree fndecl) + using ipa_initialize_node_params. */ + + void +-ipa_dump_param (FILE *file, struct ipa_node_params *info, int i) ++ipa_dump_param (FILE *file, class ipa_node_params *info, int i) + { + fprintf (file, "param #%i", i); + if ((*info->descriptors)[i].decl_or_type) +@@ -269,7 +271,7 @@ ipa_dump_param (FILE *file, struct ipa_n + static bool + ipa_alloc_node_params (struct cgraph_node *node, int param_count) + { +- struct ipa_node_params *info = IPA_NODE_REF (node); ++ class ipa_node_params *info = IPA_NODE_REF_GET_CREATE (node); + + if (!info->descriptors && param_count) + { +@@ -287,7 +289,7 @@ ipa_alloc_node_params (struct cgraph_nod + void + ipa_initialize_node_params (struct cgraph_node *node) + { +- struct ipa_node_params *info = IPA_NODE_REF (node); ++ class ipa_node_params *info = IPA_NODE_REF_GET_CREATE (node); + + if (!info->descriptors + && ipa_alloc_node_params (node, count_formal_params (node->decl))) +@@ -359,23 +361,50 @@ ipa_print_node_jump_functions_for_edge ( + + fprintf (f, " Aggregate passed by %s:\n", + jump_func->agg.by_ref ? "reference" : "value"); +- FOR_EACH_VEC_SAFE_ELT (jump_func->agg.items, j, item) ++ FOR_EACH_VEC_ELT (*jump_func->agg.items, j, item) + { + fprintf (f, " offset: " HOST_WIDE_INT_PRINT_DEC ", ", + item->offset); +- if (TYPE_P (item->value)) +- fprintf (f, "clobber of " HOST_WIDE_INT_PRINT_DEC " bits", +- tree_to_uhwi (TYPE_SIZE (item->value))); +- else ++ fprintf (f, "type: "); ++ print_generic_expr (f, item->type); ++ fprintf (f, ", "); ++ if (item->jftype == IPA_JF_PASS_THROUGH) ++ fprintf (f, "PASS THROUGH: %d,", ++ item->value.pass_through.formal_id); ++ else if (item->jftype == IPA_JF_LOAD_AGG) ++ { ++ fprintf (f, "LOAD AGG: %d", ++ item->value.pass_through.formal_id); ++ fprintf (f, " [offset: " HOST_WIDE_INT_PRINT_DEC ", by %s],", ++ item->value.load_agg.offset, ++ item->value.load_agg.by_ref ? 
"reference" ++ : "value"); ++ } ++ ++ if (item->jftype == IPA_JF_PASS_THROUGH ++ || item->jftype == IPA_JF_LOAD_AGG) ++ { ++ fprintf (f, " op %s", ++ get_tree_code_name (item->value.pass_through.operation)); ++ if (item->value.pass_through.operation != NOP_EXPR) ++ { ++ fprintf (f, " "); ++ print_generic_expr (f, item->value.pass_through.operand); ++ } ++ } ++ else if (item->jftype == IPA_JF_CONST) + { +- fprintf (f, "cst: "); +- print_generic_expr (f, item->value); ++ fprintf (f, "CONST: "); ++ print_generic_expr (f, item->value.constant); + } ++ else if (item->jftype == IPA_JF_UNKNOWN) ++ fprintf (f, "UNKNOWN: " HOST_WIDE_INT_PRINT_DEC " bits", ++ tree_to_uhwi (TYPE_SIZE (item->type))); + fprintf (f, "\n"); + } + } + +- struct ipa_polymorphic_call_context *ctx ++ class ipa_polymorphic_call_context *ctx + = ipa_get_ith_polymorhic_call_context (IPA_EDGE_REF (cs), i); + if (ctx && !ctx->useless_p ()) + { +@@ -432,7 +461,7 @@ ipa_print_node_jump_functions (FILE *f, + + for (cs = node->indirect_calls; cs; cs = cs->next_callee) + { +- struct cgraph_indirect_call_info *ii; ++ class cgraph_indirect_call_info *ii; + if (!ipa_edge_args_info_available_for_edge_p (cs)) + continue; + +@@ -1059,7 +1088,7 @@ bool + ipa_load_from_parm_agg (struct ipa_func_body_info *fbi, + vec *descriptors, + gimple *stmt, tree op, int *index_p, +- HOST_WIDE_INT *offset_p, HOST_WIDE_INT *size_p, ++ HOST_WIDE_INT *offset_p, poly_int64 *size_p, + bool *by_ref_p, bool *guaranteed_unmodified) + { + int index; +@@ -1135,6 +1164,67 @@ ipa_load_from_parm_agg (struct ipa_func_ + return false; + } + ++/* If STMT is an assignment that loads a value from a parameter declaration, ++ or from an aggregate passed as the parameter either by value or reference, ++ return the index of the parameter in ipa_node_params. Otherwise return -1. ++ ++ FBI holds gathered information about the function. INFO describes ++ parameters of the function, STMT is the assignment statement. If it is a ++ memory load from an aggregate, *OFFSET_P is filled with offset within the ++ aggregate, and *BY_REF_P specifies whether the aggregate is passed by ++ reference. */ ++ ++static int ++load_from_unmodified_param_or_agg (struct ipa_func_body_info *fbi, ++ class ipa_node_params *info, ++ gimple *stmt, ++ HOST_WIDE_INT *offset_p, ++ bool *by_ref_p) ++{ ++ int index = load_from_unmodified_param (fbi, info->descriptors, stmt); ++ poly_int64 size; ++ ++ /* Load value from a parameter declaration. */ ++ if (index >= 0) ++ { ++ *offset_p = -1; ++ return index; ++ } ++ ++ if (!gimple_assign_load_p (stmt)) ++ return -1; ++ ++ tree rhs = gimple_assign_rhs1 (stmt); ++ ++ /* Skip memory reference containing VIEW_CONVERT_EXPR. */ ++ for (tree t = rhs; handled_component_p (t); t = TREE_OPERAND (t, 0)) ++ if (TREE_CODE (t) == VIEW_CONVERT_EXPR) ++ return -1; ++ ++ /* Skip memory reference containing bit-field. 
*/ ++ if (TREE_CODE (rhs) == BIT_FIELD_REF ++ || contains_bitfld_component_ref_p (rhs)) ++ return -1; ++ ++ if (!ipa_load_from_parm_agg (fbi, info->descriptors, stmt, rhs, &index, ++ offset_p, &size, by_ref_p)) ++ return -1; ++ ++ gcc_assert (!maybe_ne (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (rhs))), ++ size)); ++ if (!*by_ref_p) ++ { ++ tree param_type = ipa_get_type (info, index); ++ ++ if (!param_type || !AGGREGATE_TYPE_P (param_type)) ++ return -1; ++ } ++ else if (TREE_THIS_VOLATILE (rhs)) ++ return -1; ++ ++ return index; ++} ++ + /* Given that an actual argument is an SSA_NAME (given in NAME) and is a result + of an assignment statement STMT, try to determine whether we are actually + handling any of the following cases and construct an appropriate jump +@@ -1190,7 +1280,7 @@ ipa_load_from_parm_agg (struct ipa_func_ + + static void + compute_complex_assign_jump_func (struct ipa_func_body_info *fbi, +- struct ipa_node_params *info, ++ class ipa_node_params *info, + struct ipa_jump_func *jfunc, + gcall *call, gimple *stmt, tree name, + tree param_type) +@@ -1346,7 +1436,7 @@ get_ancestor_addr_info (gimple *assign, + + static void + compute_complex_ancestor_jump_func (struct ipa_func_body_info *fbi, +- struct ipa_node_params *info, ++ class ipa_node_params *info, + struct ipa_jump_func *jfunc, + gcall *call, gphi *phi) + { +@@ -1440,11 +1530,11 @@ type_like_member_ptr_p (tree type, tree + } + + /* If RHS is an SSA_NAME and it is defined by a simple copy assign statement, +- return the rhs of its defining statement. Otherwise return RHS as it +- is. */ ++ return the rhs of its defining statement, and this statement is stored in ++ *RHS_STMT. Otherwise return RHS as it is. */ + + static inline tree +-get_ssa_def_if_simple_copy (tree rhs) ++get_ssa_def_if_simple_copy (tree rhs, gimple **rhs_stmt) + { + while (TREE_CODE (rhs) == SSA_NAME && !SSA_NAME_IS_DEFAULT_DEF (rhs)) + { +@@ -1454,100 +1544,323 @@ get_ssa_def_if_simple_copy (tree rhs) + rhs = gimple_assign_rhs1 (def_stmt); + else + break; ++ *rhs_stmt = def_stmt; + } + return rhs; + } + +-/* Simple linked list, describing known contents of an aggregate beforere +- call. */ ++/* Simple linked list, describing contents of an aggregate before call. */ + + struct ipa_known_agg_contents_list + { + /* Offset and size of the described part of the aggregate. */ + HOST_WIDE_INT offset, size; +- /* Known constant value or NULL if the contents is known to be unknown. */ +- tree constant; ++ ++ /* Type of the described part of the aggregate. */ ++ tree type; ++ ++ /* Known constant value or jump function data describing contents. */ ++ struct ipa_load_agg_data value; ++ + /* Pointer to the next structure in the list. */ + struct ipa_known_agg_contents_list *next; + }; + +-/* Find the proper place in linked list of ipa_known_agg_contents_list +- structures where to put a new one with the given LHS_OFFSET and LHS_SIZE, +- unless there is a partial overlap, in which case return NULL, or such +- element is already there, in which case set *ALREADY_THERE to true. */ +- +-static struct ipa_known_agg_contents_list ** +-get_place_in_agg_contents_list (struct ipa_known_agg_contents_list **list, +- HOST_WIDE_INT lhs_offset, +- HOST_WIDE_INT lhs_size, +- bool *already_there) ++/* Add an aggregate content item into a linked list of ++ ipa_known_agg_contents_list structure, in which all elements ++ are sorted ascendingly by offset. 
*/ ++ ++static inline void ++add_to_agg_contents_list (struct ipa_known_agg_contents_list **plist, ++ struct ipa_known_agg_contents_list *item) + { +- struct ipa_known_agg_contents_list **p = list; +- while (*p && (*p)->offset < lhs_offset) ++ struct ipa_known_agg_contents_list *list = *plist; ++ ++ for (; list; list = list->next) + { +- if ((*p)->offset + (*p)->size > lhs_offset) +- return NULL; +- p = &(*p)->next; ++ if (list->offset >= item->offset) ++ break; ++ ++ plist = &list->next; + } + +- if (*p && (*p)->offset < lhs_offset + lhs_size) ++ item->next = list; ++ *plist = item; ++} ++ ++/* Check whether a given aggregate content is clobbered by certain element in ++ a linked list of ipa_known_agg_contents_list. */ ++ ++static inline bool ++clobber_by_agg_contents_list_p (struct ipa_known_agg_contents_list *list, ++ struct ipa_known_agg_contents_list *item) ++{ ++ for (; list; list = list->next) + { +- if ((*p)->offset == lhs_offset && (*p)->size == lhs_size) +- /* We already know this value is subsequently overwritten with +- something else. */ +- *already_there = true; +- else +- /* Otherwise this is a partial overlap which we cannot +- represent. */ +- return NULL; ++ if (list->offset >= item->offset) ++ return list->offset < item->offset + item->size; ++ ++ if (list->offset + list->size > item->offset) ++ return true; + } +- return p; ++ ++ return false; + } + + /* Build aggregate jump function from LIST, assuming there are exactly +- CONST_COUNT constant entries there and that th offset of the passed argument ++ VALUE_COUNT entries there and that offset of the passed argument + is ARG_OFFSET and store it into JFUNC. */ + + static void + build_agg_jump_func_from_list (struct ipa_known_agg_contents_list *list, +- int const_count, HOST_WIDE_INT arg_offset, ++ int value_count, HOST_WIDE_INT arg_offset, + struct ipa_jump_func *jfunc) + { +- vec_alloc (jfunc->agg.items, const_count); +- while (list) ++ vec_alloc (jfunc->agg.items, value_count); ++ for (; list; list = list->next) ++ { ++ struct ipa_agg_jf_item item; ++ tree operand = list->value.pass_through.operand; ++ ++ if (list->value.pass_through.formal_id >= 0) ++ { ++ /* Content value is derived from some formal paramerter. */ ++ if (list->value.offset >= 0) ++ item.jftype = IPA_JF_LOAD_AGG; ++ else ++ item.jftype = IPA_JF_PASS_THROUGH; ++ ++ item.value.load_agg = list->value; ++ if (operand) ++ item.value.pass_through.operand ++ = unshare_expr_without_location (operand); ++ } ++ else if (operand) ++ { ++ /* Content value is known constant. */ ++ item.jftype = IPA_JF_CONST; ++ item.value.constant = unshare_expr_without_location (operand); ++ } ++ else ++ continue; ++ ++ item.type = list->type; ++ gcc_assert (tree_to_shwi (TYPE_SIZE (list->type)) == list->size); ++ ++ item.offset = list->offset - arg_offset; ++ gcc_assert ((item.offset % BITS_PER_UNIT) == 0); ++ ++ jfunc->agg.items->quick_push (item); ++ } ++} ++ ++/* Given an assignment statement STMT, try to collect information into ++ AGG_VALUE that will be used to construct jump function for RHS of the ++ assignment, from which content value of an aggregate part comes. ++ ++ Besides constant and simple pass-through jump functions, also try to ++ identify whether it matches the following pattern that can be described by ++ a load-value-from-aggregate jump function, which is a derivative of simple ++ pass-through jump function. ++ ++ foo (int *p) ++ { ++ ... 
++ ++ *(q_5 + 4) = *(p_3(D) + 28) op 1; ++ bar (q_5); ++ } ++ ++ Here IPA_LOAD_AGG_DATA data structure is informative enough to describe ++ constant, simple pass-through and load-vale-from-aggregate. If value ++ is constant, it will be kept in field OPERAND, and field FORMAL_ID is ++ set to -1. For simple pass-through and load-value-from-aggregate, field ++ FORMAL_ID specifies the related formal parameter index, and field ++ OFFSET can be used to distinguish them, -1 means simple pass-through, ++ otherwise means load-value-from-aggregate. */ ++ ++static void ++analyze_agg_content_value (struct ipa_func_body_info *fbi, ++ struct ipa_load_agg_data *agg_value, ++ gimple *stmt) ++{ ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ enum tree_code code; ++ int index = -1; ++ ++ /* Initialize jump function data for the aggregate part. */ ++ memset (agg_value, 0, sizeof (*agg_value)); ++ agg_value->pass_through.operation = NOP_EXPR; ++ agg_value->pass_through.formal_id = -1; ++ agg_value->offset = -1; ++ ++ if (AGGREGATE_TYPE_P (TREE_TYPE (lhs)) /* TODO: Support aggregate type. */ ++ || TREE_THIS_VOLATILE (lhs) ++ || TREE_CODE (lhs) == BIT_FIELD_REF ++ || contains_bitfld_component_ref_p (lhs)) ++ return; ++ ++ /* Skip SSA copies. */ ++ while (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) ++ { ++ if (TREE_CODE (rhs1) != SSA_NAME || SSA_NAME_IS_DEFAULT_DEF (rhs1)) ++ break; ++ ++ stmt = SSA_NAME_DEF_STMT (rhs1); ++ if (!is_gimple_assign (stmt)) ++ return; ++ ++ rhs1 = gimple_assign_rhs1 (stmt); ++ } ++ ++ code = gimple_assign_rhs_code (stmt); ++ switch (gimple_assign_rhs_class (stmt)) + { +- if (list->constant) ++ case GIMPLE_SINGLE_RHS: ++ if (is_gimple_ip_invariant (rhs1)) + { +- struct ipa_agg_jf_item item; +- item.offset = list->offset - arg_offset; +- gcc_assert ((item.offset % BITS_PER_UNIT) == 0); +- item.value = unshare_expr_without_location (list->constant); +- jfunc->agg.items->quick_push (item); ++ agg_value->pass_through.operand = rhs1; ++ return; + } +- list = list->next; ++ code = NOP_EXPR; ++ break; ++ ++ case GIMPLE_UNARY_RHS: ++ /* NOTE: A GIMPLE_UNARY_RHS operation might not be tcc_unary ++ (truth_not_expr is example), GIMPLE_BINARY_RHS does not imply ++ tcc_binary, this subtleness is somewhat misleading. ++ ++ Since tcc_unary is widely used in IPA-CP code to check an operation ++ with one operand, here we only allow tc_unary operation to avoid ++ possible problem. Then we can use (opclass == tc_unary) or not to ++ distinguish unary and binary. 
*/ ++ if (TREE_CODE_CLASS (code) != tcc_unary || CONVERT_EXPR_CODE_P (code)) ++ return; ++ ++ rhs1 = get_ssa_def_if_simple_copy (rhs1, &stmt); ++ break; ++ ++ case GIMPLE_BINARY_RHS: ++ { ++ gimple *rhs1_stmt = stmt; ++ gimple *rhs2_stmt = stmt; ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ ++ rhs1 = get_ssa_def_if_simple_copy (rhs1, &rhs1_stmt); ++ rhs2 = get_ssa_def_if_simple_copy (rhs2, &rhs2_stmt); ++ ++ if (is_gimple_ip_invariant (rhs2)) ++ { ++ agg_value->pass_through.operand = rhs2; ++ stmt = rhs1_stmt; ++ } ++ else if (is_gimple_ip_invariant (rhs1)) ++ { ++ if (TREE_CODE_CLASS (code) == tcc_comparison) ++ code = swap_tree_comparison (code); ++ else if (!commutative_tree_code (code)) ++ return; ++ ++ agg_value->pass_through.operand = rhs1; ++ stmt = rhs2_stmt; ++ rhs1 = rhs2; ++ } ++ else ++ return; ++ ++ if (TREE_CODE_CLASS (code) != tcc_comparison ++ && !useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (rhs1))) ++ return; ++ } ++ break; ++ ++ default: ++ return; ++ } ++ ++ if (TREE_CODE (rhs1) != SSA_NAME) ++ index = load_from_unmodified_param_or_agg (fbi, fbi->info, stmt, ++ &agg_value->offset, ++ &agg_value->by_ref); ++ else if (SSA_NAME_IS_DEFAULT_DEF (rhs1)) ++ index = ipa_get_param_decl_index (fbi->info, SSA_NAME_VAR (rhs1)); ++ ++ if (index >= 0) ++ { ++ if (agg_value->offset >= 0) ++ agg_value->type = TREE_TYPE (rhs1); ++ agg_value->pass_through.formal_id = index; ++ agg_value->pass_through.operation = code; + } ++ else ++ agg_value->pass_through.operand = NULL_TREE; ++} ++ ++/* If STMT is a memory store to the object whose address is BASE, extract ++ information (offset, size, and value) into CONTENT, and return true, ++ otherwise we conservatively assume the whole object is modified with ++ unknown content, and return false. CHECK_REF means that access to object ++ is expected to be in form of MEM_REF expression. */ ++ ++static bool ++extract_mem_content (struct ipa_func_body_info *fbi, ++ gimple *stmt, tree base, bool check_ref, ++ struct ipa_known_agg_contents_list *content) ++{ ++ HOST_WIDE_INT lhs_offset, lhs_size; ++ bool reverse; ++ ++ if (!is_gimple_assign (stmt)) ++ return false; ++ ++ tree lhs = gimple_assign_lhs (stmt); ++ tree lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset, &lhs_size, ++ &reverse); ++ if (!lhs_base) ++ return false; ++ ++ if (check_ref) ++ { ++ if (TREE_CODE (lhs_base) != MEM_REF ++ || TREE_OPERAND (lhs_base, 0) != base ++ || !integer_zerop (TREE_OPERAND (lhs_base, 1))) ++ return false; ++ } ++ else if (lhs_base != base) ++ return false; ++ ++ content->offset = lhs_offset; ++ content->size = lhs_size; ++ content->type = TREE_TYPE (lhs); ++ content->next = NULL; ++ ++ analyze_agg_content_value (fbi, &content->value, stmt); ++ return true; + } + + /* Traverse statements from CALL backwards, scanning whether an aggregate given +- in ARG is filled in with constant values. ARG can either be an aggregate +- expression or a pointer to an aggregate. ARG_TYPE is the type of the +- aggregate. JFUNC is the jump function into which the constants are +- subsequently stored. */ ++ in ARG is filled in constants or values that are derived from caller's ++ formal parameter in the way described by some kinds of jump functions. FBI ++ is the context of the caller function for interprocedural analysis. ARG can ++ either be an aggregate expression or a pointer to an aggregate. ARG_TYPE is ++ the type of the aggregate, JFUNC is the jump function for the aggregate. 
*/ + + static void +-determine_locally_known_aggregate_parts (gcall *call, tree arg, +- tree arg_type, +- struct ipa_jump_func *jfunc) +-{ +- struct ipa_known_agg_contents_list *list = NULL; +- int item_count = 0, const_count = 0; ++determine_known_aggregate_parts (struct ipa_func_body_info *fbi, ++ gcall *call, tree arg, ++ tree arg_type, ++ struct ipa_jump_func *jfunc) ++{ ++ struct ipa_known_agg_contents_list *list = NULL, *all_list = NULL; ++ bitmap visited = NULL; ++ int item_count = 0, value_count = 0; + HOST_WIDE_INT arg_offset, arg_size; +- gimple_stmt_iterator gsi; + tree arg_base; + bool check_ref, by_ref; + ao_ref r; + +- if (PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) == 0) ++ if ( PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) == 0) + return; + + /* The function operates in three stages. First, we prepare check_ref, r, +@@ -1606,91 +1919,73 @@ determine_locally_known_aggregate_parts + ao_ref_init (&r, arg); + } + +- /* Second stage walks back the BB, looks at individual statements and as long +- as it is confident of how the statements affect contents of the +- aggregates, it builds a sorted linked list of ipa_agg_jf_list structures +- describing it. */ +- gsi = gsi_for_stmt (call); +- gsi_prev (&gsi); +- for (; !gsi_end_p (gsi); gsi_prev (&gsi)) +- { +- struct ipa_known_agg_contents_list *n, **p; +- gimple *stmt = gsi_stmt (gsi); +- HOST_WIDE_INT lhs_offset, lhs_size; +- tree lhs, rhs, lhs_base; +- bool reverse; +- +- if (!stmt_may_clobber_ref_p_1 (stmt, &r)) +- continue; +- if (!gimple_assign_single_p (stmt)) +- break; +- +- lhs = gimple_assign_lhs (stmt); +- rhs = gimple_assign_rhs1 (stmt); +- if (!is_gimple_reg_type (TREE_TYPE (rhs)) +- || TREE_CODE (lhs) == BIT_FIELD_REF +- || contains_bitfld_component_ref_p (lhs)) +- break; ++ /* Second stage traverses virtual SSA web backwards starting from the call ++ statement, only looks at individual dominating virtual operand (its ++ definition dominates the call), as long as it is confident that content ++ of the aggregate is affected by definition of the virtual operand, it ++ builds a sorted linked list of ipa_agg_jf_list describing that. */ + +- lhs_base = get_ref_base_and_extent_hwi (lhs, &lhs_offset, +- &lhs_size, &reverse); +- if (!lhs_base) +- break; ++ for (tree dom_vuse = gimple_vuse (call); dom_vuse;) ++ { ++ gimple *stmt = SSA_NAME_DEF_STMT (dom_vuse); + +- if (check_ref) ++ if (gimple_code (stmt) == GIMPLE_PHI) + { +- if (TREE_CODE (lhs_base) != MEM_REF +- || TREE_OPERAND (lhs_base, 0) != arg_base +- || !integer_zerop (TREE_OPERAND (lhs_base, 1))) +- break; ++ dom_vuse = get_continuation_for_phi (stmt, &r, true, ++ fbi->aa_walk_budget, ++ &visited, false, NULL, NULL); ++ continue; + } +- else if (lhs_base != arg_base) ++ ++ if (stmt_may_clobber_ref_p_1 (stmt, &r)) + { +- if (DECL_P (lhs_base)) +- continue; +- else ++ struct ipa_known_agg_contents_list *content ++ = XALLOCA (struct ipa_known_agg_contents_list); ++ ++ if (!extract_mem_content (fbi, stmt, arg_base, check_ref, content)) + break; +- } + +- bool already_there = false; +- p = get_place_in_agg_contents_list (&list, lhs_offset, lhs_size, +- &already_there); +- if (!p) +- break; +- if (already_there) +- continue; ++ /* Now we get a dominating virtual operand, and need to check ++ whether its value is clobbered any other dominating one. 
*/ ++ if ((content->value.pass_through.formal_id >= 0 ++ || content->value.pass_through.operand) ++ && !clobber_by_agg_contents_list_p (all_list, content)) ++ { ++ struct ipa_known_agg_contents_list *copy ++ = XALLOCA (struct ipa_known_agg_contents_list); + +- rhs = get_ssa_def_if_simple_copy (rhs); +- n = XALLOCA (struct ipa_known_agg_contents_list); +- n->size = lhs_size; +- n->offset = lhs_offset; +- if (is_gimple_ip_invariant (rhs)) +- { +- n->constant = rhs; +- const_count++; ++ /* Add to the list consisting of only dominating virtual ++ operands, whose definitions can finally reach the call. */ ++ add_to_agg_contents_list (&list, (*copy = *content, copy)); ++ ++ if (++value_count == PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS)) ++ break; ++ } ++ ++ /* Add to the list consisting of all dominating virtual operands. */ ++ add_to_agg_contents_list (&all_list, content); ++ ++ if (++item_count == 2 * PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS)) ++ break; + } +- else +- n->constant = NULL_TREE; +- n->next = *p; +- *p = n; +- +- item_count++; +- if (const_count == PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS) +- || item_count == 2 * PARAM_VALUE (PARAM_IPA_MAX_AGG_ITEMS)) +- break; +- } ++ dom_vuse = gimple_vuse (stmt); ++ } ++ ++ if (visited) ++ BITMAP_FREE (visited); + + /* Third stage just goes over the list and creates an appropriate vector of +- ipa_agg_jf_item structures out of it, of sourse only if there are +- any known constants to begin with. */ ++ ipa_agg_jf_item structures out of it, of course only if there are ++ any meaningful items to begin with. */ + +- if (const_count) ++ if (value_count) + { + jfunc->agg.by_ref = by_ref; +- build_agg_jump_func_from_list (list, const_count, arg_offset, jfunc); ++ build_agg_jump_func_from_list (list, value_count, arg_offset, jfunc); + } + } + ++ + /* Return the Ith param type of callee associated with call graph + edge E. */ + +@@ -1797,7 +2092,7 @@ ipa_set_jfunc_vr (ipa_jump_func *jf, enu + jf->m_vr = ipa_get_value_range (type, min, max); + } + +-/* Assign to JF a pointer to a value_range just liek TMP but either fetch a ++/* Assign to JF a pointer to a value_range just like TMP but either fetch a + copy from ipa_vr_hash_table or allocate a new on in GC memory. 
*/ + + static void +@@ -1814,8 +2109,8 @@ static void + ipa_compute_jump_functions_for_edge (struct ipa_func_body_info *fbi, + struct cgraph_edge *cs) + { +- struct ipa_node_params *info = IPA_NODE_REF (cs->caller); +- struct ipa_edge_args *args = IPA_EDGE_REF (cs); ++ class ipa_node_params *info = IPA_NODE_REF (cs->caller); ++ class ipa_edge_args *args = IPA_EDGE_REF_GET_CREATE (cs); + gcall *call = cs->call_stmt; + int n, arg_num = gimple_call_num_args (call); + bool useful_context = false; +@@ -1839,7 +2134,7 @@ ipa_compute_jump_functions_for_edge (str + if (flag_devirtualize && POINTER_TYPE_P (TREE_TYPE (arg))) + { + tree instance; +- struct ipa_polymorphic_call_context context (cs->caller->decl, ++ class ipa_polymorphic_call_context context (cs->caller->decl, + arg, cs->call_stmt, + &instance); + context.get_dynamic_type (instance, arg, NULL, cs->call_stmt, +@@ -1978,7 +2273,7 @@ ipa_compute_jump_functions_for_edge (str + || !ipa_get_jf_ancestor_agg_preserved (jfunc)) + && (AGGREGATE_TYPE_P (TREE_TYPE (arg)) + || POINTER_TYPE_P (param_type))) +- determine_locally_known_aggregate_parts (call, arg, param_type, jfunc); ++ determine_known_aggregate_parts (fbi, call, arg, param_type, jfunc); + } + if (!useful_context) + vec_free (args->polymorphic_call_contexts); +@@ -2076,11 +2371,12 @@ ipa_is_ssa_with_stmt_def (tree t) + + /* Find the indirect call graph edge corresponding to STMT and mark it as a + call to a parameter number PARAM_INDEX. NODE is the caller. Return the +- indirect call graph edge. */ ++ indirect call graph edge. ++ If POLYMORPHIC is true record is as a destination of polymorphic call. */ + + static struct cgraph_edge * + ipa_note_param_call (struct cgraph_node *node, int param_index, +- gcall *stmt) ++ gcall *stmt, bool polymorphic) + { + struct cgraph_edge *cs; + +@@ -2089,6 +2385,11 @@ ipa_note_param_call (struct cgraph_node + cs->indirect_info->agg_contents = 0; + cs->indirect_info->member_ptr = 0; + cs->indirect_info->guaranteed_unmodified = 0; ++ ipa_set_param_used_by_indirect_call (IPA_NODE_REF (node), ++ param_index, true); ++ if (cs->indirect_info->polymorphic || polymorphic) ++ ipa_set_param_used_by_polymorphic_call ++ (IPA_NODE_REF (node), param_index, true); + return cs; + } + +@@ -2155,7 +2456,7 @@ static void + ipa_analyze_indirect_call_uses (struct ipa_func_body_info *fbi, gcall *call, + tree target) + { +- struct ipa_node_params *info = fbi->info; ++ class ipa_node_params *info = fbi->info; + HOST_WIDE_INT offset; + bool by_ref; + +@@ -2164,7 +2465,7 @@ ipa_analyze_indirect_call_uses (struct i + tree var = SSA_NAME_VAR (target); + int index = ipa_get_param_decl_index (info, var); + if (index >= 0) +- ipa_note_param_call (fbi->node, index, call); ++ ipa_note_param_call (fbi->node, index, call, false); + return; + } + +@@ -2176,7 +2477,8 @@ ipa_analyze_indirect_call_uses (struct i + gimple_assign_rhs1 (def), &index, &offset, + NULL, &by_ref, &guaranteed_unmodified)) + { +- struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, call); ++ struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, ++ call, false); + cs->indirect_info->offset = offset; + cs->indirect_info->agg_contents = 1; + cs->indirect_info->by_ref = by_ref; +@@ -2277,7 +2579,8 @@ ipa_analyze_indirect_call_uses (struct i + if (index >= 0 + && parm_preserved_before_stmt_p (fbi, index, call, rec)) + { +- struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, call); ++ struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, ++ call, false); + 
cs->indirect_info->offset = offset; + cs->indirect_info->agg_contents = 1; + cs->indirect_info->member_ptr = 1; +@@ -2306,7 +2609,7 @@ ipa_analyze_virtual_call_uses (struct ip + if (TREE_CODE (obj) != SSA_NAME) + return; + +- struct ipa_node_params *info = fbi->info; ++ class ipa_node_params *info = fbi->info; + if (SSA_NAME_IS_DEFAULT_DEF (obj)) + { + struct ipa_jump_func jfunc; +@@ -2337,8 +2640,9 @@ ipa_analyze_virtual_call_uses (struct ip + return; + } + +- struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, call); +- struct cgraph_indirect_call_info *ii = cs->indirect_info; ++ struct cgraph_edge *cs = ipa_note_param_call (fbi->node, index, ++ call, true); ++ class cgraph_indirect_call_info *ii = cs->indirect_info; + ii->offset = anc_offset; + ii->otr_token = tree_to_uhwi (OBJ_TYPE_REF_TOKEN (target)); + ii->otr_type = obj_type_ref_class (target); +@@ -2410,7 +2714,7 @@ ipa_analyze_stmt_uses (struct ipa_func_b + static bool + visit_ref_for_mod_analysis (gimple *, tree op, tree, void *data) + { +- struct ipa_node_params *info = (struct ipa_node_params *) data; ++ class ipa_node_params *info = (class ipa_node_params *) data; + + op = get_base_address (op); + if (op +@@ -2458,7 +2762,7 @@ ipa_analyze_params_uses_in_bb (struct ip + static void + ipa_analyze_controlled_uses (struct cgraph_node *node) + { +- struct ipa_node_params *info = IPA_NODE_REF (node); ++ class ipa_node_params *info = IPA_NODE_REF (node); + + for (int i = 0; i < ipa_get_param_count (info); i++) + { +@@ -2550,11 +2854,11 @@ void + ipa_analyze_node (struct cgraph_node *node) + { + struct ipa_func_body_info fbi; +- struct ipa_node_params *info; ++ class ipa_node_params *info; + + ipa_check_create_node_params (); + ipa_check_create_edge_args (); +- info = IPA_NODE_REF (node); ++ info = IPA_NODE_REF_GET_CREATE (node); + + if (info->analysis_done) + return; +@@ -2610,22 +2914,96 @@ static void + update_jump_functions_after_inlining (struct cgraph_edge *cs, + struct cgraph_edge *e) + { +- struct ipa_edge_args *top = IPA_EDGE_REF (cs); +- struct ipa_edge_args *args = IPA_EDGE_REF (e); ++ class ipa_edge_args *top = IPA_EDGE_REF (cs); ++ class ipa_edge_args *args = IPA_EDGE_REF (e); ++ if (!args) ++ return; + int count = ipa_get_cs_argument_count (args); + int i; + + for (i = 0; i < count; i++) + { + struct ipa_jump_func *dst = ipa_get_ith_jump_func (args, i); +- struct ipa_polymorphic_call_context *dst_ctx ++ class ipa_polymorphic_call_context *dst_ctx + = ipa_get_ith_polymorhic_call_context (args, i); + ++ if (dst->agg.items) ++ { ++ struct ipa_agg_jf_item *item; ++ int j; ++ ++ FOR_EACH_VEC_ELT (*dst->agg.items, j, item) ++ { ++ int dst_fid; ++ struct ipa_jump_func *src; ++ ++ if (item->jftype != IPA_JF_PASS_THROUGH ++ && item->jftype != IPA_JF_LOAD_AGG) ++ continue; ++ ++ dst_fid = item->value.pass_through.formal_id; ++ if (!top || dst_fid >= ipa_get_cs_argument_count (top)) ++ { ++ item->jftype = IPA_JF_UNKNOWN; ++ continue; ++ } ++ ++ item->value.pass_through.formal_id = -1; ++ src = ipa_get_ith_jump_func (top, dst_fid); ++ if (src->type == IPA_JF_CONST) ++ { ++ if (item->jftype == IPA_JF_PASS_THROUGH ++ && item->value.pass_through.operation == NOP_EXPR) ++ { ++ item->jftype = IPA_JF_CONST; ++ item->value.constant = src->value.constant.value; ++ continue; ++ } ++ } ++ else if (src->type == IPA_JF_PASS_THROUGH ++ && src->value.pass_through.operation == NOP_EXPR) ++ { ++ if (item->jftype == IPA_JF_PASS_THROUGH ++ || !item->value.load_agg.by_ref ++ || src->value.pass_through.agg_preserved) ++ 
item->value.pass_through.formal_id ++ = src->value.pass_through.formal_id; ++ } ++ else if (src->type == IPA_JF_ANCESTOR) ++ { ++ if (item->jftype == IPA_JF_PASS_THROUGH) ++ { ++ if (!src->value.ancestor.offset) ++ item->value.pass_through.formal_id ++ = src->value.ancestor.formal_id; ++ } ++ else if (src->value.ancestor.agg_preserved) ++ { ++ gcc_checking_assert (item->value.load_agg.by_ref); ++ ++ item->value.pass_through.formal_id ++ = src->value.ancestor.formal_id; ++ item->value.load_agg.offset ++ += src->value.ancestor.offset; ++ } ++ } ++ ++ if (item->value.pass_through.formal_id < 0) ++ item->jftype = IPA_JF_UNKNOWN; ++ } ++ } ++ ++ if (!top) ++ { ++ ipa_set_jf_unknown (dst); ++ continue; ++ } ++ + if (dst->type == IPA_JF_ANCESTOR) + { + struct ipa_jump_func *src; + int dst_fid = dst->value.ancestor.formal_id; +- struct ipa_polymorphic_call_context *src_ctx ++ class ipa_polymorphic_call_context *src_ctx + = ipa_get_ith_polymorhic_call_context (top, dst_fid); + + /* Variable number of arguments can cause havoc if we try to access +@@ -2641,7 +3019,7 @@ update_jump_functions_after_inlining (st + + if (src_ctx && !src_ctx->useless_p ()) + { +- struct ipa_polymorphic_call_context ctx = *src_ctx; ++ class ipa_polymorphic_call_context ctx = *src_ctx; + + /* TODO: Make type preserved safe WRT contexts. */ + if (!ipa_get_jf_ancestor_type_preserved (dst)) +@@ -2660,8 +3038,11 @@ update_jump_functions_after_inlining (st + } + } + +- if (src->agg.items +- && (dst->value.ancestor.agg_preserved || !src->agg.by_ref)) ++ /* Parameter and argument in ancestor jump function must be pointer ++ type, which means access to aggregate must be by-reference. */ ++ gcc_assert (!src->agg.items || src->agg.by_ref); ++ ++ if (src->agg.items && dst->value.ancestor.agg_preserved) + { + struct ipa_agg_jf_item *item; + int j; +@@ -2705,18 +3086,18 @@ update_jump_functions_after_inlining (st + /* We must check range due to calls with variable number of arguments + and we cannot combine jump functions with operations. */ + if (dst->value.pass_through.operation == NOP_EXPR +- && (dst->value.pass_through.formal_id ++ && (top && dst->value.pass_through.formal_id + < ipa_get_cs_argument_count (top))) + { + int dst_fid = dst->value.pass_through.formal_id; + src = ipa_get_ith_jump_func (top, dst_fid); + bool dst_agg_p = ipa_get_jf_pass_through_agg_preserved (dst); +- struct ipa_polymorphic_call_context *src_ctx ++ class ipa_polymorphic_call_context *src_ctx + = ipa_get_ith_polymorhic_call_context (top, dst_fid); + + if (src_ctx && !src_ctx->useless_p ()) + { +- struct ipa_polymorphic_call_context ctx = *src_ctx; ++ class ipa_polymorphic_call_context ctx = *src_ctx; + + /* TODO: Make type preserved safe WRT contexts. */ + if (!ipa_get_jf_pass_through_type_preserved (dst)) +@@ -2856,7 +3237,7 @@ ipa_make_edge_direct_to_target (struct c + + /* Because may-edges are not explicitely represented and vtable may be external, + we may create the first reference to the object in the unit. */ +- if (!callee || callee->global.inlined_to) ++ if (!callee || callee->inlined_to) + { + + /* We are better to ensure we can refer to it. +@@ -2909,7 +3290,7 @@ ipa_make_edge_direct_to_target (struct c + + /* We cannot make edges to inline clones. It is bug that someone removed + the cgraph node too early. 
*/ +- gcc_assert (!callee->global.inlined_to); ++ gcc_assert (!callee->inlined_to); + + if (dump_file && !unreachable) + { +@@ -3059,18 +3440,19 @@ ipa_find_agg_cst_from_init (tree scalar, + return find_constructor_constant_at_offset (DECL_INITIAL (scalar), offset); + } + +-/* Retrieve value from aggregate jump function AGG or static initializer of +- SCALAR (which can be NULL) for the given OFFSET or return NULL if there is +- none. BY_REF specifies whether the value has to be passed by reference or +- by value. If FROM_GLOBAL_CONSTANT is non-NULL, then the boolean it points +- to is set to true if the value comes from an initializer of a constant. */ ++/* Retrieve value from AGG, a set of known offset/value for an aggregate or ++ static initializer of SCALAR (which can be NULL) for the given OFFSET or ++ return NULL if there is none. BY_REF specifies whether the value has to be ++ passed by reference or by value. If FROM_GLOBAL_CONSTANT is non-NULL, then ++ the boolean it points to is set to true if the value comes from an ++ initializer of a constant. */ + + tree +-ipa_find_agg_cst_for_param (struct ipa_agg_jump_function *agg, tree scalar, ++ipa_find_agg_cst_for_param (struct ipa_agg_value_set *agg, tree scalar, + HOST_WIDE_INT offset, bool by_ref, + bool *from_global_constant) + { +- struct ipa_agg_jf_item *item; ++ struct ipa_agg_value *item; + int i; + + if (scalar) +@@ -3088,7 +3470,7 @@ ipa_find_agg_cst_for_param (struct ipa_a + || by_ref != agg->by_ref) + return NULL; + +- FOR_EACH_VEC_SAFE_ELT (agg->items, i, item) ++ FOR_EACH_VEC_ELT (agg->items, i, item) + if (item->offset == offset) + { + /* Currently we do not have clobber values, return NULL for them once +@@ -3184,12 +3566,14 @@ try_decrement_rdesc_refcount (struct ipa + pointer formal parameter described by jump function JFUNC. TARGET_TYPE is + the type of the parameter to which the result of JFUNC is passed. If it can + be determined, return the newly direct edge, otherwise return NULL. +- NEW_ROOT_INFO is the node info that JFUNC lattices are relative to. */ ++ NEW_ROOT and NEW_ROOT_INFO is the node and its info that JFUNC lattices are ++ relative to. */ + + static struct cgraph_edge * + try_make_edge_direct_simple_call (struct cgraph_edge *ie, + struct ipa_jump_func *jfunc, tree target_type, +- struct ipa_node_params *new_root_info) ++ struct cgraph_node *new_root, ++ class ipa_node_params *new_root_info) + { + struct cgraph_edge *cs; + tree target; +@@ -3198,10 +3582,14 @@ try_make_edge_direct_simple_call (struct + if (agg_contents) + { + bool from_global_constant; +- target = ipa_find_agg_cst_for_param (&jfunc->agg, scalar, ++ ipa_agg_value_set agg = ipa_agg_value_set_from_jfunc (new_root_info, ++ new_root, ++ &jfunc->agg); ++ target = ipa_find_agg_cst_for_param (&agg, scalar, + ie->indirect_info->offset, + ie->indirect_info->by_ref, + &from_global_constant); ++ agg.release (); + if (target + && !from_global_constant + && !ie->indirect_info->guaranteed_unmodified) +@@ -3255,12 +3643,16 @@ ipa_impossible_devirt_target (struct cgr + call based on a formal parameter which is described by jump function JFUNC + and if it can be determined, make it direct and return the direct edge. + Otherwise, return NULL. CTX describes the polymorphic context that the +- parameter the call is based on brings along with it. */ ++ parameter the call is based on brings along with it. NEW_ROOT and ++ NEW_ROOT_INFO is the node and its info that JFUNC lattices are relative ++ to. 
*/ + + static struct cgraph_edge * + try_make_edge_direct_virtual_call (struct cgraph_edge *ie, + struct ipa_jump_func *jfunc, +- struct ipa_polymorphic_call_context ctx) ++ class ipa_polymorphic_call_context ctx, ++ struct cgraph_node *new_root, ++ class ipa_node_params *new_root_info) + { + tree target = NULL; + bool speculative = false; +@@ -3278,9 +3670,13 @@ try_make_edge_direct_virtual_call (struc + unsigned HOST_WIDE_INT offset; + tree scalar = (jfunc->type == IPA_JF_CONST) ? ipa_get_jf_constant (jfunc) + : NULL; +- tree t = ipa_find_agg_cst_for_param (&jfunc->agg, scalar, ++ ipa_agg_value_set agg = ipa_agg_value_set_from_jfunc (new_root_info, ++ new_root, ++ &jfunc->agg); ++ tree t = ipa_find_agg_cst_for_param (&agg, scalar, + ie->indirect_info->offset, + true); ++ agg.release (); + if (t && vtable_pointer_value_to_vtable (t, &vtable, &offset)) + { + bool can_refer; +@@ -3370,21 +3766,22 @@ update_indirect_edges_after_inlining (st + struct cgraph_node *node, + vec *new_edges) + { +- struct ipa_edge_args *top; ++ class ipa_edge_args *top; + struct cgraph_edge *ie, *next_ie, *new_direct_edge; +- struct ipa_node_params *new_root_info, *inlined_node_info; ++ struct cgraph_node *new_root; ++ class ipa_node_params *new_root_info, *inlined_node_info; + bool res = false; + + ipa_check_create_edge_args (); + top = IPA_EDGE_REF (cs); +- new_root_info = IPA_NODE_REF (cs->caller->global.inlined_to +- ? cs->caller->global.inlined_to +- : cs->caller); ++ new_root = cs->caller->inlined_to ++ ? cs->caller->inlined_to : cs->caller; ++ new_root_info = IPA_NODE_REF (new_root); + inlined_node_info = IPA_NODE_REF (cs->callee->function_symbol ()); + + for (ie = node->indirect_calls; ie; ie = next_ie) + { +- struct cgraph_indirect_call_info *ici = ie->indirect_info; ++ class cgraph_indirect_call_info *ici = ie->indirect_info; + struct ipa_jump_func *jfunc; + int param_index; + cgraph_node *spec_target = NULL; +@@ -3395,7 +3792,7 @@ update_indirect_edges_after_inlining (st + continue; + + /* We must check range due to calls with variable number of arguments: */ +- if (ici->param_index >= ipa_get_cs_argument_count (top)) ++ if (!top || ici->param_index >= ipa_get_cs_argument_count (top)) + { + ici->param_index = -1; + continue; +@@ -3418,13 +3815,16 @@ update_indirect_edges_after_inlining (st + { + ipa_polymorphic_call_context ctx; + ctx = ipa_context_from_jfunc (new_root_info, cs, param_index, jfunc); +- new_direct_edge = try_make_edge_direct_virtual_call (ie, jfunc, ctx); ++ new_direct_edge = try_make_edge_direct_virtual_call (ie, jfunc, ctx, ++ new_root, ++ new_root_info); + } + else + { + tree target_type = ipa_get_type (inlined_node_info, param_index); + new_direct_edge = try_make_edge_direct_simple_call (ie, jfunc, + target_type, ++ new_root, + new_root_info); + } + +@@ -3470,6 +3870,11 @@ update_indirect_edges_after_inlining (st + if (ici->polymorphic + && !ipa_get_jf_pass_through_type_preserved (jfunc)) + ici->vptr_changed = true; ++ ipa_set_param_used_by_indirect_call (new_root_info, ++ ici->param_index, true); ++ if (ici->polymorphic) ++ ipa_set_param_used_by_polymorphic_call (new_root_info, ++ ici->param_index, true); + } + } + else if (jfunc->type == IPA_JF_ANCESTOR) +@@ -3485,6 +3890,11 @@ update_indirect_edges_after_inlining (st + if (ici->polymorphic + && !ipa_get_jf_ancestor_type_preserved (jfunc)) + ici->vptr_changed = true; ++ ipa_set_param_used_by_indirect_call (new_root_info, ++ ici->param_index, true); ++ if (ici->polymorphic) ++ ipa_set_param_used_by_polymorphic_call (new_root_info, 
++ ici->param_index, true); + } + } + else +@@ -3541,13 +3951,18 @@ combine_controlled_uses_counters (int c, + static void + propagate_controlled_uses (struct cgraph_edge *cs) + { +- struct ipa_edge_args *args = IPA_EDGE_REF (cs); +- struct cgraph_node *new_root = cs->caller->global.inlined_to +- ? cs->caller->global.inlined_to : cs->caller; +- struct ipa_node_params *new_root_info = IPA_NODE_REF (new_root); +- struct ipa_node_params *old_root_info = IPA_NODE_REF (cs->callee); ++ class ipa_edge_args *args = IPA_EDGE_REF (cs); ++ if (!args) ++ return; ++ struct cgraph_node *new_root = cs->caller->inlined_to ++ ? cs->caller->inlined_to : cs->caller; ++ class ipa_node_params *new_root_info = IPA_NODE_REF (new_root); ++ class ipa_node_params *old_root_info = IPA_NODE_REF (cs->callee); + int count, i; + ++ if (!old_root_info) ++ return; ++ + count = MIN (ipa_get_cs_argument_count (args), + ipa_get_param_count (old_root_info)); + for (i = 0; i < count; i++) +@@ -3608,9 +4023,9 @@ propagate_controlled_uses (struct cgraph + gcc_checking_assert (ok); + + clone = cs->caller; +- while (clone->global.inlined_to +- && clone != rdesc->cs->caller +- && IPA_NODE_REF (clone)->ipcp_orig_node) ++ while (clone->inlined_to ++ && clone->ipcp_clone ++ && clone != rdesc->cs->caller) + { + struct ipa_ref *ref; + ref = clone->find_reference (n, NULL, 0); +@@ -3669,6 +4084,7 @@ ipa_propagate_indirect_call_infos (struc + + propagate_controlled_uses (cs); + changed = propagate_info_to_inlined_callees (cs, cs->callee, new_edges); ++ ipa_node_params_sum->remove (cs->callee); + + return changed; + } +@@ -3830,16 +4246,16 @@ ipa_edge_args_sum_t::duplicate (cgraph_e + We need to find the duplicate that refers to our tree of + inline clones. */ + +- gcc_assert (dst->caller->global.inlined_to); ++ gcc_assert (dst->caller->inlined_to); + for (dst_rdesc = src_rdesc->next_duplicate; + dst_rdesc; + dst_rdesc = dst_rdesc->next_duplicate) + { + struct cgraph_node *top; +- top = dst_rdesc->cs->caller->global.inlined_to +- ? dst_rdesc->cs->caller->global.inlined_to ++ top = dst_rdesc->cs->caller->inlined_to ++ ? dst_rdesc->cs->caller->inlined_to + : dst_rdesc->cs->caller; +- if (dst->caller->global.inlined_to == top) ++ if (dst->caller->inlined_to == top) + break; + } + gcc_assert (dst_rdesc); +@@ -3849,9 +4265,9 @@ ipa_edge_args_sum_t::duplicate (cgraph_e + else if (dst_jf->type == IPA_JF_PASS_THROUGH + && src->caller == dst->caller) + { +- struct cgraph_node *inline_root = dst->caller->global.inlined_to +- ? dst->caller->global.inlined_to : dst->caller; +- struct ipa_node_params *root_info = IPA_NODE_REF (inline_root); ++ struct cgraph_node *inline_root = dst->caller->inlined_to ++ ? 
dst->caller->inlined_to : dst->caller; ++ class ipa_node_params *root_info = IPA_NODE_REF (inline_root); + int idx = ipa_get_jf_pass_through_formal_id (dst_jf); + + int c = ipa_get_controlled_uses (root_info, idx); +@@ -3995,7 +4411,7 @@ void + ipa_print_node_params (FILE *f, struct cgraph_node *node) + { + int i, count; +- struct ipa_node_params *info; ++ class ipa_node_params *info; + + if (!node->definition) + return; +@@ -4010,6 +4426,12 @@ ipa_print_node_params (FILE *f, struct c + ipa_dump_param (f, info, i); + if (ipa_is_param_used (info, i)) + fprintf (f, " used"); ++ if (ipa_is_param_used_by_ipa_predicates (info, i)) ++ fprintf (f, " used_by_ipa_predicates"); ++ if (ipa_is_param_used_by_indirect_call (info, i)) ++ fprintf (f, " used_by_indirect_call"); ++ if (ipa_is_param_used_by_polymorphic_call (info, i)) ++ fprintf (f, " used_by_polymorphic_call"); + c = ipa_get_controlled_uses (info, i); + if (c == IPA_UNDESCRIBED_USE) + fprintf (f, " undescribed_use"); +@@ -4104,6 +4526,8 @@ ipa_write_jump_function (struct output_b + bp_pack_value (&bp, jump_func->value.ancestor.agg_preserved, 1); + streamer_write_bitpack (&bp); + break; ++ default: ++ fatal_error (UNKNOWN_LOCATION, "invalid jump function in LTO stream"); + } + + count = vec_safe_length (jump_func->agg.items); +@@ -4117,8 +4541,36 @@ ipa_write_jump_function (struct output_b + + FOR_EACH_VEC_SAFE_ELT (jump_func->agg.items, i, item) + { ++ stream_write_tree (ob, item->type, true); + streamer_write_uhwi (ob, item->offset); +- stream_write_tree (ob, item->value, true); ++ streamer_write_uhwi (ob, item->jftype); ++ switch (item->jftype) ++ { ++ case IPA_JF_UNKNOWN: ++ break; ++ case IPA_JF_CONST: ++ stream_write_tree (ob, item->value.constant, true); ++ break; ++ case IPA_JF_PASS_THROUGH: ++ case IPA_JF_LOAD_AGG: ++ streamer_write_uhwi (ob, item->value.pass_through.operation); ++ streamer_write_uhwi (ob, item->value.pass_through.formal_id); ++ if (TREE_CODE_CLASS (item->value.pass_through.operation) ++ != tcc_unary) ++ stream_write_tree (ob, item->value.pass_through.operand, true); ++ if (item->jftype == IPA_JF_LOAD_AGG) ++ { ++ stream_write_tree (ob, item->value.load_agg.type, true); ++ streamer_write_uhwi (ob, item->value.load_agg.offset); ++ bp = bitpack_create (ob->main_stream); ++ bp_pack_value (&bp, item->value.load_agg.by_ref, 1); ++ streamer_write_bitpack (&bp); ++ } ++ break; ++ default: ++ fatal_error (UNKNOWN_LOCATION, ++ "invalid jump function in LTO stream"); ++ } + } + + bp = bitpack_create (ob->main_stream); +@@ -4143,10 +4595,10 @@ ipa_write_jump_function (struct output_b + /* Read in jump function JUMP_FUNC from IB. 
*/ + + static void +-ipa_read_jump_function (struct lto_input_block *ib, ++ipa_read_jump_function (class lto_input_block *ib, + struct ipa_jump_func *jump_func, + struct cgraph_edge *cs, +- struct data_in *data_in, ++ class data_in *data_in, + bool prevails) + { + enum jump_func_type jftype; +@@ -4215,8 +4667,39 @@ ipa_read_jump_function (struct lto_input + for (i = 0; i < count; i++) + { + struct ipa_agg_jf_item item; ++ item.type = stream_read_tree (ib, data_in); + item.offset = streamer_read_uhwi (ib); +- item.value = stream_read_tree (ib, data_in); ++ item.jftype = (enum jump_func_type) streamer_read_uhwi (ib); ++ ++ switch (item.jftype) ++ { ++ case IPA_JF_UNKNOWN: ++ break; ++ case IPA_JF_CONST: ++ item.value.constant = stream_read_tree (ib, data_in); ++ break; ++ case IPA_JF_PASS_THROUGH: ++ case IPA_JF_LOAD_AGG: ++ operation = (enum tree_code) streamer_read_uhwi (ib); ++ item.value.pass_through.operation = operation; ++ item.value.pass_through.formal_id = streamer_read_uhwi (ib); ++ if (TREE_CODE_CLASS (operation) == tcc_unary) ++ item.value.pass_through.operand = NULL_TREE; ++ else ++ item.value.pass_through.operand = stream_read_tree (ib, data_in); ++ if (item.jftype == IPA_JF_LOAD_AGG) ++ { ++ struct bitpack_d bp; ++ item.value.load_agg.type = stream_read_tree (ib, data_in); ++ item.value.load_agg.offset = streamer_read_uhwi (ib); ++ bp = streamer_read_bitpack (ib); ++ item.value.load_agg.by_ref = bp_unpack_value (&bp, 1); ++ } ++ break; ++ default: ++ fatal_error (UNKNOWN_LOCATION, ++ "invalid jump function in LTO stream"); ++ } + if (prevails) + jump_func->agg.items->quick_push (item); + } +@@ -4255,7 +4738,7 @@ static void + ipa_write_indirect_edge_info (struct output_block *ob, + struct cgraph_edge *cs) + { +- struct cgraph_indirect_call_info *ii = cs->indirect_info; ++ class cgraph_indirect_call_info *ii = cs->indirect_info; + struct bitpack_d bp; + + streamer_write_hwi (ob, ii->param_index); +@@ -4284,11 +4767,12 @@ ipa_write_indirect_edge_info (struct out + relevant to indirect inlining from IB. */ + + static void +-ipa_read_indirect_edge_info (struct lto_input_block *ib, +- struct data_in *data_in, +- struct cgraph_edge *cs) ++ipa_read_indirect_edge_info (class lto_input_block *ib, ++ class data_in *data_in, ++ struct cgraph_edge *cs, ++ class ipa_node_params *info) + { +- struct cgraph_indirect_call_info *ii = cs->indirect_info; ++ class cgraph_indirect_call_info *ii = cs->indirect_info; + struct bitpack_d bp; + + ii->param_index = (int) streamer_read_hwi (ib); +@@ -4309,6 +4793,14 @@ ipa_read_indirect_edge_info (struct lto_ + ii->otr_type = stream_read_tree (ib, data_in); + ii->context.stream_in (ib, data_in); + } ++ if (info && ii->param_index >= 0) ++ { ++ if (ii->polymorphic) ++ ipa_set_param_used_by_polymorphic_call (info, ++ ii->param_index , true); ++ ipa_set_param_used_by_indirect_call (info, ++ ii->param_index, true); ++ } + } + + /* Stream out NODE info to OB. 
*/ +@@ -4318,7 +4810,7 @@ ipa_write_node_info (struct output_block + { + int node_ref; + lto_symtab_encoder_t encoder; +- struct ipa_node_params *info = IPA_NODE_REF (node); ++ class ipa_node_params *info = IPA_NODE_REF (node); + int j; + struct cgraph_edge *e; + struct bitpack_d bp; +@@ -4345,7 +4837,13 @@ ipa_write_node_info (struct output_block + } + for (e = node->callees; e; e = e->next_callee) + { +- struct ipa_edge_args *args = IPA_EDGE_REF (e); ++ class ipa_edge_args *args = IPA_EDGE_REF (e); ++ ++ if (!args) ++ { ++ streamer_write_uhwi (ob, 0); ++ continue; ++ } + + streamer_write_uhwi (ob, + ipa_get_cs_argument_count (args) * 2 +@@ -4359,16 +4857,20 @@ ipa_write_node_info (struct output_block + } + for (e = node->indirect_calls; e; e = e->next_callee) + { +- struct ipa_edge_args *args = IPA_EDGE_REF (e); +- +- streamer_write_uhwi (ob, +- ipa_get_cs_argument_count (args) * 2 +- + (args->polymorphic_call_contexts != NULL)); +- for (j = 0; j < ipa_get_cs_argument_count (args); j++) ++ class ipa_edge_args *args = IPA_EDGE_REF (e); ++ if (!args) ++ streamer_write_uhwi (ob, 0); ++ else + { +- ipa_write_jump_function (ob, ipa_get_ith_jump_func (args, j)); +- if (args->polymorphic_call_contexts != NULL) +- ipa_get_ith_polymorhic_call_context (args, j)->stream_out (ob); ++ streamer_write_uhwi (ob, ++ ipa_get_cs_argument_count (args) * 2 ++ + (args->polymorphic_call_contexts != NULL)); ++ for (j = 0; j < ipa_get_cs_argument_count (args); j++) ++ { ++ ipa_write_jump_function (ob, ipa_get_ith_jump_func (args, j)); ++ if (args->polymorphic_call_contexts != NULL) ++ ipa_get_ith_polymorhic_call_context (args, j)->stream_out (ob); ++ } + } + ipa_write_indirect_edge_info (ob, e); + } +@@ -4377,8 +4879,8 @@ ipa_write_node_info (struct output_block + /* Stream in edge E from IB. */ + + static void +-ipa_read_edge_info (struct lto_input_block *ib, +- struct data_in *data_in, ++ipa_read_edge_info (class lto_input_block *ib, ++ class data_in *data_in, + struct cgraph_edge *e, bool prevails) + { + int count = streamer_read_uhwi (ib); +@@ -4389,7 +4891,7 @@ ipa_read_edge_info (struct lto_input_blo + return; + if (prevails && e->possibly_call_in_translation_unit_p ()) + { +- struct ipa_edge_args *args = IPA_EDGE_REF (e); ++ class ipa_edge_args *args = IPA_EDGE_REF_GET_CREATE (e); + vec_safe_grow_cleared (args->jump_functions, count); + if (contexts_computed) + vec_safe_grow_cleared (args->polymorphic_call_contexts, count); +@@ -4411,7 +4913,7 @@ ipa_read_edge_info (struct lto_input_blo + data_in, prevails); + if (contexts_computed) + { +- struct ipa_polymorphic_call_context ctx; ++ class ipa_polymorphic_call_context ctx; + ctx.stream_in (ib, data_in); + } + } +@@ -4421,14 +4923,15 @@ ipa_read_edge_info (struct lto_input_blo + /* Stream in NODE info from IB. */ + + static void +-ipa_read_node_info (struct lto_input_block *ib, struct cgraph_node *node, +- struct data_in *data_in) ++ipa_read_node_info (class lto_input_block *ib, struct cgraph_node *node, ++ class data_in *data_in) + { + int k; + struct cgraph_edge *e; + struct bitpack_d bp; + bool prevails = node->prevailing_p (); +- struct ipa_node_params *info = prevails ? IPA_NODE_REF (node) : NULL; ++ class ipa_node_params *info = prevails ++ ? 
IPA_NODE_REF_GET_CREATE (node) : NULL; + + int param_count = streamer_read_uhwi (ib); + if (prevails) +@@ -4468,7 +4971,7 @@ ipa_read_node_info (struct lto_input_blo + for (e = node->indirect_calls; e; e = e->next_callee) + { + ipa_read_edge_info (ib, data_in, e, prevails); +- ipa_read_indirect_edge_info (ib, data_in, e); ++ ipa_read_indirect_edge_info (ib, data_in, e, info); + } + } + +@@ -4525,7 +5028,7 @@ ipa_prop_read_section (struct lto_file_d + const int cfg_offset = sizeof (struct lto_function_header); + const int main_offset = cfg_offset + header->cfg_size; + const int string_offset = main_offset + header->main_size; +- struct data_in *data_in; ++ class data_in *data_in; + unsigned int i; + unsigned int count; + +@@ -4774,7 +5277,7 @@ read_replacements_section (struct lto_fi + const int cfg_offset = sizeof (struct lto_function_header); + const int main_offset = cfg_offset + header->cfg_size; + const int string_offset = main_offset + header->main_size; +- struct data_in *data_in; ++ class data_in *data_in; + unsigned int i; + unsigned int count; + +@@ -4888,7 +5391,8 @@ ipcp_modif_dom_walker::before_dom_childr + struct ipa_agg_replacement_value *v; + gimple *stmt = gsi_stmt (gsi); + tree rhs, val, t; +- HOST_WIDE_INT offset, size; ++ HOST_WIDE_INT offset; ++ poly_int64 size; + int index; + bool by_ref, vce; + +@@ -4923,7 +5427,8 @@ ipcp_modif_dom_walker::before_dom_childr + break; + if (!v + || v->by_ref != by_ref +- || tree_to_shwi (TYPE_SIZE (TREE_TYPE (v->value))) != size) ++ || maybe_ne (tree_to_poly_int64 (TYPE_SIZE (TREE_TYPE (v->value))), ++ size)) + continue; + + gcc_checking_assert (is_gimple_ip_invariant (v->value)); +@@ -5194,4 +5699,12 @@ ipcp_transform_function (struct cgraph_n + return TODO_update_ssa_only_virtuals; + } + ++ ++/* Return true if OTHER describes same agg value. */ ++bool ++ipa_agg_value::equal_to (const ipa_agg_value &other) ++{ ++ return offset == other.offset ++ && operand_equal_p (value, other.value, 0); ++} + #include "gt-ipa-prop.h" +diff -Nurp a/gcc/ipa-prop.h b/gcc/ipa-prop.h +--- a/gcc/ipa-prop.h 2020-04-30 15:14:04.624000000 +0800 ++++ b/gcc/ipa-prop.h 2020-04-30 15:14:56.696000000 +0800 +@@ -39,6 +39,15 @@ along with GCC; see the file COPYING3. + argument. + Unknown - neither of the above. + ++ IPA_JF_LOAD_AGG is a compound pass-through jump function, in which primary ++ operation on formal parameter is memory dereference that loads a value from ++ a part of an aggregate, which is represented or pointed to by the formal ++ parameter. Moreover, an additional unary/binary operation can be applied on ++ the loaded value, and final result is passed as actual argument of callee ++ (e.g. *(param_1(D) + 4) op 24 ). It is meant to describe usage of aggregate ++ parameter or by-reference parameter referenced in argument passing, commonly ++ found in C++ and Fortran. ++ + IPA_JF_ANCESTOR is a special pass-through jump function, which means that + the result is an address of a part of the object pointed to by the formal + parameter to which the function refers. 
It is mainly intended to represent +@@ -60,6 +69,7 @@ enum jump_func_type + IPA_JF_UNKNOWN = 0, /* newly allocated and zeroed jump functions default */ + IPA_JF_CONST, /* represented by field costant */ + IPA_JF_PASS_THROUGH, /* represented by field pass_through */ ++ IPA_JF_LOAD_AGG, /* represented by field load_agg */ + IPA_JF_ANCESTOR /* represented by field ancestor */ + }; + +@@ -97,6 +107,26 @@ struct GTY(()) ipa_pass_through_data + unsigned agg_preserved : 1; + }; + ++/* Structure holding data required to describe a load-value-from-aggregate ++ jump function. */ ++ ++struct GTY(()) ipa_load_agg_data ++{ ++ /* Inherit from pass through jump function, describing unary/binary ++ operation on the value loaded from aggregate that is represented or ++ pointed to by the formal parameter, specified by formal_id in this ++ pass_through jump function data structure. */ ++ struct ipa_pass_through_data pass_through; ++ /* Type of the value loaded from the aggregate. */ ++ tree type; ++ /* Offset at which the value is located within the aggregate. */ ++ HOST_WIDE_INT offset; ++ /* True if loaded by reference (the aggregate is pointed to by the formal ++ parameter) or false if loaded by value (the aggregate is represented ++ by the formal parameter). */ ++ bool by_ref; ++}; ++ + /* Structure holding data required to describe an ancestor pass-through + jump function. */ + +@@ -110,38 +140,139 @@ struct GTY(()) ipa_ancestor_jf_data + unsigned agg_preserved : 1; + }; + +-/* An element in an aggegate part of a jump function describing a known value +- at a given offset. When it is part of a pass-through jump function with +- agg_preserved set or an ancestor jump function with agg_preserved set, all +- unlisted positions are assumed to be preserved but the value can be a type +- node, which means that the particular piece (starting at offset and having +- the size of the type) is clobbered with an unknown value. When +- agg_preserved is false or the type of the containing jump function is +- different, all unlisted parts are assumed to be unknown and all values must +- fulfill is_gimple_ip_invariant. */ ++/* A jump function for an aggregate part at a given offset, which describes how ++ it content value is generated. All unlisted positions are assumed to have a ++ value defined in an unknown way. */ + + struct GTY(()) ipa_agg_jf_item + { +- /* The offset at which the known value is located within the aggregate. */ ++ /* The offset for the aggregate part. */ + HOST_WIDE_INT offset; + +- /* The known constant or type if this is a clobber. */ +- tree value; +-}; ++ /* Data type of the aggregate part. */ ++ tree type; + ++ /* Jump function type. */ ++ enum jump_func_type jftype; + +-/* Aggregate jump function - i.e. description of contents of aggregates passed +- either by reference or value. */ ++ /* Represents a value of jump function. constant represents the actual constant ++ in constant jump function content. pass_through is used only in simple pass ++ through jump function context. load_agg is for load-value-from-aggregate ++ jump function context. */ ++ union jump_func_agg_value ++ { ++ tree GTY ((tag ("IPA_JF_CONST"))) constant; ++ struct ipa_pass_through_data GTY ((tag ("IPA_JF_PASS_THROUGH"))) pass_through; ++ struct ipa_load_agg_data GTY ((tag ("IPA_JF_LOAD_AGG"))) load_agg; ++ } GTY ((desc ("%1.jftype"))) value; ++}; ++ ++/* Jump functions describing a set of aggregate contents. */ + + struct GTY(()) ipa_agg_jump_function + { +- /* Description of the individual items. 
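For intuition, here is a minimal C sketch (not taken from the patch; every name in it is hypothetical) of the argument-passing shape that an IPA_JF_LOAD_AGG jump function describes: the actual argument is loaded from an aggregate reachable through a formal parameter, optionally with a further arithmetic operation applied.

/* forward() passes "p->len + 24", i.e. a by_ref load from the aggregate
   pointed to by the formal parameter P at the offset of LEN, followed by a
   PLUS_EXPR -- roughly the "*(param_1(D) + 4) op 24" shape mentioned above.  */
struct pkt { int id; int len; };

static int consume (int n) { return n * 2; }

static int forward (struct pkt *p)
{
  return consume (p->len + 24);
}

int main (void)
{
  struct pkt q = { 1, 8 };
  return forward (&q);
}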
*/ ++ /* Description of the individual jump function item. */ + vec *items; +- /* True if the data was passed by reference (as opposed to by value). */ ++ /* True if the data was passed by reference (as opposed to by value). */ ++ bool by_ref; ++}; ++ ++/* An element in an aggregate part describing a known value at a given offset. ++ All unlisted positions are assumed to be unknown and all listed values must ++ fulfill is_gimple_ip_invariant. */ ++ ++struct ipa_agg_value ++{ ++ /* The offset at which the known value is located within the aggregate. */ ++ HOST_WIDE_INT offset; ++ ++ /* The known constant. */ ++ tree value; ++ ++ /* Return true if OTHER describes same agg value. */ ++ bool equal_to (const ipa_agg_value &other); ++}; ++ ++/* Structure describing a set of known offset/value for aggregate. */ ++ ++struct ipa_agg_value_set ++{ ++ /* Description of the individual item. */ ++ vec items; ++ /* True if the data was passed by reference (as opposed to by value). */ + bool by_ref; ++ ++ /* Return true if OTHER describes same agg values. */ ++ bool equal_to (const ipa_agg_value_set &other) ++ { ++ if (by_ref != other.by_ref) ++ return false; ++ if (items.length () != other.items.length ()) ++ return false; ++ for (unsigned int i = 0; i < items.length (); i++) ++ if (!items[i].equal_to (other.items[i])) ++ return false; ++ return true; ++ } ++ ++ /* Return true if there is any value for aggregate. */ ++ bool is_empty () const ++ { ++ return items.is_empty (); ++ } ++ ++ ipa_agg_value_set copy () const ++ { ++ ipa_agg_value_set new_copy; ++ ++ new_copy.items = items.copy (); ++ new_copy.by_ref = by_ref; ++ ++ return new_copy; ++ } ++ ++ void release () ++ { ++ items.release (); ++ } + }; + +-typedef struct ipa_agg_jump_function *ipa_agg_jump_function_p; ++/* Return copy of a vec. */ ++ ++static inline vec ++ipa_copy_agg_values (const vec &aggs) ++{ ++ vec aggs_copy = vNULL; ++ ++ if (!aggs.is_empty ()) ++ { ++ ipa_agg_value_set *agg; ++ int i; ++ ++ aggs_copy.reserve_exact (aggs.length ()); ++ ++ FOR_EACH_VEC_ELT (aggs, i, agg) ++ aggs_copy.quick_push (agg->copy ()); ++ } ++ ++ return aggs_copy; ++} ++ ++/* For vec, DO NOT call release(), use below function ++ instead. Because ipa_agg_value_set contains a field of vector type, we ++ should release this child vector in each element before reclaiming the ++ whole vector. */ ++ ++static inline void ++ipa_release_agg_values (vec &aggs) ++{ ++ ipa_agg_value_set *agg; ++ int i; ++ ++ FOR_EACH_VEC_ELT (aggs, i, agg) ++ agg->release (); ++ aggs.release (); ++} + + /* Information about zero/non-zero bits. */ + struct GTY(()) ipa_bits +@@ -170,19 +301,19 @@ struct GTY(()) ipa_vr + types of jump functions supported. */ + struct GTY (()) ipa_jump_func + { +- /* Aggregate contants description. See struct ipa_agg_jump_function and its +- description. */ ++ /* Aggregate jump function description. See struct ipa_agg_jump_function ++ and its description. */ + struct ipa_agg_jump_function agg; + + /* Information about zero/non-zero bits. The pointed to structure is shared + betweed different jump functions. Use ipa_set_jfunc_bits to set this + field. */ +- struct ipa_bits *bits; ++ class ipa_bits *bits; + + /* Information about value range, containing valid data only when vr_known is + true. The pointed to structure is shared betweed different jump + functions. Use ipa_set_jfunc_vr to set this field. */ +- struct value_range_base *m_vr; ++ class value_range_base *m_vr; + + enum jump_func_type type; + /* Represents a value of a jump function. 
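As a usage note for the helpers above, a short sketch (the enclosing function and its argument name are placeholders) of the copy/release discipline for vec<ipa_agg_value_set>, whose elements each own a nested items vector:

/* Sketch only: AGGS_COPY is created element-wise and must be released
   element-wise; calling aggs_copy.release () directly would leak the
   nested items vectors.  */
static void
use_known_aggs (const vec<ipa_agg_value_set> &known_aggs)
{
  vec<ipa_agg_value_set> aggs_copy = ipa_copy_agg_values (known_aggs);
  /* ... inspect aggs_copy[i].items[j].offset and .value here ...  */
  ipa_release_agg_values (aggs_copy);
}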
pass_through is used only in jump +@@ -310,9 +441,12 @@ struct GTY(()) ipa_param_descriptor + says how many there are. If any use could not be described by means of + ipa-prop structures, this is IPA_UNDESCRIBED_USE. */ + int controlled_uses; +- unsigned int move_cost : 31; ++ unsigned int move_cost : 28; + /* The parameter is used. */ + unsigned used : 1; ++ unsigned used_by_ipa_predicates : 1; ++ unsigned used_by_indirect_call : 1; ++ unsigned used_by_polymorphic_call : 1; + }; + + /* ipa_node_params stores information related to formal parameters of functions +@@ -332,7 +466,7 @@ struct GTY((for_user)) ipa_node_params + vec *descriptors; + /* Pointer to an array of structures describing individual formal + parameters. */ +- struct ipcp_param_lattices * GTY((skip)) lattices; ++ class ipcp_param_lattices * GTY((skip)) lattices; + /* Only for versioned nodes this field would not be NULL, + it points to the node that IPA cp cloned from. */ + struct cgraph_node * GTY((skip)) ipcp_orig_node; +@@ -357,6 +491,8 @@ struct GTY((for_user)) ipa_node_params + unsigned node_dead : 1; + /* Node is involved in a recursion, potentionally indirect. */ + unsigned node_within_scc : 1; ++ /* Node contains only direct recursion. */ ++ unsigned node_is_self_scc : 1; + /* Node is calling a private function called only once. */ + unsigned node_calling_single_call : 1; + /* False when there is something makes versioning impossible. */ +@@ -420,7 +556,7 @@ struct ipa_func_body_info + cgraph_node *node; + + /* Its info. */ +- struct ipa_node_params *info; ++ class ipa_node_params *info; + + /* Information about individual BBs. */ + vec bb_infos; +@@ -439,7 +575,7 @@ struct ipa_func_body_info + /* Return the number of formal parameters. */ + + static inline int +-ipa_get_param_count (struct ipa_node_params *info) ++ipa_get_param_count (class ipa_node_params *info) + { + return vec_safe_length (info->descriptors); + } +@@ -450,10 +586,9 @@ ipa_get_param_count (struct ipa_node_par + WPA. */ + + static inline tree +-ipa_get_param (struct ipa_node_params *info, int i) ++ipa_get_param (class ipa_node_params *info, int i) + { + gcc_checking_assert (info->descriptors); +- gcc_checking_assert (!flag_wpa); + tree t = (*info->descriptors)[i].decl_or_type; + gcc_checking_assert (TREE_CODE (t) == PARM_DECL); + return t; +@@ -463,7 +598,7 @@ ipa_get_param (struct ipa_node_params *i + to INFO if it is known or NULL if not. */ + + static inline tree +-ipa_get_type (struct ipa_node_params *info, int i) ++ipa_get_type (class ipa_node_params *info, int i) + { + if (vec_safe_length (info->descriptors) <= (unsigned) i) + return NULL; +@@ -480,7 +615,7 @@ ipa_get_type (struct ipa_node_params *in + to INFO. */ + + static inline int +-ipa_get_param_move_cost (struct ipa_node_params *info, int i) ++ipa_get_param_move_cost (class ipa_node_params *info, int i) + { + gcc_checking_assert (info->descriptors); + return (*info->descriptors)[i].move_cost; +@@ -490,17 +625,47 @@ ipa_get_param_move_cost (struct ipa_node + associated with INFO to VAL. */ + + static inline void +-ipa_set_param_used (struct ipa_node_params *info, int i, bool val) ++ipa_set_param_used (class ipa_node_params *info, int i, bool val) + { + gcc_checking_assert (info->descriptors); + (*info->descriptors)[i].used = val; + } + ++/* Set the used_by_ipa_predicates flag corresponding to the Ith formal ++ parameter of the function associated with INFO to VAL. 
*/ ++ ++static inline void ++ipa_set_param_used_by_ipa_predicates (class ipa_node_params *info, int i, bool val) ++{ ++ gcc_checking_assert (info->descriptors); ++ (*info->descriptors)[i].used_by_ipa_predicates = val; ++} ++ ++/* Set the used_by_indirect_call flag corresponding to the Ith formal ++ parameter of the function associated with INFO to VAL. */ ++ ++static inline void ++ipa_set_param_used_by_indirect_call (class ipa_node_params *info, int i, bool val) ++{ ++ gcc_checking_assert (info->descriptors); ++ (*info->descriptors)[i].used_by_indirect_call = val; ++} ++ ++/* Set the .used_by_polymorphic_call flag corresponding to the Ith formal ++ parameter of the function associated with INFO to VAL. */ ++ ++static inline void ++ipa_set_param_used_by_polymorphic_call (class ipa_node_params *info, int i, bool val) ++{ ++ gcc_checking_assert (info->descriptors); ++ (*info->descriptors)[i].used_by_polymorphic_call = val; ++} ++ + /* Return how many uses described by ipa-prop a parameter has or + IPA_UNDESCRIBED_USE if there is a use that is not described by these + structures. */ + static inline int +-ipa_get_controlled_uses (struct ipa_node_params *info, int i) ++ipa_get_controlled_uses (class ipa_node_params *info, int i) + { + /* FIXME: introducing speculation causes out of bounds access here. */ + if (vec_safe_length (info->descriptors) > (unsigned)i) +@@ -511,7 +676,7 @@ ipa_get_controlled_uses (struct ipa_node + /* Set the controlled counter of a given parameter. */ + + static inline void +-ipa_set_controlled_uses (struct ipa_node_params *info, int i, int val) ++ipa_set_controlled_uses (class ipa_node_params *info, int i, int val) + { + gcc_checking_assert (info->descriptors); + (*info->descriptors)[i].controlled_uses = val; +@@ -521,12 +686,42 @@ ipa_set_controlled_uses (struct ipa_node + function associated with INFO. */ + + static inline bool +-ipa_is_param_used (struct ipa_node_params *info, int i) ++ipa_is_param_used (class ipa_node_params *info, int i) + { + gcc_checking_assert (info->descriptors); + return (*info->descriptors)[i].used; + } + ++/* Return the used_by_ipa_predicates flag corresponding to the Ith formal ++ parameter of the function associated with INFO. */ ++ ++static inline bool ++ipa_is_param_used_by_ipa_predicates (class ipa_node_params *info, int i) ++{ ++ gcc_checking_assert (info->descriptors); ++ return (*info->descriptors)[i].used_by_ipa_predicates; ++} ++ ++/* Return the used_by_indirect_call flag corresponding to the Ith formal ++ parameter of the function associated with INFO. */ ++ ++static inline bool ++ipa_is_param_used_by_indirect_call (class ipa_node_params *info, int i) ++{ ++ gcc_checking_assert (info->descriptors); ++ return (*info->descriptors)[i].used_by_indirect_call; ++} ++ ++/* Return the used_by_polymorphic_call flag corresponding to the Ith formal ++ parameter of the function associated with INFO. */ ++ ++static inline bool ++ipa_is_param_used_by_polymorphic_call (class ipa_node_params *info, int i) ++{ ++ gcc_checking_assert (info->descriptors); ++ return (*info->descriptors)[i].used_by_polymorphic_call; ++} ++ + /* Information about replacements done in aggregates for a given node (each + node has its linked list). */ + struct GTY(()) ipa_agg_replacement_value +@@ -590,7 +785,7 @@ class GTY((for_user)) ipa_edge_args + /* Return the number of actual arguments. 
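To make the three new per-parameter flags concrete, a small hypothetical C++ sketch (not part of the patch) of parameter uses that the analysis and the streaming-in code above would record:

/* CB is recorded as used_by_indirect_call (the call goes through the
   parameter value); B is recorded as used_by_polymorphic_call (and, on the
   LTO read-in path shown earlier, also as used_by_indirect_call); X is a
   plain "used" parameter.  Roughly speaking, a parameter tested in a
   condition that the function summary turns into a size/time predicate
   would additionally get used_by_ipa_predicates.  */
struct Base
{
  virtual int f () { return 0; }
};

static int indirect (int (*cb) (int), int x)
{
  return cb (x);
}

static int polymorphic (Base *b)
{
  return b->f ();
}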
*/ + + static inline int +-ipa_get_cs_argument_count (struct ipa_edge_args *args) ++ipa_get_cs_argument_count (class ipa_edge_args *args) + { + return vec_safe_length (args->jump_functions); + } +@@ -600,15 +795,15 @@ ipa_get_cs_argument_count (struct ipa_ed + ipa_compute_jump_functions. */ + + static inline struct ipa_jump_func * +-ipa_get_ith_jump_func (struct ipa_edge_args *args, int i) ++ipa_get_ith_jump_func (class ipa_edge_args *args, int i) + { + return &(*args->jump_functions)[i]; + } + + /* Returns a pointer to the polymorphic call context for the ith argument. + NULL if contexts are not computed. */ +-static inline struct ipa_polymorphic_call_context * +-ipa_get_ith_polymorhic_call_context (struct ipa_edge_args *args, int i) ++static inline class ipa_polymorphic_call_context * ++ipa_get_ith_polymorhic_call_context (class ipa_edge_args *args, int i) + { + if (!args->polymorphic_call_contexts) + return NULL; +@@ -637,7 +832,12 @@ class GTY((user)) ipa_edge_args_sum_t : + ipa_edge_args_sum_t (symbol_table *table, bool ggc) + : call_summary (table, ggc) { } + +- /* Hook that is called by summary when an edge is duplicated. */ ++ void remove (cgraph_edge *edge) ++ { ++ call_summary ::remove (edge); ++ } ++ ++ /* Hook that is called by summary when an edge is removed. */ + virtual void remove (cgraph_edge *cs, ipa_edge_args *args); + /* Hook that is called by summary when an edge is duplicated. */ + virtual void duplicate (cgraph_edge *src, +@@ -675,8 +875,10 @@ extern GTY(()) function_summary get_create (NODE)) +-#define IPA_EDGE_REF(EDGE) (ipa_edge_args_sum->get_create (EDGE)) ++#define IPA_NODE_REF(NODE) (ipa_node_params_sum->get (NODE)) ++#define IPA_NODE_REF_GET_CREATE(NODE) (ipa_node_params_sum->get_create (NODE)) ++#define IPA_EDGE_REF(EDGE) (ipa_edge_args_sum->get (EDGE)) ++#define IPA_EDGE_REF_GET_CREATE(EDGE) (ipa_edge_args_sum->get_create (EDGE)) + /* This macro checks validity of index returned by + ipa_get_param_decl_index function. */ + #define IS_VALID_JUMP_FUNC_INDEX(I) ((I) != -1) +@@ -740,9 +942,9 @@ bool ipa_propagate_indirect_call_infos ( + + /* Indirect edge and binfo processing. */ + tree ipa_get_indirect_edge_target (struct cgraph_edge *ie, +- vec , ++ vec, + vec, +- vec, ++ vec, + bool *); + struct cgraph_edge *ipa_make_edge_direct_to_target (struct cgraph_edge *, tree, + bool speculative = false); +@@ -755,13 +957,13 @@ ipa_bits *ipa_get_ipa_bits_for_value (co + void ipa_analyze_node (struct cgraph_node *); + + /* Aggregate jump function related functions. */ +-tree ipa_find_agg_cst_for_param (struct ipa_agg_jump_function *agg, tree scalar, ++tree ipa_find_agg_cst_for_param (struct ipa_agg_value_set *agg, tree scalar, + HOST_WIDE_INT offset, bool by_ref, + bool *from_global_constant = NULL); + bool ipa_load_from_parm_agg (struct ipa_func_body_info *fbi, + vec *descriptors, + gimple *stmt, tree op, int *index_p, +- HOST_WIDE_INT *offset_p, HOST_WIDE_INT *size_p, ++ HOST_WIDE_INT *offset_p, poly_int64 *size_p, + bool *by_ref, bool *guaranteed_unmodified = NULL); + + /* Debugging interface. 
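A minimal sketch (hypothetical helper functions, GCC-internal types assumed) of the intended division of labour between the plain accessor macros and their new _GET_CREATE variants, matching the NULL checks added earlier in this patch:

/* Read-only users take the plain accessor and must cope with a missing
   summary; only analysis code that is allowed to materialize a summary
   uses the _GET_CREATE form.  */
static void
inspect_params (struct cgraph_node *node)
{
  class ipa_node_params *info = IPA_NODE_REF (node);	/* get: may be NULL */
  if (!info)
    return;
  /* ... read-only queries such as ipa_get_param_count (info) ...  */
}

static void
create_params (struct cgraph_node *node)
{
  class ipa_node_params *info = IPA_NODE_REF_GET_CREATE (node);	/* never NULL */
  gcc_checking_assert (info);
}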
*/ +@@ -779,11 +981,11 @@ extern object_allocator +-class ipcp_value_source; ++struct ipcp_value_source; + + extern object_allocator > ipcp_sources_pool; + +-class ipcp_agg_lattice; ++struct ipcp_agg_lattice; + + extern object_allocator ipcp_agg_lattice_pool; + +@@ -793,15 +995,18 @@ void ipa_prop_write_jump_functions (void + void ipa_prop_read_jump_functions (void); + void ipcp_write_transformation_summaries (void); + void ipcp_read_transformation_summaries (void); +-int ipa_get_param_decl_index (struct ipa_node_params *, tree); +-tree ipa_value_from_jfunc (struct ipa_node_params *info, ++int ipa_get_param_decl_index (class ipa_node_params *, tree); ++tree ipa_value_from_jfunc (class ipa_node_params *info, + struct ipa_jump_func *jfunc, tree type); + unsigned int ipcp_transform_function (struct cgraph_node *node); + ipa_polymorphic_call_context ipa_context_from_jfunc (ipa_node_params *, + cgraph_edge *, + int, + ipa_jump_func *); +-void ipa_dump_param (FILE *, struct ipa_node_params *info, int i); ++ipa_agg_value_set ipa_agg_value_set_from_jfunc (ipa_node_params *, ++ cgraph_node *, ++ ipa_agg_jump_function *); ++void ipa_dump_param (FILE *, class ipa_node_params *info, int i); + void ipa_release_body_info (struct ipa_func_body_info *); + tree ipa_get_callee_param_type (struct cgraph_edge *e, int i); + +diff -Nurp a/gcc/ipa-pure-const.c b/gcc/ipa-pure-const.c +--- a/gcc/ipa-pure-const.c 2020-04-30 15:14:04.600000000 +0800 ++++ b/gcc/ipa-pure-const.c 2020-04-30 15:14:56.588000000 +0800 +@@ -1360,12 +1360,14 @@ ignore_edge_for_nothrow (struct cgraph_e + return true; + + enum availability avail; +- cgraph_node *n = e->callee->function_or_virtual_thunk_symbol (&avail, +- e->caller); +- if (avail <= AVAIL_INTERPOSABLE || TREE_NOTHROW (n->decl)) ++ cgraph_node *ultimate_target ++ = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); ++ if (avail <= AVAIL_INTERPOSABLE || TREE_NOTHROW (ultimate_target->decl)) + return true; +- return opt_for_fn (e->callee->decl, flag_non_call_exceptions) +- && !e->callee->binds_to_current_def_p (e->caller); ++ return ((opt_for_fn (e->callee->decl, flag_non_call_exceptions) ++ && !e->callee->binds_to_current_def_p (e->caller)) ++ || !opt_for_fn (e->caller->decl, flag_ipa_pure_const) ++ || !opt_for_fn (ultimate_target->decl, flag_ipa_pure_const)); + } + + /* Return true if NODE is self recursive function. +@@ -1395,16 +1397,21 @@ cdtor_p (cgraph_node *n, void *) + return false; + } + +-/* We only propagate across edges with non-interposable callee. */ ++/* Skip edges from and to nodes without ipa_pure_const enabled. ++ Ignore not available symbols. */ + + static bool + ignore_edge_for_pure_const (struct cgraph_edge *e) + { + enum availability avail; +- e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); +- return (avail <= AVAIL_INTERPOSABLE); +-} ++ cgraph_node *ultimate_target ++ = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); + ++ return (avail <= AVAIL_INTERPOSABLE ++ || !opt_for_fn (e->caller->decl, flag_ipa_pure_const) ++ || !opt_for_fn (ultimate_target->decl, ++ flag_ipa_pure_const)); ++} + + /* Produce transitive closure over the callgraph and compute pure/const + attributes. */ +@@ -1670,7 +1677,7 @@ propagate_pure_const (void) + /* Inline clones share declaration with their offline copies; + do not modify their declarations since the offline copy may + be different. 
*/ +- if (!w->global.inlined_to) ++ if (!w->inlined_to) + switch (this_state) + { + case IPA_CONST: +@@ -1831,7 +1838,7 @@ propagate_nothrow (void) + /* Inline clones share declaration with their offline copies; + do not modify their declarations since the offline copy may + be different. */ +- if (!w->global.inlined_to) ++ if (!w->inlined_to) + { + w->set_nothrow_flag (true); + if (dump_file) +@@ -1958,7 +1965,7 @@ propagate_malloc (void) + funct_state l = funct_state_summaries->get (node); + if (!node->alias + && l->malloc_state == STATE_MALLOC +- && !node->global.inlined_to) ++ && !node->inlined_to) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Function %s found to be malloc\n", +diff -Nurp a/gcc/ipa-reference.c b/gcc/ipa-reference.c +--- a/gcc/ipa-reference.c 2020-04-30 15:14:04.644000000 +0800 ++++ b/gcc/ipa-reference.c 2020-04-30 15:14:56.588000000 +0800 +@@ -46,7 +46,6 @@ along with GCC; see the file COPYING3. + #include "cgraph.h" + #include "data-streamer.h" + #include "calls.h" +-#include "splay-tree.h" + #include "ipa-utils.h" + #include "ipa-reference.h" + #include "symbol-summary.h" +@@ -75,8 +74,8 @@ struct ipa_reference_global_vars_info_d + + struct ipa_reference_optimization_summary_d + { +- bitmap statics_not_read; +- bitmap statics_not_written; ++ bitmap statics_read; ++ bitmap statics_written; + }; + + typedef ipa_reference_local_vars_info_d *ipa_reference_local_vars_info_t; +@@ -92,14 +91,20 @@ struct ipa_reference_vars_info_d + + typedef struct ipa_reference_vars_info_d *ipa_reference_vars_info_t; + +-/* This splay tree contains all of the static variables that are ++/* This map contains all of the static variables that are + being considered by the compilation level alias analysis. */ +-static splay_tree reference_vars_to_consider; ++typedef hash_map reference_vars_map_t; ++static reference_vars_map_t *ipa_reference_vars_map; ++static int ipa_reference_vars_uids; ++static vec *reference_vars_to_consider; ++varpool_node_hook_list *varpool_node_hooks; + + /* Set of all interesting module statics. A bit is set for every module + static we are considering. This is added to the local info when asm + code is found that clobbers all memory. */ + static bitmap all_module_statics; ++/* Zero bitmap. */ ++static bitmap no_module_statics; + /* Set of all statics that should be ignored because they are touched by + -fno-ipa-reference code. */ + static bitmap ignore_module_statics; +@@ -136,6 +141,31 @@ public: + + static ipa_ref_opt_summary_t *ipa_ref_opt_sum_summaries = NULL; + ++/* Return ID used by ipa-reference bitmaps. -1 if failed. */ ++int ++ipa_reference_var_uid (tree t) ++{ ++ if (!ipa_reference_vars_map) ++ return -1; ++ int *id = ipa_reference_vars_map->get ++ (symtab_node::get (t)->ultimate_alias_target (NULL)->decl); ++ if (!id) ++ return -1; ++ return *id; ++} ++ ++/* Return ID used by ipa-reference bitmaps. Create new entry if ++ T is not in map. Set EXISTED accordinly */ ++int ++ipa_reference_var_get_or_insert_uid (tree t, bool *existed) ++{ ++ int &id = ipa_reference_vars_map->get_or_insert ++ (symtab_node::get (t)->ultimate_alias_target (NULL)->decl, existed); ++ if (!*existed) ++ id = ipa_reference_vars_uids++; ++ return id; ++} ++ + /* Return the ipa_reference_vars structure starting from the cgraph NODE. */ + static inline ipa_reference_vars_info_t + get_reference_vars_info (struct cgraph_node *node) +@@ -165,7 +195,7 @@ get_reference_optimization_summary (stru + NULL if no data is available. 
*/ + + bitmap +-ipa_reference_get_not_read_global (struct cgraph_node *fn) ++ipa_reference_get_read_global (struct cgraph_node *fn) + { + if (!opt_for_fn (current_function_decl, flag_ipa_reference)) + return NULL; +@@ -180,10 +210,10 @@ ipa_reference_get_not_read_global (struc + || (avail == AVAIL_INTERPOSABLE + && flags_from_decl_or_type (fn->decl) & ECF_LEAF)) + && opt_for_fn (fn2->decl, flag_ipa_reference)) +- return info->statics_not_read; ++ return info->statics_read; + else if (avail == AVAIL_NOT_AVAILABLE + && flags_from_decl_or_type (fn->decl) & ECF_LEAF) +- return all_module_statics; ++ return no_module_statics; + else + return NULL; + } +@@ -194,7 +224,7 @@ ipa_reference_get_not_read_global (struc + call. Returns NULL if no data is available. */ + + bitmap +-ipa_reference_get_not_written_global (struct cgraph_node *fn) ++ipa_reference_get_written_global (struct cgraph_node *fn) + { + if (!opt_for_fn (current_function_decl, flag_ipa_reference)) + return NULL; +@@ -209,10 +239,10 @@ ipa_reference_get_not_written_global (st + || (avail == AVAIL_INTERPOSABLE + && flags_from_decl_or_type (fn->decl) & ECF_LEAF)) + && opt_for_fn (fn2->decl, flag_ipa_reference)) +- return info->statics_not_written; ++ return info->statics_written; + else if (avail == AVAIL_NOT_AVAILABLE + && flags_from_decl_or_type (fn->decl) & ECF_LEAF) +- return all_module_statics; ++ return no_module_statics; + else + return NULL; + } +@@ -256,7 +286,9 @@ is_improper (symtab_node *n, void *v ATT + static inline bool + is_proper_for_analysis (tree t) + { +- if (bitmap_bit_p (ignore_module_statics, ipa_reference_var_uid (t))) ++ int id = ipa_reference_var_uid (t); ++ ++ if (id != -1 && bitmap_bit_p (ignore_module_statics, id)) + return false; + + if (symtab_node::get (t) +@@ -272,9 +304,7 @@ is_proper_for_analysis (tree t) + static const char * + get_static_name (int index) + { +- splay_tree_node stn = +- splay_tree_lookup (reference_vars_to_consider, index); +- return fndecl_name ((tree)(stn->value)); ++ return fndecl_name ((*reference_vars_to_consider)[index]); + } + + /* Dump a set of static vars to FILE. */ +@@ -287,6 +317,8 @@ dump_static_vars_set_to_file (FILE *f, b + return; + else if (set == all_module_statics) + fprintf (f, "ALL"); ++ else if (set == no_module_statics) ++ fprintf (f, "NO"); + else + EXECUTE_IF_SET_IN_BITMAP (set, 0, index, bi) + { +@@ -330,10 +362,12 @@ union_static_var_sets (bitmap &x, bitmap + But if SET is NULL or the maximum set, return that instead. */ + + static bitmap +-copy_static_var_set (bitmap set) ++copy_static_var_set (bitmap set, bool for_propagation) + { + if (set == NULL || set == all_module_statics) + return set; ++ if (!for_propagation && set == no_module_statics) ++ return set; + bitmap_obstack *o = set->obstack; + gcc_checking_assert (o); + bitmap copy = BITMAP_ALLOC (o); +@@ -403,6 +437,14 @@ propagate_bits (ipa_reference_global_var + } + } + ++/* Delete NODE from map. */ ++ ++static void ++varpool_removal_hook (varpool_node *node, void *) ++{ ++ ipa_reference_vars_map->remove (node->decl); ++} ++ + static bool ipa_init_p = false; + + /* The init routine for analyzing global static variable usage. 
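A short sketch of how a consumer might query the renamed interface (the wrapper function is hypothetical; CALLEE and VAR_DECL are assumed to be a call graph node and a static variable's decl):

/* Sketch only: a NULL bitmap means "no information", and the uid lookup
   returns -1 for statics the pass never registered, so both cases fall
   back to the conservative answer.  The all_module_statics and
   no_module_statics singletons need no special-casing here -- they are
   ordinary bitmaps that are simply full (over the registered statics) or
   empty.  */
static bool
call_may_read_static (struct cgraph_node *callee, tree var_decl)
{
  bitmap read = ipa_reference_get_read_global (callee);
  int id = ipa_reference_var_uid (var_decl);
  if (!read || id == -1)
    return true;
  return bitmap_bit_p (read, id);
}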
See +@@ -415,22 +457,28 @@ ipa_init (void) + + ipa_init_p = true; + +- if (dump_file) +- reference_vars_to_consider = splay_tree_new (splay_tree_compare_ints, 0, 0); ++ vec_alloc (reference_vars_to_consider, 10); ++ ++ ++ if (ipa_ref_opt_sum_summaries != NULL) ++ { ++ delete ipa_ref_opt_sum_summaries; ++ ipa_ref_opt_sum_summaries = NULL; ++ delete ipa_reference_vars_map; ++ } ++ ipa_reference_vars_map = new reference_vars_map_t(257); ++ varpool_node_hooks ++ = symtab->add_varpool_removal_hook (varpool_removal_hook, NULL); ++ ipa_reference_vars_uids = 0; + + bitmap_obstack_initialize (&local_info_obstack); + bitmap_obstack_initialize (&optimization_summary_obstack); + all_module_statics = BITMAP_ALLOC (&optimization_summary_obstack); ++ no_module_statics = BITMAP_ALLOC (&optimization_summary_obstack); + ignore_module_statics = BITMAP_ALLOC (&optimization_summary_obstack); + + if (ipa_ref_var_info_summaries == NULL) + ipa_ref_var_info_summaries = new ipa_ref_var_info_summary_t (symtab); +- +- if (ipa_ref_opt_sum_summaries != NULL) +- { +- delete ipa_ref_opt_sum_summaries; +- ipa_ref_opt_sum_summaries = NULL; +- } + } + + +@@ -465,6 +513,8 @@ analyze_function (struct cgraph_node *fn + local = init_function_info (fn); + for (i = 0; fn->iterate_reference (i, ref); i++) + { ++ int id; ++ bool existed; + if (!is_a (ref->referred)) + continue; + var = ref->referred->decl; +@@ -472,23 +522,22 @@ analyze_function (struct cgraph_node *fn + continue; + /* This is a variable we care about. Check if we have seen it + before, and if not add it the set of variables we care about. */ +- if (all_module_statics +- && bitmap_set_bit (all_module_statics, ipa_reference_var_uid (var))) ++ id = ipa_reference_var_get_or_insert_uid (var, &existed); ++ if (!existed) + { ++ bitmap_set_bit (all_module_statics, id); + if (dump_file) +- splay_tree_insert (reference_vars_to_consider, +- ipa_reference_var_uid (var), +- (splay_tree_value)var); ++ reference_vars_to_consider->safe_push (var); + } + switch (ref->use) + { + case IPA_REF_LOAD: +- bitmap_set_bit (local->statics_read, ipa_reference_var_uid (var)); ++ bitmap_set_bit (local->statics_read, id); + break; + case IPA_REF_STORE: + if (ref->cannot_lead_to_return ()) + break; +- bitmap_set_bit (local->statics_written, ipa_reference_var_uid (var)); ++ bitmap_set_bit (local->statics_written, id); + break; + case IPA_REF_ADDR: + break; +@@ -510,10 +559,10 @@ ipa_ref_opt_summary_t::duplicate (cgraph + ipa_reference_optimization_summary_d + *dst_ginfo) + { +- dst_ginfo->statics_not_read = +- copy_static_var_set (ginfo->statics_not_read); +- dst_ginfo->statics_not_written = +- copy_static_var_set (ginfo->statics_not_written); ++ dst_ginfo->statics_read = ++ copy_static_var_set (ginfo->statics_read, false); ++ dst_ginfo->statics_written = ++ copy_static_var_set (ginfo->statics_written, false); + } + + /* Called when node is removed. 
*/ +@@ -522,13 +571,15 @@ void + ipa_ref_opt_summary_t::remove (cgraph_node *, + ipa_reference_optimization_summary_d *ginfo) + { +- if (ginfo->statics_not_read +- && ginfo->statics_not_read != all_module_statics) +- BITMAP_FREE (ginfo->statics_not_read); +- +- if (ginfo->statics_not_written +- && ginfo->statics_not_written != all_module_statics) +- BITMAP_FREE (ginfo->statics_not_written); ++ if (ginfo->statics_read ++ && ginfo->statics_read != all_module_statics ++ && ginfo->statics_read != no_module_statics) ++ BITMAP_FREE (ginfo->statics_read); ++ ++ if (ginfo->statics_written ++ && ginfo->statics_written != all_module_statics ++ && ginfo->statics_written != no_module_statics) ++ BITMAP_FREE (ginfo->statics_written); + } + + /* Analyze each function in the cgraph to see which global or statics +@@ -676,16 +727,23 @@ get_read_write_all_from_node (struct cgr + } + } + +-/* Skip edges from and to nodes without ipa_reference enables. This leave +- them out of strongy connected coponents and makes them easyto skip in the ++/* Skip edges from and to nodes without ipa_reference enabled. ++ Ignore not available symbols. This leave ++ them out of strongly connected components and makes them easy to skip in the + propagation loop bellow. */ + + static bool + ignore_edge_p (cgraph_edge *e) + { +- return (!opt_for_fn (e->caller->decl, flag_ipa_reference) +- || !opt_for_fn (e->callee->function_symbol ()->decl, +- flag_ipa_reference)); ++ enum availability avail; ++ cgraph_node *ultimate_target ++ = e->callee->function_or_virtual_thunk_symbol (&avail, e->caller); ++ ++ return (avail < AVAIL_INTERPOSABLE ++ || (avail == AVAIL_INTERPOSABLE ++ && !(flags_from_decl_or_type (e->callee->decl) & ECF_LEAF)) ++ || !opt_for_fn (e->caller->decl, flag_ipa_reference) ++ || !opt_for_fn (ultimate_target->decl, flag_ipa_reference)); + } + + /* Produce the global information by preforming a transitive closure +@@ -753,11 +811,12 @@ propagate (void) + if (read_all) + node_g->statics_read = all_module_statics; + else +- node_g->statics_read = copy_static_var_set (node_l->statics_read); ++ node_g->statics_read = copy_static_var_set (node_l->statics_read, true); + if (write_all) + node_g->statics_written = all_module_statics; + else +- node_g->statics_written = copy_static_var_set (node_l->statics_written); ++ node_g->statics_written ++ = copy_static_var_set (node_l->statics_written, true); + + /* Merge the sets of this cycle with all sets of callees reached + from this cycle. */ +@@ -841,12 +900,26 @@ propagate (void) + ipa_reference_vars_info_t node_info; + ipa_reference_global_vars_info_t node_g; + ++ /* No need to produce summaries for inline clones. */ ++ if (node->inlined_to) ++ continue; ++ + node_info = get_reference_vars_info (node); +- if (!node->alias && opt_for_fn (node->decl, flag_ipa_reference) +- && (node->get_availability () > AVAIL_INTERPOSABLE +- || (flags_from_decl_or_type (node->decl) & ECF_LEAF))) ++ if (!node->alias && opt_for_fn (node->decl, flag_ipa_reference)) + { + node_g = &node_info->global; ++ bool read_all = ++ (node_g->statics_read == all_module_statics ++ || bitmap_equal_p (node_g->statics_read, all_module_statics)); ++ bool written_all = ++ (node_g->statics_written == all_module_statics ++ || bitmap_equal_p (node_g->statics_written, ++ all_module_statics)); ++ ++ /* There is no need to produce summary if we collected nothing ++ useful. 
*/ ++ if (read_all && written_all) ++ continue; + + ipa_reference_optimization_summary_d *opt + = ipa_ref_opt_sum_summaries->get_create (node); +@@ -854,27 +927,25 @@ propagate (void) + /* Create the complimentary sets. */ + + if (bitmap_empty_p (node_g->statics_read)) +- opt->statics_not_read = all_module_statics; ++ opt->statics_read = no_module_statics; ++ else if (read_all) ++ opt->statics_read = all_module_statics; + else + { +- opt->statics_not_read ++ opt->statics_read + = BITMAP_ALLOC (&optimization_summary_obstack); +- if (node_g->statics_read != all_module_statics) +- bitmap_and_compl (opt->statics_not_read, +- all_module_statics, +- node_g->statics_read); ++ bitmap_copy (opt->statics_read, node_g->statics_read); + } + + if (bitmap_empty_p (node_g->statics_written)) +- opt->statics_not_written = all_module_statics; ++ opt->statics_written = no_module_statics; ++ else if (written_all) ++ opt->statics_written = all_module_statics; + else + { +- opt->statics_not_written ++ opt->statics_written + = BITMAP_ALLOC (&optimization_summary_obstack); +- if (node_g->statics_written != all_module_statics) +- bitmap_and_compl (opt->statics_not_written, +- all_module_statics, +- node_g->statics_written); ++ bitmap_copy (opt->statics_written, node_g->statics_written); + } + } + } +@@ -892,7 +963,7 @@ propagate (void) + + ipa_ref_var_info_summaries = NULL; + if (dump_file) +- splay_tree_delete (reference_vars_to_consider); ++ vec_free (reference_vars_to_consider); + reference_vars_to_consider = NULL; + return remove_p ? TODO_remove_functions : 0; + } +@@ -907,12 +978,10 @@ write_node_summary_p (struct cgraph_node + ipa_reference_optimization_summary_t info; + + /* See if we have (non-empty) info. */ +- if (!node->definition || node->global.inlined_to) ++ if (!node->definition || node->inlined_to) + return false; + info = get_reference_optimization_summary (node); +- if (!info +- || (bitmap_empty_p (info->statics_not_read) +- && bitmap_empty_p (info->statics_not_written))) ++ if (!info) + return false; + + /* See if we want to encode it. +@@ -925,11 +994,17 @@ write_node_summary_p (struct cgraph_node + && !referenced_from_this_partition_p (node, encoder)) + return false; + +- /* See if the info has non-empty intersections with vars we want to encode. */ +- if (!bitmap_intersect_p (info->statics_not_read, ltrans_statics) +- && !bitmap_intersect_p (info->statics_not_written, ltrans_statics)) +- return false; +- return true; ++ /* See if the info has non-empty intersections with vars we want to ++ encode. */ ++ bitmap_iterator bi; ++ unsigned int i; ++ EXECUTE_IF_AND_COMPL_IN_BITMAP (ltrans_statics, info->statics_read, 0, ++ i, bi) ++ return true; ++ EXECUTE_IF_AND_COMPL_IN_BITMAP (ltrans_statics, info->statics_written, 0, ++ i, bi) ++ return true; ++ return false; + } + + /* Stream out BITS<RANS_STATICS as list of decls to OB. 
+@@ -962,8 +1037,7 @@ stream_out_bitmap (struct lto_simple_out + return; + EXECUTE_IF_AND_IN_BITMAP (bits, ltrans_statics, 0, index, bi) + { +- tree decl = (tree)splay_tree_lookup (reference_vars_to_consider, +- index)->value; ++ tree decl = (*reference_vars_to_consider) [index]; + lto_output_var_decl_index (ob->decl_state, ob->main_stream, decl); + } + } +@@ -981,23 +1055,23 @@ ipa_reference_write_optimization_summary + auto_bitmap ltrans_statics; + int i; + +- reference_vars_to_consider = splay_tree_new (splay_tree_compare_ints, 0, 0); ++ vec_alloc (reference_vars_to_consider, ipa_reference_vars_uids); ++ reference_vars_to_consider->safe_grow (ipa_reference_vars_uids); + + /* See what variables we are interested in. */ + for (i = 0; i < lto_symtab_encoder_size (encoder); i++) + { + symtab_node *snode = lto_symtab_encoder_deref (encoder, i); + varpool_node *vnode = dyn_cast (snode); ++ int id; ++ + if (vnode +- && bitmap_bit_p (all_module_statics, +- ipa_reference_var_uid (vnode->decl)) ++ && (id = ipa_reference_var_uid (vnode->decl)) != -1 + && referenced_from_this_partition_p (vnode, encoder)) + { + tree decl = vnode->decl; +- bitmap_set_bit (ltrans_statics, ipa_reference_var_uid (decl)); +- splay_tree_insert (reference_vars_to_consider, +- ipa_reference_var_uid (decl), +- (splay_tree_value)decl); ++ bitmap_set_bit (ltrans_statics, id); ++ (*reference_vars_to_consider)[id] = decl; + ltrans_statics_bitcount ++; + } + } +@@ -1032,14 +1106,14 @@ ipa_reference_write_optimization_summary + node_ref = lto_symtab_encoder_encode (encoder, snode); + streamer_write_uhwi_stream (ob->main_stream, node_ref); + +- stream_out_bitmap (ob, info->statics_not_read, ltrans_statics, ++ stream_out_bitmap (ob, info->statics_read, ltrans_statics, + ltrans_statics_bitcount); +- stream_out_bitmap (ob, info->statics_not_written, ltrans_statics, ++ stream_out_bitmap (ob, info->statics_written, ltrans_statics, + ltrans_statics_bitcount); + } + } + lto_destroy_simple_output_block (ob); +- splay_tree_delete (reference_vars_to_consider); ++ delete reference_vars_to_consider; + } + + /* Deserialize the ipa info for lto. 
*/ +@@ -1053,10 +1127,15 @@ ipa_reference_read_optimization_summary + unsigned int j = 0; + bitmap_obstack_initialize (&optimization_summary_obstack); + +- if (ipa_ref_opt_sum_summaries == NULL) +- ipa_ref_opt_sum_summaries = new ipa_ref_opt_summary_t (symtab); ++ gcc_checking_assert (ipa_ref_opt_sum_summaries == NULL); ++ ipa_ref_opt_sum_summaries = new ipa_ref_opt_summary_t (symtab); ++ ipa_reference_vars_map = new reference_vars_map_t(257); ++ varpool_node_hooks ++ = symtab->add_varpool_removal_hook (varpool_removal_hook, NULL); ++ ipa_reference_vars_uids = 0; + + all_module_statics = BITMAP_ALLOC (&optimization_summary_obstack); ++ no_module_statics = BITMAP_ALLOC (&optimization_summary_obstack); + + while ((file_data = file_data_vec[j++])) + { +@@ -1081,8 +1160,11 @@ ipa_reference_read_optimization_summary + unsigned int var_index = streamer_read_uhwi (ib); + tree v_decl = lto_file_decl_data_get_var_decl (file_data, + var_index); ++ bool existed; + bitmap_set_bit (all_module_statics, +- ipa_reference_var_uid (v_decl)); ++ ipa_reference_var_get_or_insert_uid ++ (v_decl, &existed)); ++ gcc_checking_assert (!existed); + if (dump_file) + fprintf (dump_file, " %s", fndecl_name (v_decl)); + } +@@ -1102,57 +1184,65 @@ ipa_reference_read_optimization_summary + ipa_reference_optimization_summary_d *info + = ipa_ref_opt_sum_summaries->get_create (node); + +- info->statics_not_read = BITMAP_ALLOC +- (&optimization_summary_obstack); +- info->statics_not_written = BITMAP_ALLOC +- (&optimization_summary_obstack); + if (dump_file) + fprintf (dump_file, +- "\nFunction name:%s:\n static not read:", ++ "\nFunction name:%s:\n static read:", + node->dump_asm_name ()); + +- /* Set the statics not read. */ ++ /* Set the statics read. */ + v_count = streamer_read_hwi (ib); + if (v_count == -1) + { +- info->statics_not_read = all_module_statics; ++ info->statics_read = all_module_statics; + if (dump_file) + fprintf (dump_file, " all module statics"); + } ++ else if (v_count == 0) ++ info->statics_read = no_module_statics; + else +- for (j = 0; j < (unsigned int)v_count; j++) +- { +- unsigned int var_index = streamer_read_uhwi (ib); +- tree v_decl = lto_file_decl_data_get_var_decl (file_data, +- var_index); +- bitmap_set_bit (info->statics_not_read, +- ipa_reference_var_uid (v_decl)); +- if (dump_file) +- fprintf (dump_file, " %s", fndecl_name (v_decl)); +- } ++ { ++ info->statics_read = BITMAP_ALLOC ++ (&optimization_summary_obstack); ++ for (j = 0; j < (unsigned int)v_count; j++) ++ { ++ unsigned int var_index = streamer_read_uhwi (ib); ++ tree v_decl = lto_file_decl_data_get_var_decl (file_data, ++ var_index); ++ bitmap_set_bit (info->statics_read, ++ ipa_reference_var_uid (v_decl)); ++ if (dump_file) ++ fprintf (dump_file, " %s", fndecl_name (v_decl)); ++ } ++ } + + if (dump_file) + fprintf (dump_file, +- "\n static not written:"); +- /* Set the statics not written. */ ++ "\n static written:"); ++ /* Set the statics written. 
*/ + v_count = streamer_read_hwi (ib); + if (v_count == -1) + { +- info->statics_not_written = all_module_statics; ++ info->statics_written = all_module_statics; + if (dump_file) + fprintf (dump_file, " all module statics"); + } ++ else if (v_count == 0) ++ info->statics_written = no_module_statics; + else +- for (j = 0; j < (unsigned int)v_count; j++) +- { +- unsigned int var_index = streamer_read_uhwi (ib); +- tree v_decl = lto_file_decl_data_get_var_decl (file_data, +- var_index); +- bitmap_set_bit (info->statics_not_written, +- ipa_reference_var_uid (v_decl)); +- if (dump_file) +- fprintf (dump_file, " %s", fndecl_name (v_decl)); +- } ++ { ++ info->statics_written = BITMAP_ALLOC ++ (&optimization_summary_obstack); ++ for (j = 0; j < (unsigned int)v_count; j++) ++ { ++ unsigned int var_index = streamer_read_uhwi (ib); ++ tree v_decl = lto_file_decl_data_get_var_decl (file_data, ++ var_index); ++ bitmap_set_bit (info->statics_written, ++ ipa_reference_var_uid (v_decl)); ++ if (dump_file) ++ fprintf (dump_file, " %s", fndecl_name (v_decl)); ++ } ++ } + if (dump_file) + fprintf (dump_file, "\n"); + } +@@ -1233,6 +1323,9 @@ ipa_reference_c_finalize (void) + { + delete ipa_ref_opt_sum_summaries; + ipa_ref_opt_sum_summaries = NULL; ++ delete ipa_reference_vars_map; ++ ipa_reference_vars_map = NULL; ++ symtab->remove_varpool_removal_hook (varpool_node_hooks); + } + + if (ipa_init_p) +diff -Nurp a/gcc/ipa-reference.h b/gcc/ipa-reference.h +--- a/gcc/ipa-reference.h 2020-04-30 15:14:04.580000000 +0800 ++++ b/gcc/ipa-reference.h 2020-04-30 15:14:56.540000000 +0800 +@@ -22,15 +22,10 @@ along with GCC; see the file COPYING3. + #define GCC_IPA_REFERENCE_H + + /* In ipa-reference.c */ +-bitmap ipa_reference_get_not_read_global (struct cgraph_node *fn); +-bitmap ipa_reference_get_not_written_global (struct cgraph_node *fn); ++bitmap ipa_reference_get_read_global (struct cgraph_node *fn); ++bitmap ipa_reference_get_written_global (struct cgraph_node *fn); + void ipa_reference_c_finalize (void); +- +-inline int +-ipa_reference_var_uid (tree t) +-{ +- return DECL_UID (symtab_node::get (t)->ultimate_alias_target (NULL)->decl); +-} ++int ipa_reference_var_uid (tree t); + + #endif /* GCC_IPA_REFERENCE_H */ + +diff -Nurp a/gcc/ipa-utils.c b/gcc/ipa-utils.c +--- a/gcc/ipa-utils.c 2020-04-30 15:14:04.576000000 +0800 ++++ b/gcc/ipa-utils.c 2020-04-30 15:14:56.588000000 +0800 +@@ -103,8 +103,7 @@ searchc (struct searchc_env* env, struct + continue; + + if (w->aux +- && (avail > AVAIL_INTERPOSABLE +- || avail == AVAIL_INTERPOSABLE)) ++ && (avail >= AVAIL_INTERPOSABLE)) + { + w_info = (struct ipa_dfs_info *) w->aux; + if (w_info->new_node) +@@ -297,7 +296,7 @@ ipa_reverse_postorder (struct cgraph_nod + if (!node->aux + && (pass + || (!node->address_taken +- && !node->global.inlined_to ++ && !node->inlined_to + && !node->alias && !node->thunk.thunk_p + && !node->only_called_directly_p ()))) + { +diff -Nurp a/gcc/ipa-utils.h b/gcc/ipa-utils.h +--- a/gcc/ipa-utils.h 2020-04-30 15:14:04.652000000 +0800 ++++ b/gcc/ipa-utils.h 2020-04-30 15:14:56.624000000 +0800 +@@ -47,6 +47,9 @@ void ipa_merge_profiles (struct cgraph_n + struct cgraph_node *src, bool preserve_body = false); + bool recursive_call_p (tree, tree); + ++/* In ipa-prop.c */ ++void ipa_remove_useless_jump_functions (); ++ + /* In ipa-profile.c */ + bool ipa_propagate_frequency (struct cgraph_node *node); + +@@ -54,6 +57,7 @@ bool ipa_propagate_frequency (struct cgr + + struct odr_type_d; + typedef odr_type_d *odr_type; ++extern bool thunk_expansion; + void 
build_type_inheritance_graph (void); + void rebuild_type_inheritance_graph (void); + void update_type_inheritance_graph (void); +@@ -263,5 +267,3 @@ odr_type_p (const_tree t) + } + + #endif /* GCC_IPA_UTILS_H */ +- +- +diff -Nurp a/gcc/ipa-visibility.c b/gcc/ipa-visibility.c +--- a/gcc/ipa-visibility.c 2020-04-30 15:14:04.568000000 +0800 ++++ b/gcc/ipa-visibility.c 2020-04-30 15:14:56.588000000 +0800 +@@ -707,7 +707,7 @@ function_and_variable_visibility (bool w + || DECL_EXTERNAL (node->decl)); + if (cgraph_externally_visible_p (node, whole_program)) + { +- gcc_assert (!node->global.inlined_to); ++ gcc_assert (!node->inlined_to); + node->externally_visible = true; + } + else +diff -Nurp a/gcc/lto/lto.c b/gcc/lto/lto.c +--- a/gcc/lto/lto.c 2020-04-30 15:14:04.664000000 +0800 ++++ b/gcc/lto/lto.c 2020-04-30 15:14:56.552000000 +0800 +@@ -3211,9 +3211,9 @@ do_whole_program_analysis (void) + else + gcc_unreachable (); + +- /* Inline summaries are needed for balanced partitioning. Free them now so ++ /* Size summaries are needed for balanced partitioning. Free them now so + the memory can be used for streamer caches. */ +- ipa_free_fn_summary (); ++ ipa_free_size_summary (); + + /* AUX pointers are used by partitioning code to bookkeep number of + partitions symbol is in. This is no longer needed. */ +diff -Nurp a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c +--- a/gcc/lto/lto-partition.c 2020-04-30 15:14:04.664000000 +0800 ++++ b/gcc/lto/lto-partition.c 2020-04-30 15:14:56.592000000 +0800 +@@ -171,7 +171,7 @@ add_symbol_to_partition_1 (ltrans_partit + { + struct cgraph_edge *e; + if (!node->alias && c == SYMBOL_PARTITION) +- part->insns += ipa_fn_summaries->get (cnode)->size; ++ part->insns += ipa_size_summaries->get (cnode)->size; + + /* Add all inline clones and callees that are duplicated. */ + for (e = cnode->callees; e; e = e->next_callee) +@@ -182,7 +182,7 @@ add_symbol_to_partition_1 (ltrans_partit + + /* Add all thunks associated with the function. 
*/ + for (e = cnode->callers; e; e = e->next_caller) +- if (e->caller->thunk.thunk_p && !e->caller->global.inlined_to) ++ if (e->caller->thunk.thunk_p && !e->caller->inlined_to) + add_symbol_to_partition_1 (part, e->caller); + } + +@@ -233,8 +233,8 @@ contained_in_symbol (symtab_node *node) + if (cgraph_node *cnode = dyn_cast (node)) + { + cnode = cnode->function_symbol (); +- if (cnode->global.inlined_to) +- cnode = cnode->global.inlined_to; ++ if (cnode->inlined_to) ++ cnode = cnode->inlined_to; + return cnode; + } + else if (varpool_node *vnode = dyn_cast (node)) +@@ -291,7 +291,7 @@ undo_partition (ltrans_partition partiti + + if (!node->alias && (cnode = dyn_cast (node)) + && node->get_partitioning_class () == SYMBOL_PARTITION) +- partition->insns -= ipa_fn_summaries->get (cnode)->size; ++ partition->insns -= ipa_size_summaries->get (cnode)->size; + lto_symtab_encoder_delete_node (partition->encoder, node); + node->aux = (void *)((size_t)node->aux - 1); + } +@@ -529,7 +529,7 @@ lto_balanced_map (int n_lto_partitions, + else + order.safe_push (node); + if (!node->alias) +- total_size += ipa_fn_summaries->get (node)->size; ++ total_size += ipa_size_summaries->get (node)->size; + } + + original_total_size = total_size; +diff -Nurp a/gcc/lto/lto-symtab.c b/gcc/lto/lto-symtab.c +--- a/gcc/lto/lto-symtab.c 2020-04-30 15:14:04.664000000 +0800 ++++ b/gcc/lto/lto-symtab.c 2020-04-30 15:14:56.592000000 +0800 +@@ -63,7 +63,7 @@ lto_cgraph_replace_node (struct cgraph_n + prevailing_node->forced_by_abi = true; + if (node->address_taken) + { +- gcc_assert (!prevailing_node->global.inlined_to); ++ gcc_assert (!prevailing_node->inlined_to); + prevailing_node->mark_address_taken (); + } + if (node->definition && prevailing_node->definition +@@ -909,7 +909,7 @@ lto_symtab_merge_symbols_1 (symtab_node + cgraph_node *ce = dyn_cast (e); + + if ((!TREE_PUBLIC (e->decl) && !DECL_EXTERNAL (e->decl)) +- || (ce != NULL && ce->global.inlined_to)) ++ || (ce != NULL && ce->inlined_to)) + continue; + symtab_node *to = symtab_node::get (lto_symtab_prevailing_decl (e->decl)); + +diff -Nurp a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c +--- a/gcc/lto-cgraph.c 2020-04-30 15:14:04.636000000 +0800 ++++ b/gcc/lto-cgraph.c 2020-04-30 15:14:56.588000000 +0800 +@@ -329,7 +329,7 @@ reachable_from_other_partition_p (struct + struct cgraph_edge *e; + if (!node->definition) + return false; +- if (node->global.inlined_to) ++ if (node->inlined_to) + return false; + for (e = node->callers; e; e = e->next_caller) + { +@@ -399,7 +399,7 @@ lto_output_node (struct lto_simple_outpu + boundary_p = !lto_symtab_encoder_in_partition_p (encoder, node); + + if (node->analyzed && (!boundary_p || node->alias +- || (node->thunk.thunk_p && !node->global.inlined_to))) ++ || (node->thunk.thunk_p && !node->inlined_to))) + tag = LTO_symtab_analyzed_node; + else + tag = LTO_symtab_unavail_node; +@@ -422,7 +422,7 @@ lto_output_node (struct lto_simple_outpu + && node->get_partitioning_class () == SYMBOL_PARTITION) + { + /* Inline clones cannot be part of boundary. +- gcc_assert (!node->global.inlined_to); ++ gcc_assert (!node->inlined_to); + + FIXME: At the moment they can be, when partition contains an inline + clone that is clone of inline clone from outside partition. 
We can +@@ -468,9 +468,9 @@ lto_output_node (struct lto_simple_outpu + + if (tag == LTO_symtab_analyzed_node) + { +- if (node->global.inlined_to) ++ if (node->inlined_to) + { +- ref = lto_symtab_encoder_lookup (encoder, node->global.inlined_to); ++ ref = lto_symtab_encoder_lookup (encoder, node->inlined_to); + gcc_assert (ref != LCC_NOT_FOUND); + } + else +@@ -884,7 +884,7 @@ compute_ltrans_boundary (lto_symtab_enco + if (!lto_symtab_encoder_in_partition_p (encoder, callee)) + { + /* We should have moved all the inlines. */ +- gcc_assert (!callee->global.inlined_to); ++ gcc_assert (!callee->inlined_to); + add_node_to (encoder, callee, false); + } + } +@@ -911,7 +911,7 @@ compute_ltrans_boundary (lto_symtab_enco + && !lto_symtab_encoder_in_partition_p + (encoder, callee)) + { +- gcc_assert (!callee->global.inlined_to); ++ gcc_assert (!callee->inlined_to); + add_node_to (encoder, callee, false); + } + } +@@ -928,7 +928,7 @@ compute_ltrans_boundary (lto_symtab_enco + if (node->alias && node->analyzed) + create_references (encoder, node); + if (cnode +- && cnode->thunk.thunk_p && !cnode->global.inlined_to) ++ && cnode->thunk.thunk_p && !cnode->inlined_to) + add_node_to (encoder, cnode->callees->callee, false); + while (node->transparent_alias && node->analyzed) + { +@@ -984,7 +984,7 @@ output_symtab (void) + { + node = dyn_cast (lto_symtab_encoder_deref (encoder, i)); + if (node +- && ((node->thunk.thunk_p && !node->global.inlined_to) ++ && ((node->thunk.thunk_p && !node->inlined_to) + || lto_symtab_encoder_in_partition_p (encoder, node))) + { + output_outgoing_cgraph_edges (node->callees, ob, encoder); +@@ -1283,7 +1283,7 @@ input_node (struct lto_file_decl_data *f + input_overwrite_node (file_data, node, tag, &bp); + + /* Store a reference for now, and fix up later to be a pointer. */ +- node->global.inlined_to = (cgraph_node *) (intptr_t) ref; ++ node->inlined_to = (cgraph_node *) (intptr_t) ref; + + if (group) + { +@@ -1542,7 +1542,7 @@ input_cgraph_1 (struct lto_file_decl_dat + int ref; + if (cgraph_node *cnode = dyn_cast (node)) + { +- ref = (int) (intptr_t) cnode->global.inlined_to; ++ ref = (int) (intptr_t) cnode->inlined_to; + + /* We share declaration of builtins, so we may read same node twice. */ + if (!node->aux) +@@ -1551,10 +1551,10 @@ input_cgraph_1 (struct lto_file_decl_dat + + /* Fixup inlined_to from reference to pointer. 
*/ + if (ref != LCC_NOT_FOUND) +- dyn_cast (node)->global.inlined_to ++ dyn_cast (node)->inlined_to + = dyn_cast (nodes[ref]); + else +- cnode->global.inlined_to = NULL; ++ cnode->inlined_to = NULL; + } + + ref = (int) (intptr_t) node->same_comdat_group; +diff -Nurp a/gcc/omp-simd-clone.c b/gcc/omp-simd-clone.c +--- a/gcc/omp-simd-clone.c 2020-04-30 15:14:04.644000000 +0800 ++++ b/gcc/omp-simd-clone.c 2020-04-30 15:14:56.592000000 +0800 +@@ -1635,7 +1635,7 @@ expand_simd_clones (struct cgraph_node * + tree attr = lookup_attribute ("omp declare simd", + DECL_ATTRIBUTES (node->decl)); + if (attr == NULL_TREE +- || node->global.inlined_to ++ || node->inlined_to + || lookup_attribute ("noclone", DECL_ATTRIBUTES (node->decl))) + return; + +diff -Nurp a/gcc/params.def b/gcc/params.def +--- a/gcc/params.def 2020-04-30 15:14:04.560000000 +0800 ++++ b/gcc/params.def 2020-04-30 15:14:56.700000000 +0800 +@@ -1093,6 +1093,18 @@ DEFPARAM (PARAM_IPA_CP_VALUE_LIST_SIZE, + "interprocedural constant propagation.", + 8, 0, 0) + ++DEFPARAM (PARAM_IPA_CP_MIN_RECURSIVE_PROBABILITY, ++ "ipa-cp-min-recursive-probability", ++ "Recursive cloning only when the probability of call being executed " ++ "exceeds the parameter. ", ++ 2, 0, 0) ++ ++DEFPARAM (PARAM_IPA_CP_MAX_RECURSIVE_DEPTH, ++ "ipa-cp-max-recursive-depth", ++ "Threshold ipa-cp opportunity evaluation that is still considered " ++ "Maximum depth of recursive cloning for self-recursive function.", ++ 8, 0, 0) ++ + DEFPARAM (PARAM_IPA_CP_EVAL_THRESHOLD, + "ipa-cp-eval-threshold", + "Threshold ipa-cp opportunity evaluation that is still considered " +@@ -1129,6 +1141,18 @@ DEFPARAM (PARAM_IPA_MAX_AA_STEPS, + "parameter analysis based on alias analysis in any given function.", + 25000, 0, 0) + ++DEFPARAM (PARAM_IPA_MAX_SWITCH_PREDICATE_BOUNDS, ++ "ipa-max-switch-predicate-bounds", ++ "Maximal number of boundary endpoints of case ranges of switch " ++ "statement used during IPA functoin summary generation.", ++ 5, 0, 0) ++ ++DEFPARAM (PARAM_IPA_MAX_PARAM_EXPR_OPS, ++ "ipa-max-param-expr-ops", ++ "Maximum number of operations in a parameter expression that can " ++ "be handled by IPA analysis.", ++ 10, 0, 0) ++ + /* WHOPR partitioning configuration. */ + + DEFPARAM (PARAM_LTO_PARTITIONS, +diff -Nurp a/gcc/passes.c b/gcc/passes.c +--- a/gcc/passes.c 2020-04-30 15:14:04.632000000 +0800 ++++ b/gcc/passes.c 2020-04-30 15:14:56.592000000 +0800 +@@ -3047,7 +3047,7 @@ function_called_by_processed_nodes_p (vo + continue; + if (TREE_ASM_WRITTEN (e->caller->decl)) + continue; +- if (!e->caller->process && !e->caller->global.inlined_to) ++ if (!e->caller->process && !e->caller->inlined_to) + break; + } + if (dump_file && e) +diff -Nurp a/gcc/symtab.c b/gcc/symtab.c +--- a/gcc/symtab.c 2020-04-30 15:14:04.636000000 +0800 ++++ b/gcc/symtab.c 2020-04-30 15:14:56.592000000 +0800 +@@ -1874,7 +1874,7 @@ symtab_node::get_partitioning_class (voi + if (DECL_ABSTRACT_P (decl)) + return SYMBOL_EXTERNAL; + +- if (cnode && cnode->global.inlined_to) ++ if (cnode && cnode->inlined_to) + return SYMBOL_DUPLICATE; + + /* Transparent aliases are always duplicated. */ +@@ -2274,7 +2274,7 @@ symtab_node::binds_to_current_def_p (sym + return true; + + /* Inline clones always binds locally. 
*/ +- if (cnode && cnode->global.inlined_to) ++ if (cnode && cnode->inlined_to) + return true; + + if (DECL_EXTERNAL (decl)) +@@ -2286,7 +2286,7 @@ symtab_node::binds_to_current_def_p (sym + { + cgraph_node *cref = dyn_cast (ref); + if (cref) +- ref = cref->global.inlined_to; ++ ref = cref->inlined_to; + } + + /* If this is a reference from symbol itself and there are no aliases, we +diff -Nurp a/gcc/testsuite/gcc.c-torture/compile/flatten.c b/gcc/testsuite/gcc.c-torture/compile/flatten.c +--- a/gcc/testsuite/gcc.c-torture/compile/flatten.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.c-torture/compile/flatten.c 2020-04-30 15:14:56.684000000 +0800 +@@ -0,0 +1,5 @@ ++int you_shall_not_flatten_me () __attribute__ ((flatten)); ++main() ++{ ++ you_shall_not_flatten_me (); ++} +diff -Nurp a/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c +--- a/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/ipa/ipa-clone-2.c 2020-04-30 15:14:56.696000000 +0800 +@@ -0,0 +1,47 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -fdump-ipa-cp-details -fno-early-inlining --param ipa-cp-max-recursive-depth=8" } */ ++ ++int fn(); ++ ++int data[100]; ++ ++int recur_fn (int i) ++{ ++ int j; ++ ++ if (i == 6) ++ { ++ fn(); ++ fn(); ++ fn(); ++ fn(); ++ fn(); ++ fn(); ++ fn(); ++ fn(); ++ fn(); ++ fn(); ++ fn(); ++ fn(); ++ return 10; ++ } ++ ++ data[i] = i; ++ ++ for (j = 0; j < 100; j++) ++ recur_fn (i + 1); ++ ++ return i; ++} ++ ++int main () ++{ ++ int i; ++ ++ for (i = 0; i < 100; i++) ++ recur_fn (1) + recur_fn (-5); ++ ++ return 1; ++} ++ ++/* { dg-final { scan-ipa-dump-times "Creating a specialized node of recur_fn/\[0-9\]*\\." 12 "cp" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c +--- a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c 2020-04-30 15:14:56.664000000 +0800 +@@ -0,0 +1,78 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -fdump-ipa-cp-details -fno-inline" } */ ++ ++int data1; ++ ++int callee1(int *v) ++{ ++ if (*v < 2) ++ return 0; ++ else ++ { ++ int t = data1; ++ ++ data1 = *v; ++ *v = t; ++ ++ return 1; ++ } ++} ++ ++int __attribute__((pure)) callee2(int *v) ++{ ++ if (*v < 2) ++ return 0; ++ else ++ { ++ data1 = v[0] + v[2]; ++ ++ return 1; ++ } ++} ++ ++int caller1(int c, int *r) ++{ ++ int a = 1; ++ ++ if (c) ++ return callee1(&a); ++ else ++ { ++ *r = 2; ++ return callee1(r); ++ } ++} ++ ++int data2[200]; ++int data3; ++ ++int __attribute__((const)) gen_cond(int); ++ ++int caller2(void) ++{ ++ int i, j; ++ int sum = 0; ++ int a[8]; ++ ++ a[0] = 3; ++ for (i = 0; i < 100; i++) ++ { ++ if (gen_cond (i)) ++ continue; ++ ++ a[2] = 4; ++ for (j = 0; j < 100; j++) ++ { ++ data2[i + j] = (i ^ j) + data3; ++ ++ sum += callee2(a); ++ } ++ } ++ ++ return sum; ++} ++ ++/* { dg-final { scan-ipa-dump-times "offset: 0, type: int, CONST: 1" 1 "cp" } } */ ++/* { dg-final { scan-ipa-dump-times "offset: 0, type: int, CONST: 2" 1 "cp" } } */ ++/* { dg-final { scan-ipa-dump-times "offset: 0, type: int, CONST: 3" 1 "cp" } } */ ++/* { dg-final { scan-ipa-dump-times "offset: 64, type: int, CONST: 4" 1 "cp" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-11.c b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-11.c +--- a/gcc/testsuite/gcc.dg/ipa/ipcp-agg-11.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/ipa/ipcp-agg-11.c 2020-04-30 15:14:56.664000000 +0800 +@@ -0,0 +1,77 @@ 
++/* { dg-do compile } */ ++/* { dg-options "-O3 -fno-ipa-sra -fdump-ipa-cp-details -fno-early-inlining" } */ ++/* { dg-add-options bind_pic_locally } */ ++ ++struct S ++{ ++ int a, b, c; ++}; ++ ++void *blah(int, void *); ++ ++#define foo_body(p)\ ++{ \ ++ int i, c = (p)->c; \ ++ int b = (p)->b; \ ++ void *v = (void *) (p); \ ++ \ ++ for (i= 0; i< c; i++) \ ++ v = blah(b + i, v); \ ++} ++ ++static void __attribute__ ((noinline)) ++foo_v (struct S s) ++{ ++ foo_body (&s); ++} ++ ++static void __attribute__ ((noinline)) ++foo_r (struct S *p) ++{ ++ foo_body (p); ++} ++ ++static void ++goo_v (int a, int *p) ++{ ++ struct S s; ++ s.a = 101; ++ s.b = a % 7; ++ s.c = *p + 6; ++ foo_v (s); ++} ++ ++static void ++goo_r (int a, struct S n) ++{ ++ struct S s; ++ s.a = 1; ++ s.b = a + 5; ++ s.c = -n.b; ++ foo_r (&s); ++} ++ ++void ++entry () ++{ ++ int a; ++ int v; ++ struct S s; ++ ++ a = 9; ++ v = 3; ++ goo_v (a, &v); ++ ++ a = 100; ++ s.b = 18; ++ goo_r (a, s); ++} ++ ++/* { dg-final { scan-ipa-dump "offset: 0, type: int, CONST: 1" "cp" } } */ ++/* { dg-final { scan-ipa-dump "offset: 32, type: int, PASS THROUGH: 0, op plus_expr 5" "cp" } } */ ++/* { dg-final { scan-ipa-dump "offset: 64, type: int, LOAD AGG: 1 \\\[offset: 32, by value], op negate_expr" "cp" } } */ ++/* { dg-final { scan-ipa-dump "offset: 0, type: int, CONST: 101" "cp" } } */ ++/* { dg-final { scan-ipa-dump "offset: 32, type: int, PASS THROUGH: 0, op trunc_mod_expr 7" "cp" } } */ ++/* { dg-final { scan-ipa-dump "offset: 64, type: int, LOAD AGG: 1 \\\[offset: 0, by reference], op plus_expr 6" "cp" } } */ ++/* { dg-final { scan-ipa-dump "Aggregate replacements: 0\\\[0]=1, 0\\\[32]=105, 0\\\[64]=-18" "cp" } } */ ++/* { dg-final { scan-ipa-dump "Aggregate replacements: 0\\\[0]=101, 0\\\[32]=2, 0\\\[64]=9" "cp" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/ipa/pr91089.c b/gcc/testsuite/gcc.dg/ipa/pr91089.c +--- a/gcc/testsuite/gcc.dg/ipa/pr91089.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/ipa/pr91089.c 2020-04-30 15:14:56.516000000 +0800 +@@ -0,0 +1,62 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O3 -fdump-ipa-cp-details -fdump-ipa-fnsummary-details --param ipa-max-switch-predicate-bounds=10 -fno-inline" } */ ++ ++int fn (); ++ ++int data; ++ ++int callee (int i) ++{ ++ switch (i) ++ { ++ case -126: return i + 13; ++ case -127: return i + 5; ++ case -8: return i * i; ++ case 0: return i % 9; ++ case 5: ++ case 7: ++ case 6: return 3; ++ default: ++ fn (); ++ fn (); ++ fn (); ++ fn (); ++ fn (); ++ fn (); ++ fn (); ++ fn (); ++ fn (); ++ fn (); ++ fn (); ++ fn (); ++ fn (); ++ fn (); ++ fn (); ++ fn (); ++ fn (); ++ fn (); ++ fn (); ++ } ++ ++ return data += i; ++} ++ ++int caller () ++{ ++ return callee (-127) + ++ callee (-126) + ++ callee (-8) + ++ callee (0) + ++ callee (5) + ++ callee (6) + ++ callee (7) + ++ callee (100); ++} ++ ++/* { dg-final { scan-ipa-dump-times "Creating a specialized node of callee" 7 "cp" } } */ ++/* { dg-final { scan-ipa-dump "op0 < -127" "fnsummary" } } */ ++/* { dg-final { scan-ipa-dump "op0 > -126" "fnsummary" } } */ ++/* { dg-final { scan-ipa-dump "op0 != -8" "fnsummary" } } */ ++/* { dg-final { scan-ipa-dump "op0 != 0" "fnsummary" } } */ ++/* { dg-final { scan-ipa-dump "op0 < 5" "fnsummary" } } */ ++/* { dg-final { scan-ipa-dump "op0 > 7" "fnsummary" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/pr46076.c b/gcc/testsuite/gcc.dg/tree-ssa/pr46076.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr46076.c 2020-04-30 15:14:05.756000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr46076.c 
2020-04-30 15:14:56.640000000 +0800 +@@ -19,9 +19,12 @@ main() + { + /* Make sure we perform indirect inlining of one and two and optimize + the result to a constant. */ +- if (print(one) != 3) +- link_error (); +- if (print(two) != 5) +- link_error (); ++ for (int i = 0; i < 100; i++) ++ { ++ if (print(one) != 3) ++ link_error (); ++ if (print(two) != 5) ++ link_error (); ++ } + return 0; + } +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-73.c 2020-04-30 15:14:56.472000000 +0800 +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fdump-tree-fre1" } */ ++ ++typedef int v2si __attribute__((vector_size(__SIZEOF_INT__ * 2))); ++int foo (int *a) ++{ ++ a[0] = 1; ++ a[1] = 2; ++ v2si x = *(v2si *)a; ++ *(v2si *)&a[2] = x; ++ return a[3]; ++} ++ ++/* { dg-final { scan-tree-dump "return 2;" "fre1" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-74.c 2020-04-30 15:14:56.472000000 +0800 +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fdump-tree-fre1" } */ ++ ++typedef int v4si __attribute__((vector_size(__SIZEOF_INT__ * 4))); ++int foo (int *a) ++{ ++ a[2] = 2; ++ a[0] = 0; ++ a[1] = 1; ++ a[3] = 4; ++ v4si x = *(v4si *)a; ++ *(v4si *)&a[4] = x; ++ return a[4] + a[7]; ++} ++ ++/* { dg-final { scan-tree-dump "return 4;" "fre1" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-fre-76.c 2020-04-30 15:14:56.472000000 +0800 +@@ -0,0 +1,16 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O -fdump-tree-fre1" } */ ++ ++typedef int v4si __attribute__((vector_size(__SIZEOF_INT__ * 4))); ++int foo (int *a) ++{ ++ __builtin_memset (a, 0, 2 * __SIZEOF_INT__); ++ a[2] = 2; ++ a[0] = 1; ++ a[3] = 3; ++ v4si x = *(v4si *)a; ++ *(v4si *)&a[4] = x; ++ return a[4] + a[5] + a[7]; ++} ++ ++/* { dg-final { scan-tree-dump "return 4;" "fre1" } } */ +diff -Nurp a/gcc/tree-sra.c b/gcc/tree-sra.c +--- a/gcc/tree-sra.c 2020-04-30 15:14:04.568000000 +0800 ++++ b/gcc/tree-sra.c 2020-04-30 15:14:56.556000000 +0800 +@@ -5488,7 +5488,7 @@ ipa_sra_preliminary_function_checks (str + + if ((DECL_ONE_ONLY (node->decl) || DECL_EXTERNAL (node->decl)) + && ipa_fn_summaries->get (node) +- && ipa_fn_summaries->get (node)->size >= MAX_INLINE_INSNS_AUTO) ++ && ipa_size_summaries->get (node)->size >= MAX_INLINE_INSNS_AUTO) + { + if (dump_file) + fprintf (dump_file, "Function too big to be made truly local.\n"); +diff -Nurp a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c +--- a/gcc/tree-ssa-alias.c 2020-04-30 15:14:04.648000000 +0800 ++++ b/gcc/tree-ssa-alias.c 2020-04-30 15:14:56.540000000 +0800 +@@ -1822,14 +1822,16 @@ ref_maybe_used_by_call_p_1 (gcall *call, + if (callee != NULL_TREE && VAR_P (base) && TREE_STATIC (base)) + { + struct cgraph_node *node = cgraph_node::get (callee); +- bitmap not_read; ++ bitmap read; ++ int id; + + /* FIXME: Callee can be an OMP builtin that does not have a call graph + node yet. We should enforce that there are nodes for all decls in the + IL and remove this check instead. 
*/ + if (node +- && (not_read = ipa_reference_get_not_read_global (node)) +- && bitmap_bit_p (not_read, ipa_reference_var_uid (base))) ++ && (id = ipa_reference_var_uid (base)) != -1 ++ && (read = ipa_reference_get_read_global (node)) ++ && !bitmap_bit_p (read, id)) + goto process_args; + } + +@@ -2217,11 +2219,13 @@ call_may_clobber_ref_p_1 (gcall *call, a + if (callee != NULL_TREE && VAR_P (base) && TREE_STATIC (base)) + { + struct cgraph_node *node = cgraph_node::get (callee); +- bitmap not_written; ++ bitmap written; ++ int id; + + if (node +- && (not_written = ipa_reference_get_not_written_global (node)) +- && bitmap_bit_p (not_written, ipa_reference_var_uid (base))) ++ && (id = ipa_reference_var_uid (base)) != -1 ++ && (written = ipa_reference_get_written_global (node)) ++ && !bitmap_bit_p (written, id)) + return false; + } + +diff -Nurp a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c +--- a/gcc/tree-ssa-sccvn.c 2020-04-30 15:14:04.632000000 +0800 ++++ b/gcc/tree-ssa-sccvn.c 2020-04-30 15:14:56.480000000 +0800 +@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. + #include "config.h" + #include "system.h" + #include "coretypes.h" ++#include "splay-tree.h" + #include "backend.h" + #include "rtl.h" + #include "tree.h" +@@ -361,6 +362,8 @@ static void init_vn_nary_op_from_stmt (v + static void init_vn_nary_op_from_pieces (vn_nary_op_t, unsigned int, + enum tree_code, tree, tree *); + static tree vn_lookup_simplify_result (gimple_match_op *); ++static vn_reference_t vn_reference_lookup_or_insert_for_pieces ++ (tree, alias_set_type, tree, vec, tree); + + /* Return whether there is value numbering information for a given SSA name. */ + +@@ -1676,20 +1679,245 @@ vn_reference_lookup_1 (vn_reference_t vr + return NULL_TREE; + } + ++ ++/* Partial definition tracking support. */ ++ ++struct pd_range ++{ ++ HOST_WIDE_INT offset; ++ HOST_WIDE_INT size; ++}; ++ ++struct pd_data ++{ ++ tree rhs; ++ HOST_WIDE_INT offset; ++ HOST_WIDE_INT size; ++}; ++ ++/* Context for alias walking. */ ++ + struct vn_walk_cb_data + { + vn_walk_cb_data (vn_reference_t vr_, tree *last_vuse_ptr_, +- vn_lookup_kind vn_walk_kind_, bool tbaa_p_) ++ vn_lookup_kind vn_walk_kind_, bool tbaa_p_) + : vr (vr_), last_vuse_ptr (last_vuse_ptr_), vn_walk_kind (vn_walk_kind_), +- tbaa_p (tbaa_p_) +- {} ++ tbaa_p (tbaa_p_), known_ranges (NULL) ++ {} ++ ~vn_walk_cb_data (); ++ void *push_partial_def (const pd_data& pd, tree, HOST_WIDE_INT); + + vn_reference_t vr; + tree *last_vuse_ptr; + vn_lookup_kind vn_walk_kind; + bool tbaa_p; ++ ++ /* The VDEFs of partial defs we come along. */ ++ auto_vec partial_defs; ++ /* The first defs range to avoid splay tree setup in most cases. */ ++ pd_range first_range; ++ tree first_vuse; ++ splay_tree known_ranges; ++ obstack ranges_obstack; + }; + ++vn_walk_cb_data::~vn_walk_cb_data () ++{ ++ if (known_ranges) ++ { ++ splay_tree_delete (known_ranges); ++ obstack_free (&ranges_obstack, NULL); ++ } ++} ++ ++/* pd_range splay-tree helpers. 
*/ ++ ++static int ++pd_range_compare (splay_tree_key offset1p, splay_tree_key offset2p) ++{ ++ HOST_WIDE_INT offset1 = *(HOST_WIDE_INT *)offset1p; ++ HOST_WIDE_INT offset2 = *(HOST_WIDE_INT *)offset2p; ++ if (offset1 < offset2) ++ return -1; ++ else if (offset1 > offset2) ++ return 1; ++ return 0; ++} ++ ++static void * ++pd_tree_alloc (int size, void *data_) ++{ ++ vn_walk_cb_data *data = (vn_walk_cb_data *)data_; ++ return obstack_alloc (&data->ranges_obstack, size); ++} ++ ++static void ++pd_tree_dealloc (void *, void *) ++{ ++} ++ ++/* Push PD to the vector of partial definitions returning a ++ value when we are ready to combine things with VUSE and MAXSIZEI, ++ NULL when we want to continue looking for partial defs or -1 ++ on failure. */ ++ ++void * ++vn_walk_cb_data::push_partial_def (const pd_data &pd, tree vuse, ++ HOST_WIDE_INT maxsizei) ++{ ++ if (partial_defs.is_empty ()) ++ { ++ partial_defs.safe_push (pd); ++ first_range.offset = pd.offset; ++ first_range.size = pd.size; ++ first_vuse = vuse; ++ last_vuse_ptr = NULL; ++ } ++ else ++ { ++ if (!known_ranges) ++ { ++ /* ??? Optimize the case where the second partial def ++ completes things. */ ++ gcc_obstack_init (&ranges_obstack); ++ known_ranges ++ = splay_tree_new_with_allocator (pd_range_compare, 0, 0, ++ pd_tree_alloc, ++ pd_tree_dealloc, this); ++ splay_tree_insert (known_ranges, ++ (splay_tree_key)&first_range.offset, ++ (splay_tree_value)&first_range); ++ } ++ if (known_ranges) ++ { ++ pd_range newr = { pd.offset, pd.size }; ++ splay_tree_node n; ++ pd_range *r; ++ /* Lookup the predecessor of offset + 1 and see if ++ we need to merge with it. */ ++ HOST_WIDE_INT loffset = newr.offset + 1; ++ if ((n = splay_tree_predecessor (known_ranges, ++ (splay_tree_key)&loffset)) ++ && ((r = (pd_range *)n->value), true) ++ && ranges_known_overlap_p (r->offset, r->size + 1, ++ newr.offset, newr.size)) ++ { ++ /* Ignore partial defs already covered. */ ++ if (known_subrange_p (newr.offset, newr.size, ++ r->offset, r->size)) ++ return NULL; ++ r->size = MAX (r->offset + r->size, ++ newr.offset + newr.size) - r->offset; ++ } ++ else ++ { ++ /* newr.offset wasn't covered yet, insert the ++ range. */ ++ r = XOBNEW (&ranges_obstack, pd_range); ++ *r = newr; ++ splay_tree_insert (known_ranges, ++ (splay_tree_key)&r->offset, ++ (splay_tree_value)r); ++ } ++ /* Merge r which now contains newr and is a member ++ of the splay tree with adjacent overlapping ranges. */ ++ pd_range *rafter; ++ while ((n = splay_tree_successor (known_ranges, ++ (splay_tree_key)&r->offset)) ++ && ((rafter = (pd_range *)n->value), true) ++ && ranges_known_overlap_p (r->offset, r->size + 1, ++ rafter->offset, rafter->size)) ++ { ++ r->size = MAX (r->offset + r->size, ++ rafter->offset + rafter->size) - r->offset; ++ splay_tree_remove (known_ranges, ++ (splay_tree_key)&rafter->offset); ++ } ++ partial_defs.safe_push (pd); ++ ++ /* Now we have merged newr into the range tree. ++ When we have covered [offseti, sizei] then the ++ tree will contain exactly one node which has ++ the desired properties and it will be 'r'. */ ++ if (known_subrange_p (0, maxsizei / BITS_PER_UNIT, ++ r->offset, r->size)) ++ { ++ /* Now simply native encode all partial defs ++ in reverse order. */ ++ unsigned ndefs = partial_defs.length (); ++ /* We support up to 512-bit values (for V8DFmode). */ ++ unsigned char buffer[64]; ++ int len; ++ ++ while (!partial_defs.is_empty ()) ++ { ++ pd_data pd = partial_defs.pop (); ++ if (TREE_CODE (pd.rhs) == CONSTRUCTOR) ++ /* Empty CONSTRUCTOR. 
*/ ++ memset (buffer + MAX (0, pd.offset), ++ 0, MIN ((HOST_WIDE_INT)sizeof (buffer), pd.size)); ++ else ++ { ++ len = native_encode_expr (pd.rhs, ++ buffer + MAX (0, pd.offset), ++ sizeof (buffer - MAX (0, pd.offset)), ++ MAX (0, -pd.offset)); ++ if (len <= 0 ++ || len < (pd.size - MAX (0, -pd.offset))) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Failed to encode %u " ++ "partial definitions\n", ndefs); ++ return (void *)-1; ++ } ++ } ++ } ++ ++ tree type = vr->type; ++ /* Make sure to interpret in a type that has a range ++ covering the whole access size. */ ++ if (INTEGRAL_TYPE_P (vr->type) ++ && maxsizei != TYPE_PRECISION (vr->type)) ++ type = build_nonstandard_integer_type (maxsizei, ++ TYPE_UNSIGNED (type)); ++ tree val = native_interpret_expr (type, buffer, ++ maxsizei / BITS_PER_UNIT); ++ /* If we chop off bits because the types precision doesn't ++ match the memory access size this is ok when optimizing ++ reads but not when called from the DSE code during ++ elimination. */ ++ if (val ++ && type != vr->type) ++ { ++ if (! int_fits_type_p (val, vr->type)) ++ val = NULL_TREE; ++ else ++ val = fold_convert (vr->type, val); ++ } ++ ++ if (val) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Successfully combined %u " ++ "partial definitions\n", ndefs); ++ return vn_reference_lookup_or_insert_for_pieces ++ (first_vuse, ++ vr->set, vr->type, vr->operands, val); ++ } ++ else ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Failed to interpret %u " ++ "encoded partial definitions\n", ndefs); ++ return (void *)-1; ++ } ++ } ++ } ++ } ++ /* Continue looking for partial defs. */ ++ return NULL; ++} ++ + /* Callback for walk_non_aliased_vuses. Adjusts the vn_reference_t VR_ + with the current VUSE and performs the expression lookup. */ + +@@ -1701,6 +1929,11 @@ vn_reference_lookup_2 (ao_ref *op ATTRIB + vn_reference_s **slot; + hashval_t hash; + ++ /* If we have partial definitions recorded we have to go through ++ vn_reference_lookup_3. */ ++ if (!data->partial_defs.is_empty ()) ++ return NULL; ++ + if (data->last_vuse_ptr) + *data->last_vuse_ptr = vuse; + +@@ -1964,6 +2197,33 @@ public: + static rpo_elim *rpo_avail; + basic_block vn_context_bb; + ++/* Return true if BASE1 and BASE2 can be adjusted so they have the ++ same address and adjust *OFFSET1 and *OFFSET2 accordingly. ++ Otherwise return false. */ ++ ++static bool ++adjust_offsets_for_equal_base_address (tree base1, poly_int64 *offset1, ++ tree base2, poly_int64 *offset2) ++{ ++ poly_int64 soff; ++ if (TREE_CODE (base1) == MEM_REF ++ && TREE_CODE (base2) == MEM_REF) ++ { ++ if (mem_ref_offset (base1).to_shwi (&soff)) ++ { ++ base1 = TREE_OPERAND (base1, 0); ++ *offset1 += soff * BITS_PER_UNIT; ++ } ++ if (mem_ref_offset (base2).to_shwi (&soff)) ++ { ++ base2 = TREE_OPERAND (base2, 0); ++ *offset2 += soff * BITS_PER_UNIT; ++ } ++ return operand_equal_p (base1, base2, 0); ++ } ++ return operand_equal_p (base1, base2, OEP_ADDRESS_OF); ++} ++ + /* Callback for walk_non_aliased_vuses. 
Tries to perform a lookup + from the statement defining VUSE and if not successful tries to + translate *REFP and VR_ through an aggregate copy at the definition +@@ -2175,8 +2435,10 @@ vn_reference_lookup_3 (ao_ref *ref, tree + else + return (void *)-1; + tree len = gimple_call_arg (def_stmt, 2); +- if (known_subrange_p (offset, maxsize, offset2, +- wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT)) ++ HOST_WIDE_INT leni, offset2i, offseti; ++ if (data->partial_defs.is_empty () ++ && known_subrange_p (offset, maxsize, offset2, ++ wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT)) + { + tree val; + if (integer_zerop (gimple_call_arg (def_stmt, 1))) +@@ -2205,6 +2467,19 @@ vn_reference_lookup_3 (ao_ref *ref, tree + return vn_reference_lookup_or_insert_for_pieces + (vuse, vr->set, vr->type, vr->operands, val); + } ++ /* For now handle clearing memory with partial defs. */ ++ else if (integer_zerop (gimple_call_arg (def_stmt, 1)) ++ && tree_to_poly_int64 (len).is_constant (&leni) ++ && offset.is_constant (&offseti) ++ && offset2.is_constant (&offset2i) ++ && maxsize.is_constant (&maxsizei)) ++ { ++ pd_data pd; ++ pd.rhs = build_constructor (NULL_TREE, NULL); ++ pd.offset = offset2i - offseti; ++ pd.size = leni; ++ return data->push_partial_def (pd, vuse, maxsizei); ++ } + } + + /* 2) Assignment from an empty CONSTRUCTOR. */ +@@ -2215,17 +2490,37 @@ vn_reference_lookup_3 (ao_ref *ref, tree + { + tree base2; + poly_int64 offset2, size2, maxsize2; ++ HOST_WIDE_INT offset2i, size2i; + bool reverse; + base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt), + &offset2, &size2, &maxsize2, &reverse); + if (known_size_p (maxsize2) + && known_eq (maxsize2, size2) +- && operand_equal_p (base, base2, 0) +- && known_subrange_p (offset, maxsize, offset2, size2)) ++ && adjust_offsets_for_equal_base_address (base, &offset, ++ base2, &offset2)) + { +- tree val = build_zero_cst (vr->type); +- return vn_reference_lookup_or_insert_for_pieces +- (vuse, vr->set, vr->type, vr->operands, val); ++ if (data->partial_defs.is_empty () ++ && known_subrange_p (offset, maxsize, offset2, size2)) ++ { ++ tree val = build_zero_cst (vr->type); ++ return vn_reference_lookup_or_insert_for_pieces ++ (vuse, vr->set, vr->type, vr->operands, val); ++ } ++ else if (maxsize.is_constant (&maxsizei) ++ && maxsizei % BITS_PER_UNIT == 0 ++ && offset.is_constant (&offseti) ++ && offseti % BITS_PER_UNIT == 0 ++ && offset2.is_constant (&offset2i) ++ && offset2i % BITS_PER_UNIT == 0 ++ && size2.is_constant (&size2i) ++ && size2i % BITS_PER_UNIT == 0) ++ { ++ pd_data pd; ++ pd.rhs = gimple_assign_rhs1 (def_stmt); ++ pd.offset = (offset2i - offseti) / BITS_PER_UNIT; ++ pd.size = size2i / BITS_PER_UNIT; ++ return data->push_partial_def (pd, vuse, maxsizei); ++ } + } + } + +@@ -2247,65 +2542,85 @@ vn_reference_lookup_3 (ao_ref *ref, tree + && is_gimple_min_invariant (SSA_VAL (gimple_assign_rhs1 (def_stmt)))))) + { + tree base2; +- HOST_WIDE_INT offset2, size2; ++ poly_int64 offset2, size2, maxsize2; ++ HOST_WIDE_INT offset2i, size2i; + bool reverse; +- base2 = get_ref_base_and_extent_hwi (gimple_assign_lhs (def_stmt), +- &offset2, &size2, &reverse); ++ base2 = get_ref_base_and_extent (gimple_assign_lhs (def_stmt), ++ &offset2, &size2, &maxsize2, &reverse); + if (base2 + && !reverse +- && size2 % BITS_PER_UNIT == 0 +- && offset2 % BITS_PER_UNIT == 0 +- && operand_equal_p (base, base2, 0) +- && known_subrange_p (offseti, maxsizei, offset2, size2)) +- { +- /* We support up to 512-bit values (for V8DFmode). 
*/ +- unsigned char buffer[64]; +- int len; +- +- tree rhs = gimple_assign_rhs1 (def_stmt); +- if (TREE_CODE (rhs) == SSA_NAME) +- rhs = SSA_VAL (rhs); +- unsigned pad = 0; +- if (BYTES_BIG_ENDIAN +- && is_a (TYPE_MODE (TREE_TYPE (rhs)))) +- { +- /* On big-endian the padding is at the 'front' so +- just skip the initial bytes. */ +- fixed_size_mode mode +- = as_a (TYPE_MODE (TREE_TYPE (rhs))); +- pad = GET_MODE_SIZE (mode) - size2 / BITS_PER_UNIT; +- } +- len = native_encode_expr (rhs, +- buffer, sizeof (buffer), +- ((offseti - offset2) / BITS_PER_UNIT +- + pad)); +- if (len > 0 && len * BITS_PER_UNIT >= maxsizei) +- { +- tree type = vr->type; +- /* Make sure to interpret in a type that has a range +- covering the whole access size. */ +- if (INTEGRAL_TYPE_P (vr->type) +- && maxsizei != TYPE_PRECISION (vr->type)) +- type = build_nonstandard_integer_type (maxsizei, +- TYPE_UNSIGNED (type)); +- tree val = native_interpret_expr (type, buffer, +- maxsizei / BITS_PER_UNIT); +- /* If we chop off bits because the types precision doesn't +- match the memory access size this is ok when optimizing +- reads but not when called from the DSE code during +- elimination. */ +- if (val +- && type != vr->type) ++ && known_eq (maxsize2, size2) ++ && multiple_p (size2, BITS_PER_UNIT) ++ && multiple_p (offset2, BITS_PER_UNIT) ++ && adjust_offsets_for_equal_base_address (base, &offset, ++ base2, &offset2) ++ && offset.is_constant (&offseti) ++ && offset2.is_constant (&offset2i) ++ && size2.is_constant (&size2i)) ++ { ++ if (data->partial_defs.is_empty () ++ && known_subrange_p (offseti, maxsizei, offset2, size2)) ++ { ++ /* We support up to 512-bit values (for V8DFmode). */ ++ unsigned char buffer[64]; ++ int len; ++ ++ tree rhs = gimple_assign_rhs1 (def_stmt); ++ if (TREE_CODE (rhs) == SSA_NAME) ++ rhs = SSA_VAL (rhs); ++ unsigned pad = 0; ++ if (BYTES_BIG_ENDIAN ++ && is_a (TYPE_MODE (TREE_TYPE (rhs)))) + { +- if (! int_fits_type_p (val, vr->type)) +- val = NULL_TREE; +- else +- val = fold_convert (vr->type, val); ++ /* On big-endian the padding is at the 'front' so ++ just skip the initial bytes. */ ++ fixed_size_mode mode ++ = as_a (TYPE_MODE (TREE_TYPE (rhs))); ++ pad = GET_MODE_SIZE (mode) - size2i / BITS_PER_UNIT; + } +- +- if (val) +- return vn_reference_lookup_or_insert_for_pieces +- (vuse, vr->set, vr->type, vr->operands, val); ++ len = native_encode_expr (rhs, ++ buffer, sizeof (buffer), ++ ((offseti - offset2i) / BITS_PER_UNIT ++ + pad)); ++ if (len > 0 && len * BITS_PER_UNIT >= maxsizei) ++ { ++ tree type = vr->type; ++ /* Make sure to interpret in a type that has a range ++ covering the whole access size. */ ++ if (INTEGRAL_TYPE_P (vr->type) ++ && maxsizei != TYPE_PRECISION (vr->type)) ++ type = build_nonstandard_integer_type (maxsizei, ++ TYPE_UNSIGNED (type)); ++ tree val = native_interpret_expr (type, buffer, ++ maxsizei / BITS_PER_UNIT); ++ /* If we chop off bits because the types precision doesn't ++ match the memory access size this is ok when optimizing ++ reads but not when called from the DSE code during ++ elimination. */ ++ if (val ++ && type != vr->type) ++ { ++ if (! 
int_fits_type_p (val, vr->type)) ++ val = NULL_TREE; ++ else ++ val = fold_convert (vr->type, val); ++ } ++ ++ if (val) ++ return vn_reference_lookup_or_insert_for_pieces ++ (vuse, vr->set, vr->type, vr->operands, val); ++ } ++ } ++ else if (ranges_known_overlap_p (offseti, maxsizei, offset2i, size2i)) ++ { ++ pd_data pd; ++ tree rhs = gimple_assign_rhs1 (def_stmt); ++ if (TREE_CODE (rhs) == SSA_NAME) ++ rhs = SSA_VAL (rhs); ++ pd.rhs = rhs; ++ pd.offset = (offset2i - offseti) / BITS_PER_UNIT; ++ pd.size = size2i / BITS_PER_UNIT; ++ return data->push_partial_def (pd, vuse, maxsizei); + } + } + } +@@ -2316,7 +2631,12 @@ vn_reference_lookup_3 (ao_ref *ref, tree + && is_gimple_reg_type (vr->type) + && !contains_storage_order_barrier_p (vr->operands) + && gimple_assign_single_p (def_stmt) +- && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME) ++ && TREE_CODE (gimple_assign_rhs1 (def_stmt)) == SSA_NAME ++ /* A subset of partial defs from non-constants can be handled ++ by for example inserting a CONSTRUCTOR, a COMPLEX_EXPR or ++ even a (series of) BIT_INSERT_EXPR hoping for simplifications ++ downstream, not so much for actually doing the insertion. */ ++ && data->partial_defs.is_empty ()) + { + tree base2; + poly_int64 offset2, size2, maxsize2; +@@ -2328,7 +2648,8 @@ vn_reference_lookup_3 (ao_ref *ref, tree + if (!reverse + && known_size_p (maxsize2) + && known_eq (maxsize2, size2) +- && operand_equal_p (base, base2, 0) ++ && adjust_offsets_for_equal_base_address (base, &offset, ++ base2, &offset2) + && known_subrange_p (offset, maxsize, offset2, size2) + /* ??? We can't handle bitfield precision extracts without + either using an alternate type for the BIT_FIELD_REF and +@@ -2363,7 +2684,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree + && gimple_assign_single_p (def_stmt) + && (DECL_P (gimple_assign_rhs1 (def_stmt)) + || TREE_CODE (gimple_assign_rhs1 (def_stmt)) == MEM_REF +- || handled_component_p (gimple_assign_rhs1 (def_stmt)))) ++ || handled_component_p (gimple_assign_rhs1 (def_stmt))) ++ /* Handling this is more complicated, give up for now. */ ++ && data->partial_defs.is_empty ()) + { + tree base2; + int i, j, k; +@@ -2497,7 +2820,9 @@ vn_reference_lookup_3 (ao_ref *ref, tree + || TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME) + && (TREE_CODE (gimple_call_arg (def_stmt, 1)) == ADDR_EXPR + || TREE_CODE (gimple_call_arg (def_stmt, 1)) == SSA_NAME) +- && poly_int_tree_p (gimple_call_arg (def_stmt, 2), ©_size)) ++ && poly_int_tree_p (gimple_call_arg (def_stmt, 2), ©_size) ++ /* Handling this is more complicated, give up for now. */ ++ && data->partial_defs.is_empty ()) + { + tree lhs, rhs; + ao_ref r; +diff -Nurp a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c +--- a/gcc/tree-ssa-structalias.c 2020-04-30 15:14:04.644000000 +0800 ++++ b/gcc/tree-ssa-structalias.c 2020-04-30 15:14:56.592000000 +0800 +@@ -7817,7 +7817,7 @@ associate_varinfo_to_alias (struct cgrap + { + if ((node->alias + || (node->thunk.thunk_p +- && ! node->global.inlined_to)) ++ && ! node->inlined_to)) + && node->analyzed + && !node->ifunc_resolver) + insert_vi_for_tree (node->decl, (varinfo_t)data); +@@ -7987,7 +7987,7 @@ ipa_pta_execute (void) + /* Nodes without a body are not interesting. Especially do not + visit clones at this point for now - we get duplicate decls + there for inline clones at least. 
*/ +- if (!node->has_gimple_body_p () || node->global.inlined_to) ++ if (!node->has_gimple_body_p () || node->inlined_to) + continue; + node->get_body (); + diff --git a/ipa-struct-reorg-bugfix.patch b/ipa-struct-reorg-bugfix.patch new file mode 100644 index 0000000..0ec8ba2 --- /dev/null +++ b/ipa-struct-reorg-bugfix.patch @@ -0,0 +1,613 @@ +diff -Nurp a/gcc/fold-const.c b/gcc/fold-const.c +--- a/gcc/fold-const.c 2020-06-16 22:27:46.116000000 -0400 ++++ b/gcc/fold-const.c 2020-06-16 22:27:58.412000000 -0400 +@@ -7165,15 +7165,9 @@ fold_plusminus_mult_expr (location_t loc + increased the number of multiplications necessary. */ + && TREE_CODE (arg10) != INTEGER_CST) + { +- HOST_WIDE_INT tmp1 = int01 / int11; +- HOST_WIDE_INT t = exact_log2 (absu_hwi (int11)); +- HOST_WIDE_INT size = tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (arg00))) * BITS_PER_UNIT; +- HOST_WIDE_INT sign_bit = HOST_WIDE_INT_1U << (size - t - 1); +- if (tmp1 & sign_bit) +- tmp1 |= HOST_WIDE_INT_M1U << (size - t); +- tree tmp2 = build_int_cst (TREE_TYPE (arg00), tmp1); + alt0 = fold_build2_loc (loc, MULT_EXPR, TREE_TYPE (arg00), arg00, +- tmp2); ++ build_int_cst (TREE_TYPE (arg00), ++ int01 / int11)); + alt1 = arg10; + same = maybe_same; + if (swap) +diff -Nurp a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c 2020-06-16 22:27:46.116000000 -0400 ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c 2020-06-16 22:33:18.968000000 -0400 +@@ -112,6 +112,23 @@ is_va_list_type (tree type) + return TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (va_list_type_node); + } + ++static const char * ++get_type_name (tree type) ++{ ++ const char *tname = NULL; ++ if (TYPE_NAME (type) != NULL) ++ { ++ if (TREE_CODE (TYPE_NAME (type)) == IDENTIFIER_NODE) ++ { ++ tname = IDENTIFIER_POINTER (TYPE_NAME (type)); ++ } ++ else if (DECL_NAME (TYPE_NAME (type)) != NULL) ++ { ++ tname = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))); ++ } ++ } ++ return tname; ++} + + /* Return the inner most type for arrays and pointers of TYPE. */ + +@@ -463,10 +480,10 @@ srtype::analyze (void) + if (fields.length () == 2) + fields[1]->clusternum = 1; + +- /* REMOVEME: FIXME: this is here for testing more testcases. */ ++ /* FIXME: Currently Return. */ + if (fields.length () >= 3) + { +- fields[1]->clusternum = 1; ++ return; + } + } + +@@ -875,6 +892,7 @@ private: + void analyze_types (void); + void clear_visited (void); + bool create_new_types (void); ++ void restore_field_type (void); + void create_new_decls (void); + srdecl *find_decl (tree); + void create_new_functions (void); +@@ -1096,6 +1114,11 @@ ipa_struct_reorg::record_type (tree type + { + tree t = TREE_TYPE (field); + process_union (t); ++ if (TREE_CODE (inner_type (t)) == UNION_TYPE ++ || TREE_CODE (inner_type (t)) == QUAL_UNION_TYPE) ++ { ++ type1->mark_escape (escape_union, NULL); ++ } + if (isvolatile_type (t)) + type1->mark_escape (escape_volatile, NULL); + escape_type e = escape_type_volatile_array_or_ptrptr (t); +@@ -2818,6 +2841,49 @@ ipa_struct_reorg::analyze_types (void) + } + } + ++/* When struct A has a struct B member, B's type info ++ is not stored in ++ TYPE_FIELDS (TREE_TYPE (TYPE_FIELDS (typeA))) ++ Try to restore B's type information. 
*/ ++void ++ipa_struct_reorg::restore_field_type (void) ++{ ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ for (unsigned j = 0; j < types[i]->fields.length (); j++) ++ { ++ srfield *field = types[i]->fields[j]; ++ if (TREE_CODE (inner_type (field->fieldtype)) == RECORD_TYPE) ++ { ++ /* If field type has TYPE_FIELDS information, ++ we do not need to do this. */ ++ if (TYPE_FIELDS (field->type->type) != NULL) ++ { ++ continue; ++ } ++ for (unsigned k = 0; k < types.length (); k++) ++ { ++ if (i == k) ++ { ++ continue; ++ } ++ const char *type1 = get_type_name (field->type->type); ++ const char *type2 = get_type_name (types[k]->type); ++ if (type1 == NULL || type2 == NULL) ++ { ++ continue; ++ } ++ if (type1 == type2 ++ && TYPE_FIELDS (types[k]->type)) ++ { ++ field->type = types[k]; ++ } ++ } ++ } ++ } ++ } ++} ++ + /* Create all new types we want to create. */ + + bool +@@ -3669,7 +3735,7 @@ ipa_struct_reorg::rewrite_functions (voi + { + unsigned retval = 0; + +- ++ restore_field_type (); + /* Create new types, if we did not create any new types, + then don't rewrite any accesses. */ + if (!create_new_types ()) +diff -Nurp a/gcc/testsuite/gcc.c-torture/compile/20170404-1.c b/gcc/testsuite/gcc.c-torture/compile/20170404-1.c +--- a/gcc/testsuite/gcc.c-torture/compile/20170404-1.c 2020-06-16 22:27:46.120000000 -0400 ++++ b/gcc/testsuite/gcc.c-torture/compile/20170404-1.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,19 +0,0 @@ +-struct a +-{ +- int t, t1; +-}; +- +-static struct a *b; +- +-void *xmalloc(int); +- +- +-void f(void) +-{ +- b = xmalloc (sizeof(*b)); +-} +- +-int g(void) +-{ +- return b->t; +-} +diff -Nurp a/gcc/testsuite/gcc.c-torture/compile/nested-3.c b/gcc/testsuite/gcc.c-torture/compile/nested-3.c +--- a/gcc/testsuite/gcc.c-torture/compile/nested-3.c 2020-06-16 22:27:46.120000000 -0400 ++++ b/gcc/testsuite/gcc.c-torture/compile/nested-3.c 2020-06-16 22:27:58.416000000 -0400 +@@ -1,4 +1,3 @@ +-/* This used to crash Struct reorg. */ + struct a + { + int t; +diff -Nurp a/gcc/testsuite/gcc.c-torture/compile/struct-reorg-1.c b/gcc/testsuite/gcc.c-torture/compile/struct-reorg-1.c +--- a/gcc/testsuite/gcc.c-torture/compile/struct-reorg-1.c 2020-06-16 22:27:46.120000000 -0400 ++++ b/gcc/testsuite/gcc.c-torture/compile/struct-reorg-1.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,18 +0,0 @@ +-#include +-typedef struct { +- long laststart_offset; +- unsigned regnum; +-} compile_stack_elt_t; +-typedef struct { +- compile_stack_elt_t *stack; +- unsigned size; +-} compile_stack_type; +-void f (const char *p, const char *pend, int c) +-{ +- compile_stack_type compile_stack; +- while (p != pend) +- if (c) +- compile_stack.stack = realloc (compile_stack.stack, +- (compile_stack.size << 1) +- * sizeof (compile_stack_elt_t)); +-} +diff -Nurp a/gcc/testsuite/gcc.dg/pr33136-4.c b/gcc/testsuite/gcc.dg/pr33136-4.c +--- a/gcc/testsuite/gcc.dg/pr33136-4.c 2020-06-16 22:27:46.120000000 -0400 ++++ b/gcc/testsuite/gcc.dg/pr33136-4.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,59 +0,0 @@ +-/* PR tree-optimization/33136 */ +-/* { dg-do run } */ +-/* { dg-options "-O2" } */ +- +-extern void abort (void); +- +-struct S +-{ +- int b; +- int *c; +-}; +-static int d, e; +- +-static struct S s; +- +-static int * +-__attribute__((noinline, const)) +-foo (void) +-{ +- return &s.b; +-} +- +-int * +-__attribute__((noinline)) +-bar (int **f) +-{ +- s.c = &d; +- *f = &e; +- /* As nothing ever takes the address of any int * field in struct S, +- the write to *f can't alias with the s.c field. 
*/ +- return s.c; +-} +- +-int +-__attribute__((noinline)) +-baz (int *x) +-{ +- s.b = 1; +- *x = 4; +- /* Function foo takes address of an int field in struct S, +- so *x can alias with the s.b field (and it does in this testcase). */ +- return s.b; +-} +- +-int +-__attribute__((noinline)) +-t (void) +-{ +- int *f = (int *) 0; +- return 10 * (bar (&f) != &d) + baz (foo ()); +-} +- +-int +-main (void) +-{ +- if (t () != 4) +- abort (); +- return 0; +-} +diff -Nurp a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c +--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-1.c 2020-06-16 22:27:58.436000000 -0400 +@@ -0,0 +1,24 @@ ++// { dg-do compile } ++// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all" } ++ ++struct a ++{ ++ int t, t1; ++}; ++ ++static struct a *b; ++ ++void *xmalloc(int); ++ ++ ++void f(void) ++{ ++ b = xmalloc (sizeof(*b)); ++} ++ ++int g(void) ++{ ++ return b->t; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c +--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-2.c 2020-06-16 22:27:58.440000000 -0400 +@@ -0,0 +1,29 @@ ++// { dg-do run } ++ ++#include ++ ++struct a ++{ ++ int t; ++ int t1; ++}; ++ ++__attribute__((noinline)) int f(int i, int j) ++{ ++ struct a *t; ++ struct a t1 = {i, j}; ++ t = &t1; ++ auto int g(void) __attribute__((noinline)); ++ int g(void) ++ { ++ return t->t + t->t1; ++ } ++ return g(); ++} ++ ++int main() ++{ ++ assert (f(1, 2) == 3); ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c +--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-3.c 2020-06-16 22:27:58.440000000 -0400 +@@ -0,0 +1,23 @@ ++// { dg-do compile } ++// { dg-options "-O3 -flto-partition=one -fipa-struct-reorg -fdump-ipa-all" } ++ ++#include ++typedef struct { ++ long laststart_offset; ++ unsigned regnum; ++} compile_stack_elt_t; ++typedef struct { ++ compile_stack_elt_t *stack; ++ unsigned size; ++} compile_stack_type; ++void f (const char *p, const char *pend, int c) ++{ ++ compile_stack_type compile_stack; ++ while (p != pend) ++ if (c) ++ compile_stack.stack = realloc (compile_stack.stack, ++ (compile_stack.size << 1) ++ * sizeof (compile_stack_elt_t)); ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c +--- a/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-4.c 2020-06-16 22:27:58.440000000 -0400 +@@ -0,0 +1,59 @@ ++/* { dg-do run } */ ++ ++extern void abort (void); ++ ++struct S ++{ ++ int b; ++ int *c; ++}; ++static int d, e; ++ ++static struct S s; ++ ++static int * ++__attribute__((noinline, const)) ++foo (void) ++{ ++ return &s.b; ++} ++ ++int * ++__attribute__((noinline)) ++bar (int **f) ++{ ++ s.c = &d; ++ *f = &e; ++ /* As nothing ever takes the address of any int * field in struct S, ++ the write to *f can't alias with the s.c 
field. */ ++ return s.c; ++} ++ ++int ++__attribute__((noinline)) ++baz (int *x) ++{ ++ s.b = 1; ++ *x = 4; ++ /* Function foo takes address of an int field in struct S, ++ so *x can alias with the s.b field (and it does in this testcase). */ ++ return s.b; ++} ++ ++int ++__attribute__((noinline)) ++t (void) ++{ ++ int *f = (int *) 0; ++ return 10 * (bar (&f) != &d) + baz (foo ()); ++} ++ ++int ++main (void) ++{ ++ if (t () != 4) ++ abort (); ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp 2020-06-16 22:27:46.120000000 -0400 ++++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp 2020-06-16 22:27:58.440000000 -0400 +@@ -1,5 +1,4 @@ +-# Copyright (C) 2007, 2008, 2009, 2010 +-# Free Software Foundation, Inc. ++# Copyright (C) 1997-2019 Free Software Foundation, Inc. + + # This program is free software; you can redistribute it and/or modify + # it under the terms of the GNU General Public License as published by +@@ -12,12 +11,9 @@ + # GNU General Public License for more details. + # + # You should have received a copy of the GNU General Public License +-# along with this program; see the file COPYING3. If not see ++# along with GCC; see the file COPYING3. If not see + # . + +-# Test the functionality of programs compiled with profile-directed structure +-# rearrangement using -fprofile-generate followed by -fprofile-use. +- + load_lib gcc-dg.exp + load_lib target-supports.exp + +@@ -26,62 +22,14 @@ dg-init + torture-init + + set STRUCT_REORG_TORTURE_OPTIONS [list \ +- { -O1 } \ +- { -O1 -g } \ +- { -O2 } \ +- { -O2 -g } \ +- { -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions } \ +- { -O3 -g } \ +- { -Os } ] +- ++ { -O3 } \ ++ { -Ofast } ] + +-set-torture-options $STRUCT_REORG_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS ++set-torture-options $STRUCT_REORG_TORTURE_OPTIONS {{}} + +-gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/wo_prof_*.c]] "" "-fipa-struct-reorg -fdump-ipa-all -fwhole-program" ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/*.c]] \ ++ "" "-fipa-struct-reorg -fdump-ipa-all -flto-partition=one -fwhole-program" + ++# All done. + torture-finish +-dg-final +- +-# Some targets don't support tree profiling. +-if { ![check_profiling_available ""] } { +- return +-} +- +-# The procedures in profopt.exp need these parameters. +-set tool gcc +-set prof_ext "gcda" +- +-# Override the list defined in profopt.exp. +-set PROFOPT_OPTIONS [list {}] +- +-if $tracelevel then { +- strace $tracelevel +-} +- +-# Load support procs. +-load_lib profopt.exp +- +-# These are globals used by profopt-execute. The first is options +-# needed to generate profile data, the second is options to use the +-# profile data. +-set common "-O3 -fwhole-program" +-set profile_option [concat $common " -fprofile-generate"] +-set feedback_option [concat $common " -fprofile-use -fipa-struct-reorg -fdump-ipa-all"] +- +-foreach src [lsort [glob -nocomplain $srcdir/$subdir/w_prof_*.c]] { +- # If we're only testing specific files and this isn't one of them, skip it. 
+- if ![runtest_file_p $runtests $src] then { +- continue +- } +- profopt-execute $src +-} +- +-set feedback_option [concat $feedback_option " --param struct-reorg-cold-struct-ratio=30"] +- +-foreach src [lsort [glob -nocomplain $srcdir/$subdir/w_ratio_*.c]] { +- # If we're only testing specific files and this isn't one of them, skip it. +- if ![runtest_file_p $runtests $src] then { +- continue +- } +- profopt-execute $src +-} ++dg-finish +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c 2020-06-16 22:27:46.120000000 -0400 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c 2020-06-16 22:27:58.440000000 -0400 +@@ -28,4 +28,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final { scan-ipa-dump "has escaped...Type is used in an array" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c 2020-06-16 22:27:46.120000000 -0400 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c 2020-06-16 22:27:58.440000000 -0400 +@@ -38,5 +38,5 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* The structure str_t is erroneously peeled into 4 structures instead of 2. */ +-/* { dg-final { scan-ipa-dump "the number of new types is 2" "struct_reorg" } } */ ++/* Two more fields structure is not splitted. */ ++/* { dg-final { scan-ipa-dump "No structures to transform." "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c +--- a/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c 2020-06-16 22:27:46.120000000 -0400 ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c 2020-06-16 22:27:58.440000000 -0400 +@@ -26,4 +26,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c +--- a/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c 2020-06-16 22:27:46.120000000 -0400 ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c 2020-06-16 22:27:58.440000000 -0400 +@@ -39,4 +39,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c +--- a/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c 2020-06-16 22:27:46.124000000 -0400 ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c 2020-06-16 22:27:58.472000000 -0400 +@@ -34,4 +34,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { 
xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c +--- a/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c 2020-06-16 22:27:46.124000000 -0400 ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c 2020-06-16 22:27:58.472000000 -0400 +@@ -37,4 +37,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c +--- a/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c 2020-06-16 22:27:46.124000000 -0400 ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c 2020-06-16 22:27:58.472000000 -0400 +@@ -28,4 +28,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c b/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c +--- a/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c 2020-06-16 22:27:46.124000000 -0400 ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_two_strs.c 2020-06-16 22:27:58.472000000 -0400 +@@ -61,4 +61,4 @@ main () + } + + /*--------------------------------------------------------------------------*/ +-/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c +--- a/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c 2020-06-16 22:27:46.124000000 -0400 ++++ b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c 2020-06-16 22:27:58.472000000 -0400 +@@ -40,4 +40,4 @@ main () + + /*--------------------------------------------------------------------------*/ + /* Arrays are not handled. */ +-/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff -Nurp a/gcc/testsuite/g++.dg/torture/pr38355.C b/gcc/testsuite/g++.dg/torture/pr38355.C +--- a/gcc/testsuite/g++.dg/torture/pr38355.C 2020-06-16 22:27:46.124000000 -0400 ++++ b/gcc/testsuite/g++.dg/torture/pr38355.C 1969-12-31 19:00:00.000000000 -0500 +@@ -1,25 +0,0 @@ +-// { dg-do run } +-// { dg-options "-fwhole-program -fipa-struct-reorg" } +-template struct A +-{ +- char c; +- void foo(int); +- void bar(int i) { foo(i+1); } +-}; +- +-template struct B : virtual A<0> {}; +- +-template inline void baz(B& b, int i) +-{ +- if (i) b.bar(0); +-} +- +-extern template class A<0>; +-extern template void baz(B<0>&, int); +- +-int main() +-{ +- B<0> b; +- baz(b, 0); +- return 0; +-} diff --git a/ipa-struct-reorg.patch b/ipa-struct-reorg.patch new file mode 100644 index 0000000..cf3ae23 --- /dev/null +++ b/ipa-struct-reorg.patch @@ -0,0 +1,5846 @@ +This backport contains 1 patch from gcc personal branch tree. 
+ +ipa-struct-reorg-2019-06-07-Update-with-Andrew-Pinski-s-struct-reorg-patch.patch +commit 6e1bd1c900533c627b5e4fbbecb41dcd7974b522 + +The original of this commit can be found on + https://gcc.gnu.org/git/?p=gcc-old.git;a=shortlog;h=refs/heads/sje/struct-reorg + +diff -Nurp a/gcc/common.opt b/gcc/common.opt +--- a/gcc/common.opt 2020-03-12 07:07:21.000000000 -0400 ++++ b/gcc/common.opt 2020-06-16 22:56:07.720000000 -0400 +@@ -1762,8 +1762,8 @@ Common Ignore + Does nothing. Preserved for backward compatibility. + + fipa-struct-reorg +-Common Ignore +-Does nothing. Preserved for backward compatibility. ++Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization ++Perform structure layout optimizations. + + fipa-vrp + Common Report Var(flag_ipa_vrp) Optimization +diff -Nurp a/gcc/configure b/gcc/configure +--- a/gcc/configure 2020-03-12 07:08:30.000000000 -0400 ++++ b/gcc/configure 2020-06-16 22:56:07.724000000 -0400 +@@ -31614,7 +31614,7 @@ $as_echo "$as_me: executing $ac_file com + "depdir":C) $SHELL $ac_aux_dir/mkinstalldirs $DEPDIR ;; + "gccdepdir":C) + ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs build/$DEPDIR +- for lang in $subdirs c-family common ++ for lang in $subdirs c-family common ipa-struct-reorg + do + ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs $lang/$DEPDIR + done ;; +diff -Nurp a/gcc/configure.ac b/gcc/configure.ac +--- a/gcc/configure.ac 2020-03-12 07:07:21.000000000 -0400 ++++ b/gcc/configure.ac 2020-06-16 22:56:07.724000000 -0400 +@@ -1170,7 +1170,7 @@ AC_CHECK_HEADERS(ext/hash_map) + ZW_CREATE_DEPDIR + AC_CONFIG_COMMANDS([gccdepdir],[ + ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs build/$DEPDIR +- for lang in $subdirs c-family common ++ for lang in $subdirs c-family common ipa-struct-reorg + do + ${CONFIG_SHELL-/bin/sh} $ac_aux_dir/mkinstalldirs $lang/$DEPDIR + done], [subdirs="$subdirs" ac_aux_dir=$ac_aux_dir DEPDIR=$DEPDIR]) +diff -Nurp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +--- a/gcc/doc/invoke.texi 2020-03-12 07:07:21.000000000 -0400 ++++ b/gcc/doc/invoke.texi 2020-06-16 22:56:07.728000000 -0400 +@@ -420,6 +420,7 @@ Objective-C and Objective-C++ Dialects}. + -finline-functions -finline-functions-called-once -finline-limit=@var{n} @gol + -finline-small-functions -fipa-cp -fipa-cp-clone @gol + -fipa-bit-cp -fipa-vrp -fipa-pta -fipa-profile -fipa-pure-const @gol ++-fipa-struct-reorg @gol + -fipa-reference -fipa-reference-addressable @gol + -fipa-stack-alignment -fipa-icf -fira-algorithm=@var{algorithm} @gol + -flive-patching=@var{level} @gol +@@ -9312,6 +9313,19 @@ Enabled by default at @option{-O} and hi + Reduce stack alignment on call sites if possible. + Enabled by default. + ++@item -fipa-struct-reorg ++@opindex fipa-struct-reorg ++Perform structure reorganization optimization, that change C-like structures ++layout in order to better utilize spatial locality. This transformation is ++affective for programs containing arrays of structures. Available in two ++compilation modes: profile-based (enabled with @option{-fprofile-generate}) ++or static (which uses built-in heuristics). It works only in whole program ++mode, so it requires @option{-fwhole-program} to be ++enabled. Structures considered @samp{cold} by this transformation are not ++affected (see @option{--param struct-reorg-cold-struct-ratio=@var{value}}). ++ ++With this flag, the program debug info reflects a new structure layout. 
++ + @item -fipa-pta + @opindex fipa-pta + Perform interprocedural pointer analysis and interprocedural modification +@@ -11025,6 +11039,15 @@ In each case, the @var{value} is an inte + @var{name} are: + + @table @gcctabopt ++@item struct-reorg-cold-struct-ratio ++The threshold ratio (as a percentage) between a structure frequency ++and the frequency of the hottest structure in the program. This parameter ++is used by struct-reorg optimization enabled by @option{-fipa-struct-reorg}. ++We say that if the ratio of a structure frequency, calculated by profiling, ++to the hottest structure frequency in the program is less than this ++parameter, then structure reorganization is not applied to this structure. ++The default is 10. ++ + @item predictable-branch-outcome + When branch is predicted to be taken with probability lower than this threshold + (in percent), then it is considered well predictable. +diff -Nurp a/gcc/fold-const.c b/gcc/fold-const.c +--- a/gcc/fold-const.c 2020-03-12 07:07:21.000000000 -0400 ++++ b/gcc/fold-const.c 2020-06-16 22:56:07.732000000 -0400 +@@ -7165,9 +7165,15 @@ fold_plusminus_mult_expr (location_t loc + increased the number of multiplications necessary. */ + && TREE_CODE (arg10) != INTEGER_CST) + { ++ HOST_WIDE_INT tmp1 = int01 / int11; ++ HOST_WIDE_INT t = exact_log2 (absu_hwi (int11)); ++ HOST_WIDE_INT size = tree_to_shwi (TYPE_SIZE_UNIT (TREE_TYPE (arg00))) * BITS_PER_UNIT; ++ HOST_WIDE_INT sign_bit = HOST_WIDE_INT_1U << (size - t - 1); ++ if (tmp1 & sign_bit) ++ tmp1 |= HOST_WIDE_INT_M1U << (size - t); ++ tree tmp2 = build_int_cst (TREE_TYPE (arg00), tmp1); + alt0 = fold_build2_loc (loc, MULT_EXPR, TREE_TYPE (arg00), arg00, +- build_int_cst (TREE_TYPE (arg00), +- int01 / int11)); ++ tmp2); + alt1 = arg10; + same = maybe_same; + if (swap) +diff -Nurp a/gcc/ipa-struct-reorg/escapes.def b/gcc/ipa-struct-reorg/escapes.def +--- a/gcc/ipa-struct-reorg/escapes.def 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/ipa-struct-reorg/escapes.def 2020-06-16 22:56:07.732000000 -0400 +@@ -0,0 +1,60 @@ ++/* Copyright (C) 2016 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* Before including this file, you should define a macro: ++ DEF_ESCAPE (ENUM, TEXT) ++ ++ This macro will be called once for each escape reason. The ++ ENUM will be of type "escape_type". The TEXT is describing ++ the reason for the escape. 
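(Editorial sketch; not part of the patch.) The comment above describes the usual GCC ".def" idiom: the includer defines DEF_ESCAPE, includes escapes.def once to build the enum and once to build the parallel reason strings, and the file #undefs the macro at its end. The real consumer is presumably ipa-struct-reorg.h elsewhere in this patch; the declarations below are only an illustration, although the names escape_type and escape_type_string do appear in the pass source further down.

/* Build the enumeration of escape reasons ...  */
enum escape_type
{
  does_not_escape,
#define DEF_ESCAPE(ENUM, TEXT) ENUM,
#include "escapes.def"
  escape_max_escape
};

/* ... and the matching table of reason strings.  Entry N of the table
   describes enum value N + 1, which is why the pass indexes it as
   escape_type_string[escapes - 1].  */
static const char *escape_type_string[] =
{
#define DEF_ESCAPE(ENUM, TEXT) TEXT,
#include "escapes.def"
};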
++*/ ++DEF_ESCAPE (escape_marked_as_used, "Type used in variable marked as used") ++DEF_ESCAPE (escape_via_global_var, "Type used via a external visible variable") ++DEF_ESCAPE (escape_via_global_init, "Type used via a global init of a variable") ++DEF_ESCAPE (escape_non_supported_allocator, "Type used by allocation which is not currently supported") ++DEF_ESCAPE (escape_dependent_type_escapes, "Type uses a type which escapes or is used by a type which escapes") ++DEF_ESCAPE (escape_var_arg_function, "Types escapes via a variable argument function") ++DEF_ESCAPE (escape_bitfields, "Types has bitfields") ++DEF_ESCAPE (escape_recusive_type, "Type has a recusive relationship") ++DEF_ESCAPE (escape_variable_sized_array, "Type has a variable sized type") ++DEF_ESCAPE (escape_external_function, "Type escapes via an external function call") ++DEF_ESCAPE (escape_visible_function, "Type escapes via expternally visible function call") ++DEF_ESCAPE (escape_pointer_function, "Type escapes via an function pointer call") ++DEF_ESCAPE (escape_unkown_field, "Type escapes via an unkown field accessed") ++DEF_ESCAPE (escape_union, "Type escapes via an union") ++DEF_ESCAPE (escape_inline_asm, "Type escapes via inline-asm") ++DEF_ESCAPE (escape_non_multiply_size, "Type escapes a pointer plus which is not a multiplicate of the size") ++DEF_ESCAPE (escape_cast_void, "Type escapes a cast to/from void*") ++DEF_ESCAPE (escape_cast_another_ptr, "Type escapes a cast to a different pointer") ++DEF_ESCAPE (escape_cast_int, "Type escapes a cast from/to intergral type") ++DEF_ESCAPE (escape_int_const, "Type escapes via integer constant") ++DEF_ESCAPE (escape_vce, "Type escapes via a VIEW_CONVERT_EXPR") ++DEF_ESCAPE (escape_array_access, "Type escapes via an array access") ++DEF_ESCAPE (escape_noclonable_function, "Type escapes via a non-clonable function") ++DEF_ESCAPE (escape_rescusive_type, "Recusive type") ++DEF_ESCAPE (escape_user_alignment, "Type has an user alignment set") ++DEF_ESCAPE (escape_volatile, "Type has an variable which is volatile") ++DEF_ESCAPE (escape_non_eq, "Type has a comparison other than equals or not equals") ++DEF_ESCAPE (escape_addr, "Type escapes via taking the address of field") ++DEF_ESCAPE (escape_cannot_change_signature, "Type used in a call that cannot change signature") ++DEF_ESCAPE (escape_non_optimize, "Type used by a function which turns off struct reorg") ++DEF_ESCAPE (escape_array, "Type is used in an array [not handled yet]") ++DEF_ESCAPE (escape_ptr_ptr, "Type is used in a pointer to a pointer [not handled yet]") ++DEF_ESCAPE (escape_return, "Type escapes via a return [not handled yet]") ++ ++#undef DEF_ESCAPE +diff -Nurp a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c 2020-06-16 22:56:27.548000000 -0400 +@@ -0,0 +1,3840 @@ ++/* Struct-reorg optimizations. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ Contributed by Andrew Pinski ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* This pass implements the structure reorganization organization (struct-reorg). ++ Right now it handles just splitting off the hottest fields for a struct of 2 fields: ++ struct s { ++ type1 field1; // Hot field ++ type2 field2; ++ }; ++ s *v; ++ into: ++ struct s_hot { ++ type1 field1; ++ }; ++ struct c_cold { ++ type2 field2; ++ }; ++ s_hot *v_hot; ++ s_cold *v_cold; ++ ++ TODO: This pass can be extended to more fields, and other alogrothims like reordering. ++ ++ This pass operate in four stages: ++ 1. All of the field accesses, declarations (struct types and pointers to that type) ++ and struct types are scanned and recorded. This includes global declarations. ++ Also record all allocation and freeing sites; this is needed for the rewriting ++ phase. ++ ++ FIXME: If there is a top-level inline-asm, the pass immediately returns. ++ ++ 2. Prune out the types which are considered escaping. ++ Examples of types which are considered escaping: ++ 1. A declaration has been marked as having the attribute used or has user defined ++ alignment (type too). ++ 2. Accesses are via a BIT_FIELD_REF. FIXME: Handle VECTOR_TYPE for this case. ++ 3. The "allocation" site is not a known builtin function. ++ 4. Casting to/from an integer. ++ ++ 3. Analyze the types for which optimization to do. ++ a. Split the fields into two different structs. ++ (FIXME: two field case handled only) ++ Look at all structs which contain two fields, if one of the fields is hotter ++ then split it and put it on the rewritting for accesses. ++ Allocations and freeing are marked to split into two functions; all uses of ++ that type will now be considered as two. ++ b. Reorder fields hottest to the coldest. TODO: Implement. ++ ++ 4. Rewrite each access and allocation and free which is marked as rewriting. ++ ++ */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++#include "tree.h" ++#include "tree-pass.h" ++#include "cgraph.h" ++#include "diagnostic-core.h" ++#include "function.h" ++#include "basic-block.h" ++#include "gimple.h" ++#include "vec.h" ++#include "tree-pretty-print.h" ++#include "gimple-pretty-print.h" ++#include "gimple-iterator.h" ++#include "cfg.h" ++#include "ssa.h" ++#include "tree-dfa.h" ++#include "fold-const.h" ++#include "tree-inline.h" ++#include "stor-layout.h" ++#include "tree-into-ssa.h" ++#include "tree-cfg.h" ++#include "symbol-summary.h" ++#include "alloc-pool.h" ++#include "ipa-prop.h" ++#include "ipa-struct-reorg.h" ++#include "tree-eh.h" ++#include "bitmap.h" ++#include "ipa-param-manipulation.h" ++#include "tree-ssa-live.h" /* For remove_unused_locals. */ ++ ++#define VOID_POINTER_P(type) (POINTER_TYPE_P (type) && VOID_TYPE_P (TREE_TYPE (type))) ++ ++namespace { ++ ++using namespace struct_reorg; ++ ++/* Return true iff TYPE is stdarg va_list type. */ ++ ++static inline bool ++is_va_list_type (tree type) ++{ ++ return TYPE_MAIN_VARIANT (type) == TYPE_MAIN_VARIANT (va_list_type_node); ++} ++ ++ ++/* Return the inner most type for arrays and pointers of TYPE. */ ++ ++tree ++inner_type (tree type) ++{ ++ while (POINTER_TYPE_P (type) ++ || TREE_CODE (type) == ARRAY_TYPE) ++ type = TREE_TYPE (type); ++ return type; ++} ++ ++/* Return true if TYPE is a type which struct reorg should handled. 
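(Editorial illustration; not part of the patch.) A source-level sketch of the transformation the overview above describes: a two-field structure split into a hot and a cold part, with the allocation doubled and every field access redirected. The "after" form is a hand-written approximation of what the rewrite stage aims for, not compiler output; the s_hot/s_cold naming follows the example in the comment.

#include <stdlib.h>

/* Before: the user's type and code.  */
struct s { int field1; double field2; };

double
before (size_t n)
{
  struct s *v = malloc (n * sizeof (struct s));
  v[0].field1 = 1;
  v[0].field2 = 2.0;
  double r = v[0].field1 + v[0].field2;
  free (v);
  return r;
}

/* After (conceptually): one structure, one allocation and one pointer
   per cluster, each field access redirected to its cluster.  */
struct s_hot  { int    field1; };
struct s_cold { double field2; };

double
after (size_t n)
{
  struct s_hot  *v_hot  = malloc (n * sizeof (struct s_hot));
  struct s_cold *v_cold = malloc (n * sizeof (struct s_cold));
  v_hot[0].field1 = 1;
  v_cold[0].field2 = 2.0;
  double r = v_hot[0].field1 + v_cold[0].field2;
  free (v_hot);
  free (v_cold);
  return r;
}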
*/ ++ ++bool ++handled_type (tree type) ++{ ++ type = inner_type (type); ++ if (TREE_CODE (type) == RECORD_TYPE) ++ return !is_va_list_type (type); ++ return false; ++} ++ ++} // anon namespace ++ ++namespace struct_reorg { ++ ++/* Constructor of srfunction. */ ++ ++srfunction::srfunction (cgraph_node *n) ++ : node (n), ++ old (NULL), ++ newnode (NULL), ++ newf (NULL) ++{ ++} ++ ++/* Add an ARG to the list of arguments for the function. */ ++ ++void ++srfunction::add_arg(srdecl *arg) ++{ ++ args.safe_push(arg); ++} ++ ++/* Dump the SRFUNCTION to the file FILE. */ ++ ++void ++srfunction::dump (FILE *file) ++{ ++ if (node) ++ { ++ fprintf (file, "function : "); ++ print_generic_expr (file, node->decl); ++ fprintf (file, " with arguments: "); ++ for (unsigned i = 0; i < args.length (); i++) ++ { ++ if (i == 0) ++ fprintf (file, "\n "); ++ else ++ fprintf (file, "\n, "); ++ args[i]->dump (file); ++ } ++ ++ fprintf (file, "\nuses globals: "); ++ for(unsigned i = 0; i < globals.length (); i++) ++ { ++ fprintf (file, "\n "); ++ globals[i]->dump (file); ++ } ++ ++ fprintf (file, "\ndecls: "); ++ } ++ else ++ fprintf (file, "globals : "); ++ ++ for(unsigned i = 0; i < decls.length (); i++) ++ { ++ fprintf (file, "\n "); ++ decls[i]->dump (file); ++ } ++} ++ ++/* Simple dump the SRFUNCTION to the file FILE; used so it is not recusive. */ ++ ++void ++srfunction::simple_dump (FILE *file) ++{ ++ print_generic_expr (file, node->decl); ++} ++ ++ ++/* Constructor of FIELD. */ ++ ++srfield::srfield (tree field, srtype *base) ++ : offset(int_byte_position (field)), ++ fieldtype (TREE_TYPE (field)), ++ fielddecl (field), ++ base(base), ++ type(NULL), ++ clusternum(0) ++{ ++ for(int i = 0;i < max_split; i++) ++ newfield[i] = NULL_TREE; ++} ++ ++/* Constructor of TYPE. */ ++ ++srtype::srtype (tree type) ++ : type (type), ++ chain_type (false), ++ escapes (does_not_escape), ++ visited (false) ++{ ++ for (int i = 0; i < max_split; i++) ++ newtype[i] = NULL_TREE; ++ ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ if (DECL_BIT_FIELD (field)) ++ { ++ escapes = escape_bitfields; ++ continue; ++ } ++ else if (!DECL_SIZE (field) ++ || TREE_CODE (DECL_SIZE (field)) != INTEGER_CST) ++ { ++ escapes = escape_variable_sized_array; ++ break; ++ } ++ srfield *t = new srfield (field, this); ++ fields.safe_push(t); ++ } ++ } ++} ++ ++/* Mark the type as escaping type E at statement STMT. */ ++ ++void ++srtype::mark_escape (escape_type e, gimple *stmt) ++{ ++ /* Once the type has escaped, it should never ++ change back to non escaping. */ ++ gcc_assert (e != does_not_escape); ++ if (has_escaped ()) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nO type: "); ++ simple_dump (dump_file); ++ fprintf (dump_file, " has already escaped."); ++ fprintf (dump_file, " old = \"%s\" ", escape_type_string[escapes - 1]); ++ fprintf (dump_file, " new = \"%s\"\n", escape_type_string[e - 1]); ++ if (stmt) ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ return; ++ } ++ escapes = e; ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nN type: "); ++ simple_dump (dump_file); ++ fprintf (dump_file, " new = \"%s\"\n", escape_reason ()); ++ if (stmt) ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++} ++ ++/* Add FIELD to the list of fields that use this type. 
*/ ++ ++void ++srtype::add_field_site (srfield *field) ++{ ++ field_sites.safe_push(field); ++} ++ ++ ++/* Constructor of DECL. */ ++ ++srdecl::srdecl (srtype *tp, tree decl, int argnum) ++ : type (tp), ++ decl (decl), ++ func (NULL_TREE), ++ argumentnum (argnum), ++ visited (false) ++{ ++ if (TREE_CODE (decl) == SSA_NAME) ++ func = current_function_decl; ++ else if (!is_global_var (decl)) ++ func = DECL_CONTEXT (decl); ++ for(int i = 0;i < max_split; i++) ++ newdecl[i] = NULL_TREE; ++} ++ ++/* Find DECL in the function. */ ++ ++srdecl * ++srfunction::find_decl (tree decl) ++{ ++ for (unsigned i = 0; i < decls.length (); i++) ++ if (decls[i]->decl == decl) ++ return decls[i]; ++ return NULL; ++} ++ ++/* Record DECL of the TYPE with argument num ARG. */ ++ ++srdecl * ++srfunction::record_decl (srtype *type, tree decl, int arg) ++{ ++ // Search for the decl to see if it is already there. ++ srdecl *decl1 = find_decl (decl); ++ ++ if (decl1) ++ return decl1; ++ ++ gcc_assert (type); ++ ++ decl1 = new srdecl (type, decl, arg); ++ decls.safe_push(decl1); ++ return decl1; ++} ++ ++/* Find the field at OFF offset. */ ++ ++srfield * ++srtype::find_field (unsigned HOST_WIDE_INT off) ++{ ++ unsigned int i; ++ srfield *field; ++ ++ /* FIXME: handle array/struct field inside the current struct. */ ++ /* NOTE This does not need to be fixed to handle libquatumn */ ++ FOR_EACH_VEC_ELT (fields, i, field) ++ { ++ if (off == field->offset) ++ return field; ++ } ++ return NULL; ++} ++ ++/* Add the function FN to the list of functions if it ++ is there not already. */ ++ ++void ++srtype::add_function (srfunction *fn) ++{ ++ unsigned decluid; ++ unsigned i; ++ decluid = DECL_UID (fn->node->decl); ++ ++ srfunction *fn1; ++ // Search for the decl to see if it is already there. ++ FOR_EACH_VEC_ELT (functions, i, fn1) ++ { ++ if (DECL_UID (fn1->node->decl) == decluid) ++ return; ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Recording new function: %u.\n", decluid); ++ ++ functions.safe_push(fn); ++} ++ ++/* Dump out the type structure to FILE. */ ++ ++void ++srtype::dump (FILE *f) ++{ ++ unsigned int i; ++ srfield *field; ++ srfunction *fn; ++ sraccess *access; ++ ++ if (chain_type) ++ fprintf (f, "chain decl "); ++ ++ fprintf (f, "type : "); ++ print_generic_expr (f, type); ++ fprintf (f, "(%d) { ", TYPE_UID (type)); ++ if (escapes != does_not_escape) ++ fprintf (f, " escapes = \"%s\"\n", escape_reason ()); ++ fprintf (f, " fields = { "); ++ FOR_EACH_VEC_ELT (fields, i, field) ++ { ++ if (i == 0) ++ fprintf (f, "\n "); ++ else ++ fprintf (f, "\n, "); ++ field->dump (f); ++ } ++ fprintf (f, " }\n "); ++ fprintf (f, "\n accesses = {"); ++ FOR_EACH_VEC_ELT (accesses, i, access) ++ { ++ fprintf (f, "\n"); ++ access->dump (f); ++ } ++ fprintf (f, " }\n "); ++ fprintf (f, "\n functions = {"); ++ FOR_EACH_VEC_ELT (functions, i, fn) ++ { ++ fprintf (f, " \n"); ++ fn->simple_dump (f); ++ } ++ fprintf (f, "\n }\n"); ++ fprintf (f, "\n field_sites = {"); ++ FOR_EACH_VEC_ELT (field_sites, i, field) ++ { ++ fprintf (f, " \n"); ++ field->simple_dump (f); ++ } ++ fprintf (f, "\n }\n"); ++ fprintf (f, "}\n"); ++} ++ ++/* A simplified dump out the type structure to FILE. */ ++ ++void ++srtype::simple_dump (FILE *f) ++{ ++ print_generic_expr (f, type); ++} ++ ++/* Analyze the type and decide what to be done with it. */ ++ ++void ++srtype::analyze (void) ++{ ++ /* Chain decl types can't be split ++ so don't try. 
*/ ++ if (chain_type) ++ return; ++ ++ /* If there is only one field then there is nothing ++ to be done. */ ++ if (fields.length () == 1) ++ return; ++ ++ /* For now we unconditionally split only structures with 2 fields ++ into 2 different structures. In future we intend to add profile ++ info and/or static heuristics to differentiate splitting process. */ ++ if (fields.length () == 2) ++ fields[1]->clusternum = 1; ++ ++ /* REMOVEME: FIXME: this is here for testing more testcases. */ ++ if (fields.length () >= 3) ++ { ++ fields[1]->clusternum = 1; ++ } ++} ++ ++/* Create the new fields for this field. */ ++ ++void ++srfield::create_new_fields (tree newtype[max_split], ++ tree newfields[max_split], ++ tree newlast[max_split]) ++{ ++ tree nt[max_split]; ++ ++ for (unsigned i = 0; i < max_split; i++) ++ nt[i] = NULL; ++ ++ if (type == NULL) ++ nt[0] = fieldtype; ++ else ++ memcpy (nt, type->newtype, sizeof(type->newtype)); ++ ++ for (unsigned i = 0; i < max_split && nt[i] != NULL; i++) ++ { ++ tree field = make_node (FIELD_DECL); ++ if (nt[1] != NULL && DECL_NAME (fielddecl)) ++ { ++ const char *tname = IDENTIFIER_POINTER (DECL_NAME (fielddecl)); ++ char id[10]; ++ char *name; ++ ++ sprintf(id, "%d", i); ++ name = concat (tname, ".reorg.", id, NULL); ++ DECL_NAME (field) = get_identifier (name); ++ free (name); ++ } ++ else ++ DECL_NAME (field) = DECL_NAME (fielddecl); ++ ++ TREE_TYPE (field) = reconstruct_complex_type (TREE_TYPE (fielddecl), nt[i]); ++ DECL_SOURCE_LOCATION (field) = DECL_SOURCE_LOCATION (fielddecl); ++ SET_DECL_ALIGN (field, DECL_ALIGN (fielddecl)); ++ DECL_USER_ALIGN (field) = DECL_USER_ALIGN (fielddecl); ++ TREE_ADDRESSABLE (field) = TREE_ADDRESSABLE (fielddecl); ++ DECL_NONADDRESSABLE_P (field) = !TREE_ADDRESSABLE (fielddecl); ++ TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (fielddecl); ++ DECL_CONTEXT (field) = newtype[clusternum]; ++ ++ if (newfields[clusternum] == NULL) ++ newfields[clusternum] = newlast[clusternum] = field; ++ else ++ { ++ DECL_CHAIN (newlast[clusternum]) = field; ++ newlast[clusternum] = field; ++ } ++ newfield[i] = field; ++ } ++ ++} ++ ++/* Create the new TYPE corresponding to THIS type. */ ++ ++bool ++srtype::create_new_type (void) ++{ ++ /* If the type has been visited, ++ then return if a new type was ++ created or not. */ ++ if (visited) ++ return has_new_type (); ++ ++ visited = true; ++ ++ if (escapes != does_not_escape) ++ { ++ newtype[0] = type; ++ return false; ++ } ++ ++ bool createnewtype = false; ++ unsigned maxclusters = 0; ++ ++ /* Create a new type for each field. */ ++ for (unsigned i = 0; i < fields.length (); i++) ++ { ++ srfield *field = fields[i]; ++ if (field->type) ++ createnewtype |= field->type->create_new_type (); ++ if (field->clusternum > maxclusters) ++ maxclusters = field->clusternum; ++ } ++ ++ /* If the fields' types did have a change or ++ we are not splitting the struct into two clusters, ++ then just return false and don't change the type. */ ++ if (!createnewtype && maxclusters == 0) ++ { ++ newtype[0] = type; ++ return false; ++ } ++ ++ /* Should have at most max_split clusters. 
*/ ++ gcc_assert (maxclusters < max_split); ++ ++ tree newfields[max_split]; ++ tree newlast[max_split]; ++ ++ maxclusters++; ++ ++ const char *tname = NULL; ++ ++ if (TYPE_NAME (type) != NULL) ++ { ++ if (TREE_CODE (TYPE_NAME (type)) == IDENTIFIER_NODE) ++ tname = IDENTIFIER_POINTER (TYPE_NAME (type)); ++ else if (DECL_NAME (TYPE_NAME (type)) != NULL) ++ tname = IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))); ++ } ++ ++ for (unsigned i = 0; i < maxclusters; i++) ++ { ++ newfields[i] = NULL_TREE; ++ newlast[i] = NULL_TREE; ++ newtype[i] = make_node (RECORD_TYPE); ++ ++ char *name = NULL; ++ char id[10]; ++ sprintf(id, "%d", i); ++ if (tname) ++ { ++ name = concat (tname, ".reorg.", id, NULL); ++ TYPE_NAME (newtype[i]) = get_identifier (name); ++ free (name); ++ } ++ } ++ ++ for (unsigned i = 0; i < fields.length (); i++) ++ { ++ srfield *f = fields[i]; ++ f->create_new_fields (newtype, newfields, newlast); ++ } ++ ++ ++ /* No reason to warn about these structs since the warning would ++ have happened already. */ ++ int save_warn_padded = warn_padded; ++ warn_padded = 0; ++ ++ for (unsigned i = 0; i < maxclusters; i++) ++ { ++ TYPE_FIELDS (newtype[i]) = newfields[i]; ++ layout_type (newtype[i]); ++ } ++ ++ warn_padded = save_warn_padded; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Created %d types:\n", maxclusters); ++ for (unsigned i = 0; i < maxclusters; i++) ++ { ++ print_generic_expr (dump_file, newtype[i]); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ ++ return true; ++} ++ ++/* Helper function to copy some attributes from ORIG_DECL to the NEW_DECL. */ ++ ++static inline void ++copy_var_attributes (tree new_decl, tree orig_decl) ++{ ++ DECL_ARTIFICIAL (new_decl) = 1; ++ DECL_EXTERNAL (new_decl) = DECL_EXTERNAL (orig_decl); ++ TREE_STATIC (new_decl) = TREE_STATIC (orig_decl); ++ TREE_PUBLIC (new_decl) = TREE_PUBLIC (orig_decl); ++ TREE_USED (new_decl) = TREE_USED (orig_decl); ++ DECL_CONTEXT (new_decl) = DECL_CONTEXT (orig_decl); ++ TREE_THIS_VOLATILE (new_decl) = TREE_THIS_VOLATILE (orig_decl); ++ TREE_ADDRESSABLE (new_decl) = TREE_ADDRESSABLE (orig_decl); ++ TREE_READONLY (new_decl) = TREE_READONLY (orig_decl); ++ if (is_global_var (orig_decl)) ++ set_decl_tls_model (new_decl, DECL_TLS_MODEL (orig_decl)); ++} ++ ++/* Create all of the new decls (SSA_NAMES included) for THIS function. */ ++ ++void ++srfunction::create_new_decls (void) ++{ ++ /* If this function has been cloned, we don't need to ++ create the new decls. */ ++ if (newnode) ++ return; ++ ++ if (node) ++ set_cfun (DECL_STRUCT_FUNCTION (node->decl)); ++ ++ for (unsigned i = 0; i < decls.length (); i++) ++ { ++ srdecl *decl = decls[i]; ++ srtype *type = decl->type; ++ /* If the type of the decl does not change, ++ then don't create a new decl. */ ++ if (!type->has_new_type ()) ++ { ++ decl->newdecl[0] = decl->decl; ++ continue; ++ } ++ ++ /* Handle SSA_NAMEs. */ ++ if (TREE_CODE (decl->decl) == SSA_NAME) ++ { ++ tree newtype1[max_split]; ++ tree inner = SSA_NAME_VAR (decl->decl); ++ tree newinner[max_split]; ++ memset (newinner, 0, sizeof(newinner)); ++ for (unsigned j = 0; j < max_split && type->newtype[j]; j++) ++ newtype1[j] = reconstruct_complex_type (TREE_TYPE (decls[i]->decl), type->newtype[j]); ++ if (inner) ++ { ++ srdecl *in = find_decl (inner); ++ gcc_assert (in); ++ memcpy (newinner, in->newdecl, sizeof(newinner)); ++ } ++ tree od = decls[i]->decl; ++ /* Create the new ssa names and copy some attributes from the old one. 
*/ ++ for (unsigned j = 0; j < max_split && type->newtype[j]; j++) ++ { ++ tree nd = make_ssa_name (newinner[j] ? newinner[j] : newtype1[j]); ++ decl->newdecl[j] = nd; ++ /* If the old decl was a default defition, handle it specially. */ ++ if (SSA_NAME_IS_DEFAULT_DEF (od)) ++ { ++ SSA_NAME_IS_DEFAULT_DEF (nd) = true; ++ SSA_NAME_DEF_STMT (nd) = gimple_build_nop (); ++ ++ /* Set the default definition for the ssaname if needed. */ ++ if (inner) ++ { ++ gcc_assert (newinner[j]); ++ set_ssa_default_def (cfun, newinner[j], nd); ++ } ++ } ++ SSA_NAME_OCCURS_IN_ABNORMAL_PHI (nd) ++ = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (od); ++ statistics_counter_event (cfun, "Create new ssa_name", 1); ++ } ++ } ++ else if (TREE_CODE (decls[i]->decl) == VAR_DECL) ++ { ++ tree orig_var = decl->decl; ++ const char *tname = NULL; ++ if (DECL_NAME (orig_var)) ++ tname = IDENTIFIER_POINTER (DECL_NAME (orig_var)); ++ for (unsigned j = 0; j < max_split && type->newtype[j]; j++) ++ { ++ tree new_name = NULL; ++ char *name = NULL; ++ char id[10]; ++ sprintf(id, "%d", j); ++ if (tname) ++ { ++ name = concat (tname, ".reorg.", id, NULL); ++ new_name = get_identifier (name); ++ free (name); ++ } ++ tree newtype1 = reconstruct_complex_type (TREE_TYPE (orig_var), type->newtype[j]); ++ decl->newdecl[j] = build_decl (DECL_SOURCE_LOCATION (orig_var), ++ VAR_DECL, new_name, newtype1); ++ copy_var_attributes (decl->newdecl[j], orig_var); ++ if (!is_global_var (orig_var)) ++ add_local_decl (cfun, decl->newdecl[j]); ++ else ++ varpool_node::add (decl->newdecl[j]); ++ statistics_counter_event (cfun, "Create new var decl", 1); ++ } ++ } ++ /* Paramater decls are already handled in create_new_functions. */ ++ else if (TREE_CODE (decls[i]->decl) == PARM_DECL) ++ ; ++ else ++ internal_error ("Unhandled decl type stored"); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Created New decls for decl:\n"); ++ fprintf (dump_file, "\n"); ++ decls[i]->dump (dump_file); ++ fprintf (dump_file, "\n"); ++ for (unsigned j = 0; j < max_split && decls[i]->newdecl[j]; j++) ++ { ++ print_generic_expr (dump_file, decls[i]->newdecl[j]); ++ fprintf (dump_file, "\n"); ++ } ++ fprintf (dump_file, "\n"); ++ } ++ } ++ ++ set_cfun (NULL); ++ ++} ++ ++/* Dump out the field structure to FILE. */ ++ ++void ++srfield::dump (FILE *f) ++{ ++ fprintf (f, "field (%d) { ", DECL_UID (fielddecl)); ++ fprintf (f, "base = "); ++ base->simple_dump (f); ++ fprintf (f, ", offset = " HOST_WIDE_INT_PRINT_DEC, offset); ++ fprintf (f, ", type = "); ++ print_generic_expr (f, fieldtype); ++ if (type) ++ { ++ fprintf (f, "( srtype = "); ++ type->simple_dump (f); ++ fprintf (f, ")"); ++ } ++ fprintf (f, "\n}\n"); ++} ++ ++ ++/* A simplified dump out the field structure to FILE. */ ++ ++void ++srfield::simple_dump (FILE *f) ++{ ++ fprintf (f, "field (%d)", DECL_UID (fielddecl)); ++} ++ ++/* Dump out the access structure to FILE. */ ++ ++void ++sraccess::dump (FILE *f) ++{ ++ fprintf (f, "access { "); ++ fprintf (f, "type = '("); ++ type->simple_dump (f); ++ fprintf (f, ")'"); ++ if (field) ++ { ++ fprintf (f, ", field = '("); ++ field->simple_dump (f); ++ fprintf (f, ")'"); ++ } ++ else ++ fprintf (f, ", whole type"); ++ fprintf (f, " in function: %s/%d", node->name (), node->order); ++ fprintf (f, ", stmt:\n"); ++ print_gimple_stmt (f, stmt, 0); ++ fprintf (f, "\n }\n"); ++ ++} ++ ++/* Dump out the decl structure to FILE. 
*/ ++ ++void ++srdecl::dump (FILE *file) ++{ ++ if (!func) ++ fprintf (file, "global "); ++ if (argumentnum != -1) ++ fprintf (file, "argument(%d) ", argumentnum); ++ fprintf (file, "decl: "); ++ print_generic_expr (file, decl); ++ fprintf (file, " type: "); ++ type->simple_dump (file); ++} ++ ++} // namespace struct_reorg ++ ++namespace { ++ ++struct ipa_struct_reorg ++{ ++ // Constructors ++ ipa_struct_reorg(void) ++ : current_function (NULL), ++ done_recording(false) ++ { ++ } ++ ++ // public methods ++ unsigned execute(void); ++ void mark_type_as_escape (tree type, escape_type, gimple *stmt = NULL); ++private: ++ // fields ++ auto_vec_del types; ++ auto_vec_del functions; ++ srglobal globals; ++ srfunction *current_function; ++ ++ bool done_recording; ++ ++ // private methods ++ void dump_types (FILE *f); ++ void dump_types_escaped (FILE *f); ++ void dump_functions (FILE *f); ++ void record_accesses (void); ++ void detect_cycles (void); ++ bool walk_field_for_cycles (srtype*); ++ void prune_escaped_types (void); ++ void propagate_escape (void); ++ void analyze_types (void); ++ void clear_visited (void); ++ bool create_new_types (void); ++ void create_new_decls (void); ++ srdecl *find_decl (tree); ++ void create_new_functions (void); ++ void create_new_args (cgraph_node *new_node); ++ unsigned rewrite_functions (void); ++ srdecl *record_var (tree decl, escape_type escapes = does_not_escape, int arg = -1); ++ srfunction *record_function (cgraph_node *node); ++ srfunction *find_function (cgraph_node *node); ++ srtype *record_type (tree type); ++ void process_union (tree type); ++ srtype *find_type (tree type); ++ void maybe_record_stmt (cgraph_node *, gimple *); ++ void maybe_record_assign (cgraph_node *, gassign *); ++ void maybe_record_call (cgraph_node *, gcall *); ++ void maybe_record_allocation_site (cgraph_node *, gimple *); ++ void record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt); ++ void mark_expr_escape(tree, escape_type, gimple *stmt); ++ tree allocate_size (srtype *t, gimple *stmt); ++ ++ void mark_decls_in_as_not_needed (tree fn); ++ ++ bool rewrite_stmt (gimple*, gimple_stmt_iterator *); ++ bool rewrite_assign (gassign *, gimple_stmt_iterator *); ++ bool rewrite_call (gcall *, gimple_stmt_iterator *); ++ bool rewrite_cond (gcond *, gimple_stmt_iterator *); ++ bool rewrite_debug (gimple *, gimple_stmt_iterator *); ++ bool rewrite_phi (gphi *); ++ bool rewrite_expr (tree expr, tree newexpr[max_split], bool ignore_missing_decl = false); ++ bool rewrite_lhs_rhs (tree lhs, tree rhs, tree newlhs[max_split], tree newrhs[max_split]); ++ bool get_type_field (tree expr, tree &base, bool &indirect, srtype *&type, srfield *&field, bool &realpart, bool &imagpart, bool &address, bool should_create = false, bool can_escape = false); ++ bool wholeaccess (tree expr, tree base, tree accesstype, srtype *t); ++ ++ void check_definition (srdecl *decl, vec&); ++ void check_uses (srdecl *decl, vec&); ++ void check_use (srdecl *decl, gimple *stmt, vec&); ++ void check_type_and_push (tree newdecl, srtype *type, vec &worklist, gimple *stmt); ++ void check_other_side (srdecl *decl, tree other, gimple *stmt, vec &worklist); ++ ++ void find_vars (gimple *stmt); ++ void find_var (tree expr, gimple *stmt); ++ void mark_types_asm (gasm *astmt); ++ ++ bool has_rewritten_type (srfunction*); ++ void maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt); ++}; ++ ++/* Dump all of the recorded types to file F. 
*/ ++ ++void ++ipa_struct_reorg::dump_types (FILE *f) ++{ ++ unsigned i; ++ srtype *type; ++ FOR_EACH_VEC_ELT (types, i, type) ++ { ++ type->dump(f); ++ } ++ fprintf (f, "\n"); ++} ++ ++/* Dump all of the recorded types to file F. */ ++ ++void ++ipa_struct_reorg::dump_types_escaped (FILE *f) ++{ ++ unsigned i; ++ srtype *type; ++ FOR_EACH_VEC_ELT (types, i, type) ++ { ++ if (type->has_escaped ()) ++ { ++ type->simple_dump (f); ++ fprintf (f, " has escaped: \"%s\"\n", type->escape_reason()); ++ } ++ } ++ fprintf (f, "\n"); ++} ++ ++ ++/* Dump all of the record functions to file F. */ ++ ++void ++ipa_struct_reorg::dump_functions (FILE *f) ++{ ++ unsigned i; ++ srfunction *fn; ++ ++ fprintf (f, "\n\n"); ++ globals.dump (f); ++ fprintf (f, "\n\n"); ++ FOR_EACH_VEC_ELT (functions, i, fn) ++ { ++ fn->dump(f); ++ fprintf (f, "\n"); ++ } ++ fprintf (f, "\n\n"); ++} ++ ++/* Find the recorded srtype corresponding to TYPE. */ ++ ++srtype * ++ipa_struct_reorg::find_type (tree type) ++{ ++ unsigned i; ++ /* Get the main variant as we are going ++ to find that type only. */ ++ type = TYPE_MAIN_VARIANT (type); ++ ++ srtype *type1; ++ // Search for the type to see if it is already there. ++ FOR_EACH_VEC_ELT (types, i, type1) ++ { ++ if (types_compatible_p (type1->type, type)) ++ return type1; ++ } ++ return NULL; ++} ++ ++/* Is TYPE a volatile type or one which points ++ to a volatile type. */ ++ ++bool isvolatile_type (tree type) ++{ ++ if (TYPE_VOLATILE (type)) ++ return true; ++ while (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE) ++ { ++ type = TREE_TYPE (type); ++ if (TYPE_VOLATILE (type)) ++ return true; ++ } ++ return false; ++} ++ ++/* Is TYPE an array type or points to an array type. */ ++ ++bool isarraytype (tree type) ++{ ++ if (TREE_CODE (type) == ARRAY_TYPE) ++ return true; ++ while (POINTER_TYPE_P (type)) ++ { ++ type = TREE_TYPE (type); ++ if (TREE_CODE (type) == ARRAY_TYPE) ++ return true; ++ } ++ return false; ++} ++ ++/* Is TYPE a pointer to another pointer. */ ++ ++bool isptrptr (tree type) ++{ ++ bool firstptr = false; ++ while (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE) ++ { ++ if (POINTER_TYPE_P (type)) ++ { ++ if (firstptr) ++ return true; ++ firstptr = true; ++ } ++ type = TREE_TYPE (type); ++ } ++ return false; ++} ++ ++/* Return the escape type which corresponds to if ++ this is an volatile type, an array type or a pointer ++ to a pointer type. */ ++ ++escape_type escape_type_volatile_array_or_ptrptr (tree type) ++{ ++ if (isvolatile_type (type)) ++ return escape_volatile; ++ if (isarraytype (type)) ++ return escape_array; ++ if (isptrptr (type)) ++ return escape_ptr_ptr; ++ return does_not_escape; ++} ++ ++/* Record TYPE if not already recorded. */ ++ ++srtype * ++ipa_struct_reorg::record_type (tree type) ++{ ++ unsigned typeuid; ++ ++ /* Get the main variant as we are going ++ to record that type only. */ ++ type = TYPE_MAIN_VARIANT (type); ++ typeuid = TYPE_UID (type); ++ ++ srtype *type1; ++ ++ type1 = find_type (type); ++ if (type1) ++ return type1; ++ ++ /* If already done recording just return NULL. */ ++ if (done_recording) ++ return NULL; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "Recording new type: %u.\n", typeuid); ++ ++ type1 = new srtype (type); ++ types.safe_push(type1); ++ ++ /* If the type has an user alignment set, ++ that means the user most likely already setup the type. 
*/ ++ if (TYPE_USER_ALIGN (type)) ++ type1->mark_escape (escape_user_alignment, NULL); ++ ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ tree t = TREE_TYPE (field); ++ process_union (t); ++ if (isvolatile_type (t)) ++ type1->mark_escape (escape_volatile, NULL); ++ escape_type e = escape_type_volatile_array_or_ptrptr (t); ++ if (e != does_not_escape) ++ type1->mark_escape (e, NULL); ++ if (handled_type (t)) ++ { ++ srtype *t1 = record_type (inner_type (t)); ++ srfield *f = type1->find_field (int_byte_position (field)); ++ /* We might have an variable sized type which we don't set the handle. */ ++ if (f) ++ { ++ f->type = t1; ++ t1->add_field_site (f); ++ } ++ if (t1 == type1) ++ type1->mark_escape (escape_rescusive_type, NULL); ++ } ++ } ++ } ++ ++ return type1; ++} ++ ++/* Mark TYPE as escaping with ESCAPES as the reason. */ ++ ++void ++ipa_struct_reorg::mark_type_as_escape (tree type, escape_type escapes, gimple *stmt) ++{ ++ if (handled_type (type)) ++ { ++ srtype *stype = record_type (inner_type (type)); ++ ++ if (!stype) ++ return; ++ ++ stype->mark_escape (escapes, stmt); ++ } ++} ++ ++/* Maybe process the union of type TYPE, such that marking all of the fields' ++ types as being escaping. */ ++ ++void ++ipa_struct_reorg::process_union (tree type) ++{ ++ static hash_set unions_recorded; ++ ++ type = inner_type (type); ++ if (TREE_CODE (type) != UNION_TYPE ++ && TREE_CODE (type) != QUAL_UNION_TYPE) ++ return; ++ ++ type = TYPE_MAIN_VARIANT (type); ++ ++ /* We already processed this type. */ ++ if (unions_recorded.add (type)) ++ return; ++ ++ for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL) ++ { ++ mark_type_as_escape (TREE_TYPE (field), escape_union); ++ process_union (TREE_TYPE (field)); ++ } ++ } ++} ++ ++/* Used by record_var function as a callback to walk_tree. ++ Mark the type as escaping if it has expressions which ++ cannot be converted for global initializations. */ ++ ++static tree ++record_init_types (tree *tp, int *walk_subtrees, void *data) ++{ ++ ipa_struct_reorg *c = (ipa_struct_reorg *)data; ++ switch (TREE_CODE (*tp)) ++ { ++ CASE_CONVERT: ++ case COMPONENT_REF: ++ case VIEW_CONVERT_EXPR: ++ case ARRAY_REF: ++ { ++ tree typeouter = TREE_TYPE (*tp); ++ tree typeinner = TREE_TYPE (TREE_OPERAND (*tp, 0)); ++ c->mark_type_as_escape (typeouter, escape_via_global_init); ++ c->mark_type_as_escape (typeinner, escape_via_global_init); ++ break; ++ } ++ case INTEGER_CST: ++ if (!integer_zerop (*tp)) ++ c->mark_type_as_escape (TREE_TYPE (*tp), escape_via_global_init); ++ break; ++ case VAR_DECL: ++ case PARM_DECL: ++ case FIELD_DECL: ++ c->mark_type_as_escape (TREE_TYPE (*tp), escape_via_global_init); ++ *walk_subtrees = false; ++ break; ++ default: ++ *walk_subtrees = true; ++ break; ++ } ++ return NULL_TREE; ++} ++ ++/* Record var DECL; optionally specify the escape reason and the argument ++ number in a function. 
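(Editorial illustration; not part of the patch.) Declarations that the recording code above excludes, annotated with the reason from escapes.def that the shown checks assign, assuming each type is actually referenced in whole-program mode. This is a reading of the code, not verified dump output, and only one reason is listed per declaration even where several could apply.

struct cand { int a; long b; };            /* remains a candidate           */
struct bf   { int a : 3; int b; };         /* escape_bitfields              */
struct vol  { volatile int a; long b; };   /* escape_volatile               */
struct rec  { int a; struct rec *next; };  /* escape_rescusive_type         */
struct ali  { int a; long b; } __attribute__ ((aligned (64)));
                                           /* escape_user_alignment         */
struct um   { int a; long b; };
union  any  { struct um u; double d; };
union  any  ga;                            /* struct um: escape_union       */
struct arr  { int a; long b; };
struct arr  garray[16];                    /* escape_array (not handled)    */
struct pp   { int a; long b; };
struct pp   **gpp;                         /* escape_ptr_ptr (not handled)  */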
*/ ++ ++srdecl * ++ipa_struct_reorg::record_var (tree decl, escape_type escapes, int arg) ++{ ++ srtype *type; ++ srdecl *sd = NULL; ++ ++ process_union (TREE_TYPE (decl)); ++ ++ /* */ ++ if (handled_type (TREE_TYPE (decl))) ++ { ++ type = record_type (inner_type (TREE_TYPE (decl))); ++ escape_type e; ++ ++ if (done_recording && !type) ++ return NULL; ++ ++ gcc_assert (type); ++ if (TREE_CODE (decl) == VAR_DECL && is_global_var (decl)) ++ sd = globals.record_decl (type, decl, arg); ++ else ++ { ++ gcc_assert (current_function); ++ sd = current_function->record_decl (type, decl, arg); ++ } ++ ++ /* If the variable has the "used" attribute, then treat the type as escaping. */ ++ if (escapes != does_not_escape) ++ e = escapes; ++ else if (TREE_CODE (decl) != SSA_NAME && DECL_PRESERVE_P (decl)) ++ e = escape_marked_as_used; ++ else if (TREE_THIS_VOLATILE (decl)) ++ e = escape_volatile; ++ else if (TREE_CODE (decl) != SSA_NAME && DECL_USER_ALIGN (decl)) ++ e = escape_user_alignment; ++ else if (TREE_CODE (decl) != SSA_NAME && TREE_STATIC (decl) && TREE_PUBLIC (decl)) ++ e = escape_via_global_var; ++ /* We don't have an initlizer. */ ++ else if (TREE_CODE (decl) != SSA_NAME && DECL_INITIAL (decl) == error_mark_node) ++ e = escape_via_global_var; ++ else ++ e = escape_type_volatile_array_or_ptrptr (TREE_TYPE (decl)); ++ ++ if (e != does_not_escape) ++ type->mark_escape (e, NULL); ++ } ++ ++ /* Record the initial usage of variables as types escapes. */ ++ if (TREE_CODE (decl) != SSA_NAME && TREE_STATIC (decl) && DECL_INITIAL (decl)) ++ { ++ walk_tree_without_duplicates (&DECL_INITIAL (decl), record_init_types, this); ++ if (!integer_zerop (DECL_INITIAL (decl)) ++ && DECL_INITIAL (decl) != error_mark_node) ++ mark_type_as_escape (TREE_TYPE (decl), escape_via_global_init); ++ } ++ return sd; ++} ++ ++/* Find void* ssa_names which are used inside MEM[] or if we have &a.c, ++ mark the type as escaping. */ ++ ++void ++ipa_struct_reorg::find_var (tree expr, gimple *stmt) ++{ ++ /* If we have VCE mark the outer type as escaping and the inner one ++ Also mark the inner most operand. */ ++ if (TREE_CODE (expr) == VIEW_CONVERT_EXPR) ++ { ++ mark_type_as_escape (TREE_TYPE (expr), escape_vce, stmt); ++ mark_type_as_escape (TREE_TYPE (TREE_OPERAND (expr, 0)), ++ escape_vce, stmt); ++ } ++ ++ /* If we have &b.c then we need to mark the type of b ++ as escaping as tracking a will be hard. 
*/ ++ if (TREE_CODE (expr) == ADDR_EXPR ++ || TREE_CODE (expr) == VIEW_CONVERT_EXPR) ++ { ++ tree r = TREE_OPERAND (expr, 0); ++ if (handled_component_p (r) ++ || TREE_CODE (r) == MEM_REF) ++ { ++ while (handled_component_p (r) ++ || TREE_CODE (r) == MEM_REF) ++ { ++ if (TREE_CODE (r) == VIEW_CONVERT_EXPR) ++ { ++ mark_type_as_escape (TREE_TYPE (r), escape_vce, stmt); ++ mark_type_as_escape (TREE_TYPE (TREE_OPERAND (r, 0)), ++ escape_vce, stmt); ++ } ++ if (TREE_CODE (r) == MEM_REF) ++ mark_type_as_escape (TREE_TYPE (TREE_OPERAND (r, 1)), ++ escape_addr, stmt); ++ r = TREE_OPERAND (r, 0); ++ } ++ mark_expr_escape (r, escape_addr, stmt); ++ } ++ } ++ ++ tree base; ++ bool indirect; ++ srtype *type; ++ srfield *field; ++ bool realpart, imagpart, address; ++ get_type_field (expr, base, indirect, type, field, ++ realpart, imagpart, address, true, true); ++} ++ ++ ++void ++ipa_struct_reorg::find_vars (gimple *stmt) ++{ ++ gasm *astmt; ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_ASSIGN: ++ if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS ++ || gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs = gimple_assign_rhs1 (stmt); ++ find_var (gimple_assign_lhs (stmt), stmt); ++ find_var (gimple_assign_rhs1 (stmt), stmt); ++ if (TREE_CODE (lhs) == SSA_NAME ++ && VOID_POINTER_P (TREE_TYPE (lhs)) ++ && handled_type (TREE_TYPE (rhs))) ++ { ++ srtype *t = find_type (inner_type (TREE_TYPE (rhs))); ++ srdecl *d = find_decl (lhs); ++ if (!d && t) ++ current_function->record_decl (t, lhs, -1); ++ } ++ if (TREE_CODE (rhs) == SSA_NAME ++ && VOID_POINTER_P (TREE_TYPE (rhs)) ++ && handled_type (TREE_TYPE (lhs))) ++ { ++ srtype *t = find_type (inner_type (TREE_TYPE (lhs))); ++ srdecl *d = find_decl (rhs); ++ if (!d && t) ++ current_function->record_decl (t, rhs, -1); ++ } ++ } ++ break; ++ ++ case GIMPLE_CALL: ++ if (gimple_call_lhs (stmt)) ++ find_var (gimple_call_lhs (stmt), stmt); ++ ++ if (gimple_call_chain (stmt)) ++ find_var (gimple_call_chain (stmt), stmt); ++ ++ for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) ++ find_var (gimple_call_arg (stmt, i), stmt); ++ break; ++ ++ case GIMPLE_ASM: ++ astmt = as_a (stmt); ++ for (unsigned i = 0; i < gimple_asm_ninputs (astmt); i++) ++ find_var (TREE_VALUE (gimple_asm_input_op (astmt, i)), stmt); ++ for (unsigned i = 0; i < gimple_asm_noutputs (astmt); i++) ++ find_var (TREE_VALUE (gimple_asm_output_op (astmt, i)), stmt); ++ mark_types_asm (astmt); ++ break; ++ ++ case GIMPLE_RETURN: ++ { ++ tree expr = gimple_return_retval (as_a(stmt)); ++ if (expr) ++ find_var (expr, stmt); ++ /* return &a; should mark the type of a as escaping through a return. */ ++ if (expr && TREE_CODE (expr) == ADDR_EXPR) ++ { ++ expr = TREE_OPERAND (expr, 0); ++ srdecl *d = find_decl (expr); ++ if (d) ++ d->type->mark_escape (escape_return, stmt); ++ } ++ } ++ break; ++ ++ default: ++ break; ++ } ++} ++ ++/* Maybe record access of statement for further analaysis. */ ++ ++void ++ipa_struct_reorg::maybe_record_stmt (cgraph_node *node, gimple *stmt) ++{ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_ASSIGN: ++ maybe_record_assign (node, as_a (stmt)); ++ break; ++ case GIMPLE_CALL: ++ maybe_record_call (node, as_a (stmt)); ++ break; ++ case GIMPLE_DEBUG: ++ break; ++ case GIMPLE_GOTO: ++ case GIMPLE_SWITCH: ++ break; ++ default: ++ break; ++ } ++} ++ ++/* This function checks whether ARG is a result of multiplication ++ of some number by STRUCT_SIZE. 
If yes, the function returns true ++ and this number is filled into NUM. */ ++ ++static bool ++is_result_of_mult (tree arg, tree *num, tree struct_size) ++{ ++ if (!struct_size ++ || TREE_CODE (struct_size) != INTEGER_CST ++ || integer_zerop (struct_size)) ++ return false; ++ ++ /* If we have a integer, just check if it is a multiply of STRUCT_SIZE. */ ++ if (TREE_CODE (arg) == INTEGER_CST) ++ { ++ if (integer_zerop (size_binop (FLOOR_MOD_EXPR, arg, struct_size))) ++ { ++ *num = size_binop (FLOOR_DIV_EXPR, arg, struct_size); ++ return true; ++ } ++ return false; ++ } ++ gimple *size_def_stmt = SSA_NAME_DEF_STMT (arg); ++ ++ /* If the allocation statement was of the form ++ D.2229_10 = (D.2228_9); ++ then size_def_stmt can be D.2228_9 = num.3_8 * 8; */ ++ ++ while (size_def_stmt && is_gimple_assign (size_def_stmt)) ++ { ++ tree lhs = gimple_assign_lhs (size_def_stmt); ++ ++ /* We expect temporary here. */ ++ if (!is_gimple_reg (lhs)) ++ return false; ++ ++ // FIXME: this should handle SHIFT also. ++ if (gimple_assign_rhs_code (size_def_stmt) == PLUS_EXPR) ++ { ++ tree num1, num2; ++ tree arg0 = gimple_assign_rhs1 (size_def_stmt); ++ tree arg1 = gimple_assign_rhs2 (size_def_stmt); ++ if (!is_result_of_mult (arg0, &num1, struct_size)) ++ return false; ++ if (!is_result_of_mult (arg1, &num2, struct_size)) ++ return false; ++ *num = size_binop (PLUS_EXPR, num1, num2); ++ return true; ++ } ++ if (gimple_assign_rhs_code (size_def_stmt) == MULT_EXPR) ++ { ++ tree arg0 = gimple_assign_rhs1 (size_def_stmt); ++ tree arg1 = gimple_assign_rhs2 (size_def_stmt); ++ tree num1; ++ ++ if (is_result_of_mult (arg0, &num1, struct_size)) ++ { ++ *num = size_binop (MULT_EXPR, arg1, num1); ++ return true; ++ } ++ if (is_result_of_mult (arg1, &num1, struct_size)) ++ { ++ *num = size_binop (MULT_EXPR, arg0, num1); ++ return true; ++ } ++ ++ *num = NULL_TREE; ++ return false; ++ } ++ else if (gimple_assign_rhs_code (size_def_stmt) == SSA_NAME) ++ { ++ arg = gimple_assign_rhs1 (size_def_stmt); ++ size_def_stmt = SSA_NAME_DEF_STMT (arg); ++ } ++ else ++ { ++ *num = NULL_TREE; ++ return false; ++ } ++ } ++ ++ *num = NULL_TREE; ++ return false; ++} ++ ++/* Return TRUE if STMT is an allocation statement that is handled. */ ++ ++static bool ++handled_allocation_stmt (gimple *stmt) ++{ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_CALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA_WITH_ALIGN)) ++ return true; ++ return false; ++} ++ ++ ++/* Returns the allocated size / T size for STMT. That is the number of ++ elements in the array allocated. 
*/ ++ ++tree ++ipa_struct_reorg::allocate_size (srtype *type, gimple *stmt) ++{ ++ if (!stmt ++ || gimple_code (stmt) != GIMPLE_CALL ++ || !handled_allocation_stmt (stmt)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nNot a allocate statment:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ return NULL; ++ } ++ ++ if (type->has_escaped ()) ++ return NULL; ++ ++ tree struct_size = TYPE_SIZE_UNIT (type->type); ++ ++ tree size = gimple_call_arg (stmt, 0); ++ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALIGNED_ALLOC)) ++ size = gimple_call_arg (stmt, 1); ++ else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) ++ { ++ tree arg1; ++ arg1 = gimple_call_arg (stmt, 1); ++ /* Check that second argument is a constant equal to the size of structure. */ ++ if (operand_equal_p (arg1, struct_size, 0)) ++ return size; ++ /* Check that first argument is a constant equal to the size of structure. */ ++ if (operand_equal_p (size, struct_size, 0)) ++ return arg1; ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\ncalloc the correct size:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ return NULL; ++ } ++ ++ tree num; ++ if (!is_result_of_mult (size, &num, struct_size)) ++ return NULL; ++ ++ return num; ++ ++} ++ ++ ++void ++ipa_struct_reorg::maybe_mark_or_record_other_side (tree side, tree other, gimple *stmt) ++{ ++ gcc_assert (TREE_CODE (side) == SSA_NAME || TREE_CODE (side) == ADDR_EXPR); ++ srtype *type = NULL; ++ if (handled_type (TREE_TYPE (other))) ++ type = record_type (inner_type (TREE_TYPE (other))); ++ if (TREE_CODE (side) == ADDR_EXPR) ++ side = TREE_OPERAND (side, 0); ++ srdecl *d = find_decl (side); ++ if (!type) ++ { ++ if (!d) ++ return; ++ if (TREE_CODE (side) == SSA_NAME ++ && VOID_POINTER_P (TREE_TYPE (side))) ++ return; ++ d->type->mark_escape (escape_cast_another_ptr, stmt); ++ return; ++ } ++ ++ if (!d) ++ { ++ if (VOID_POINTER_P (TREE_TYPE (side)) ++ && TREE_CODE (side) == SSA_NAME) ++ current_function->record_decl (type, side, -1); ++ else ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ } ++ else if (type != d->type) ++ { ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ d->type->mark_escape (escape_cast_another_ptr, stmt); ++ } ++} ++ ++/* Record accesses in an assignment statement STMT. */ ++ ++void ++ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt) ++{ ++ ++ /* */ ++ ++ if (gimple_clobber_p (stmt)) ++ { ++ record_stmt_expr (gimple_assign_lhs (stmt), node, stmt); ++ return; ++ } ++ ++ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree num; ++ if (!handled_type (TREE_TYPE (lhs))) ++ return; ++ /* Check if rhs2 is a multiplication of the size of the type. */ ++ if (is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))))) ++ { ++ record_stmt_expr (lhs, node, stmt); ++ record_stmt_expr (rhs1, node, stmt); ++ } ++ else ++ { ++ mark_expr_escape (lhs, escape_non_multiply_size, stmt); ++ mark_expr_escape (rhs1, escape_non_multiply_size, stmt); ++ } ++ return; ++ } ++ /* Copies, References, Taking addresses. 
*/ ++ if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs = gimple_assign_rhs1 (stmt); ++ /* If we have a = &b.c then we need to mark the type of b ++ as escaping as tracking a will be hard. */ ++ if (TREE_CODE (rhs) == ADDR_EXPR) ++ { ++ tree r = TREE_OPERAND (rhs, 0); ++ if (handled_component_p (r)) ++ { ++ while (handled_component_p (r)) ++ r = TREE_OPERAND (r, 0); ++ mark_expr_escape (r, escape_addr, stmt); ++ return; ++ } ++ } ++ if ((TREE_CODE (rhs) == SSA_NAME || TREE_CODE (rhs) == ADDR_EXPR)) ++ maybe_mark_or_record_other_side (rhs, lhs, stmt); ++ if (TREE_CODE (lhs) == SSA_NAME) ++ maybe_mark_or_record_other_side (lhs, rhs, stmt); ++ } ++} ++ ++tree ++get_ref_base_and_offset (tree &e, HOST_WIDE_INT &offset, bool &realpart, bool &imagpart, tree &accesstype) ++{ ++ offset = 0; ++ realpart = false; ++ imagpart = false; ++ accesstype = NULL_TREE; ++ if (TREE_CODE (e) == REALPART_EXPR) ++ { ++ e = TREE_OPERAND (e, 0); ++ realpart = true; ++ } ++ if (TREE_CODE (e) == IMAGPART_EXPR) ++ { ++ e = TREE_OPERAND (e, 0); ++ imagpart = true; ++ } ++ tree expr = e; ++ while (true) ++ { ++ switch (TREE_CODE (expr)) ++ { ++ case COMPONENT_REF: ++ { ++ tree field = TREE_OPERAND (expr, 1); ++ tree field_off = byte_position (field); ++ if (TREE_CODE (field_off) != INTEGER_CST) ++ return NULL; ++ offset += tree_to_shwi (field_off); ++ expr = TREE_OPERAND (expr, 0); ++ accesstype = NULL; ++ break; ++ } ++ case MEM_REF: ++ { ++ tree field_off = TREE_OPERAND (expr, 1); ++ gcc_assert (TREE_CODE (field_off) == INTEGER_CST); ++ /* So we can mark the types as escaping if different. */ ++ accesstype = TREE_TYPE (field_off); ++ offset += tree_to_uhwi (field_off); ++ return TREE_OPERAND (expr, 0); ++ } ++ default: ++ return expr; ++ } ++ } ++} ++ ++/* Return true if EXPR was accessing the whole type T. */ ++ ++bool ++ipa_struct_reorg::wholeaccess (tree expr, tree base, tree accesstype, srtype *t) ++{ ++ if (expr == base) ++ return true; ++ ++ if (TREE_CODE (expr) == ADDR_EXPR && TREE_OPERAND (expr, 0) == base) ++ return true; ++ ++ if (!accesstype) ++ return false; ++ ++ if (!types_compatible_p (TREE_TYPE (expr), TREE_TYPE (accesstype))) ++ return false; ++ ++ if (!handled_type (TREE_TYPE (expr))) ++ return false; ++ ++ srtype *other_type = find_type (inner_type (TREE_TYPE (expr))); ++ ++ if (t == other_type) ++ return true; ++ ++ return false; ++} ++ ++bool ++ipa_struct_reorg::get_type_field (tree expr, tree &base, bool &indirect, srtype *&type, srfield *&field, bool &realpart, bool &imagpart, bool &address, bool should_create, bool can_escape) ++{ ++ HOST_WIDE_INT offset; ++ tree accesstype; ++ address = false; ++ bool mark_as_bit_field = false; ++ ++ if (TREE_CODE (expr) == BIT_FIELD_REF) ++ { ++ expr = TREE_OPERAND (expr, 0); ++ mark_as_bit_field = true; ++ } ++ ++ base = get_ref_base_and_offset (expr, offset, realpart, imagpart, accesstype); ++ ++ /* Variable access, unkown type. 
*/ ++ if (base == NULL) ++ return false; ++ ++ if (TREE_CODE (base) == ADDR_EXPR) ++ { ++ address = true; ++ base = TREE_OPERAND (base, 0); ++ } ++ ++ if (offset != 0 && accesstype) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Non zero offset (%d) with MEM.\n", (int)offset); ++ print_generic_expr (dump_file, expr); ++ fprintf (dump_file, "\n"); ++ print_generic_expr (dump_file, base); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ ++ srdecl *d = find_decl (base); ++ srtype *t; ++ ++ if (integer_zerop (base)) ++ { ++ gcc_assert (!d); ++ if (!accesstype) ++ return false; ++ t = find_type (inner_type (inner_type (accesstype))); ++ if (!t && should_create && handled_type (accesstype)) ++ t = record_type (inner_type (accesstype)); ++ if (!t) ++ return false; ++ } ++ else if (!d && accesstype) ++ { ++ if (!should_create) ++ return false; ++ if (!handled_type (accesstype)) ++ return false; ++ t = find_type (inner_type (inner_type (accesstype))); ++ if (!t) ++ t = record_type (inner_type (accesstype)); ++ if (!t || t->has_escaped ()) ++ return false; ++ /* If base is not void* mark the type as escaping. */ ++ if (!VOID_POINTER_P (TREE_TYPE (base))) ++ { ++ gcc_assert (can_escape); ++ t->mark_escape (escape_cast_another_ptr, NULL); ++ return false; ++ } ++ if (TREE_CODE (base) == SSA_NAME) ++ current_function->record_decl (t, base, -1); ++ } ++ else if (!d) ++ return false; ++ else ++ t = d->type; ++ ++ if (t->has_escaped ()) ++ return false; ++ ++ if (mark_as_bit_field) ++ { ++ gcc_assert (can_escape); ++ t->mark_escape (escape_bitfields, NULL); ++ return false; ++ } ++ ++ if (wholeaccess (expr, base, accesstype, t)) ++ { ++ field = NULL; ++ type = t; ++ indirect = accesstype != NULL; ++ return true; ++ } ++ ++ srfield *f = t->find_field (offset); ++ if (!f) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nunkown field\n"); ++ print_generic_expr (dump_file, expr); ++ fprintf (dump_file, "\n"); ++ print_generic_expr (dump_file, base); ++ fprintf (dump_file, "\n"); ++ } ++ gcc_assert (can_escape); ++ t->mark_escape (escape_unkown_field, NULL); ++ return false; ++ } ++ if (!types_compatible_p (f->fieldtype, TREE_TYPE (expr))) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nfieldtype = "); ++ print_generic_expr (dump_file, f->fieldtype); ++ fprintf (dump_file, "\naccess type = "); ++ print_generic_expr (dump_file, TREE_TYPE (expr)); ++ fprintf (dump_file, "original expr = "); ++ print_generic_expr (dump_file, expr); ++ fprintf (dump_file, "\n"); ++ } ++ gcc_assert (can_escape); ++ t->mark_escape (escape_unkown_field, NULL); ++ return false; ++ } ++ field = f; ++ type = t; ++ indirect = accesstype != NULL; ++ return true; ++} ++ ++/* Mark the type used in EXPR as escaping. */ ++ ++void ++ipa_struct_reorg::mark_expr_escape (tree expr, escape_type escapes, gimple *stmt) ++{ ++ tree base; ++ bool indirect; ++ srtype *type; ++ srfield *field; ++ bool realpart, imagpart, address; ++ if (!get_type_field (expr, base, indirect, type, field, realpart, imagpart, address)) ++ return; ++ ++ type->mark_escape (escapes, stmt); ++} ++ ++/* Record accesses in a call statement STMT. 
*/ ++ ++void ++ipa_struct_reorg::maybe_record_call (cgraph_node *node, gcall *stmt) ++{ ++ tree argtype; ++ tree fndecl; ++ escape_type escapes = does_not_escape; ++ bool free_or_realloc = gimple_call_builtin_p (stmt, BUILT_IN_FREE) ++ || gimple_call_builtin_p (stmt, BUILT_IN_REALLOC); ++ ++ /* We check allocation sites in a different location. */ ++ if (handled_allocation_stmt (stmt)) ++ return; ++ ++ ++ /* A few cases here: ++ 1) assigned from the lhs ++ 2) Used in argument ++ If a function being called is global (or indirect) ++ then we reject the types as being escaping. */ ++ ++ if (tree chain = gimple_call_chain (stmt)) ++ record_stmt_expr (chain, node, stmt); ++ ++ /* Assigned from LHS. */ ++ if (tree lhs = gimple_call_lhs (stmt)) ++ { ++ /* FIXME: handle return types.. */ ++ mark_type_as_escape (TREE_TYPE (lhs), escape_return); ++ } ++ ++ /* If we have an internal call, just record the stmt. */ ++ if (gimple_call_internal_p (stmt)) ++ { ++ for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) ++ record_stmt_expr (gimple_call_arg (stmt, i), node, stmt); ++ return; ++ } ++ ++ fndecl = gimple_call_fndecl (stmt); ++ ++ /* If we have an indrect call, just mark the types as escape. */ ++ if (!fndecl) ++ escapes = escape_pointer_function; ++ /* Non local functions cause escape except for calls to free ++ and realloc. ++ FIXME: should support function annotations too. */ ++ else if (!free_or_realloc ++ && !cgraph_node::local_info (fndecl)->local) ++ escapes = escape_external_function; ++ else if (!free_or_realloc ++ && !cgraph_node::local_info (fndecl)->can_change_signature) ++ escapes = escape_cannot_change_signature; ++ /* FIXME: we should be able to handle functions in other partitions. */ ++ else if (symtab_node::get(fndecl)->in_other_partition) ++ escapes = escape_external_function; ++ ++ if (escapes != does_not_escape) ++ { ++ for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) ++ mark_type_as_escape (TREE_TYPE (gimple_call_arg (stmt, i)), ++ escapes); ++ return; ++ } ++ ++ argtype = TYPE_ARG_TYPES (gimple_call_fntype (stmt)); ++ for (unsigned i = 0; i < gimple_call_num_args (stmt); i++) ++ { ++ tree arg = gimple_call_arg (stmt, i); ++ if (argtype) ++ { ++ tree argtypet = TREE_VALUE (argtype); ++ if (!free_or_realloc ++ && VOID_POINTER_P (argtypet)) ++ mark_type_as_escape (TREE_TYPE (arg), escape_cast_void); ++ else ++ record_stmt_expr (arg, node, stmt); ++ } ++ else ++ mark_type_as_escape (TREE_TYPE (arg), escape_var_arg_function); ++ ++ argtype = argtype ? TREE_CHAIN (argtype) : NULL_TREE; ++ } ++ ++} ++ ++ ++void ++ipa_struct_reorg::record_stmt_expr (tree expr, cgraph_node *node, gimple *stmt) ++{ ++ tree base; ++ bool indirect; ++ srtype *type; ++ srfield *field; ++ bool realpart, imagpart, address; ++ if (!get_type_field (expr, base, indirect, type, field, realpart, imagpart, address)) ++ return; ++ ++ if (!opt_for_fn (current_function_decl, flag_ipa_struct_reorg)) ++ type->mark_escape (escape_non_optimize, stmt); ++ ++ /* Record it. */ ++ type->add_access (new sraccess (stmt, node, type, field)); ++} ++ ++/* Find function corresponding to NODE. 
*/ ++ ++srfunction * ++ipa_struct_reorg::find_function (cgraph_node *node) ++{ ++ for (unsigned i = 0; i < functions.length (); i++) ++ if (functions[i]->node == node) ++ return functions[i]; ++ return NULL; ++} ++ ++void ++ipa_struct_reorg::check_type_and_push (tree newdecl, srtype *type, vec &worklist, gimple *stmt) ++{ ++ if (integer_zerop (newdecl)) ++ return; ++ ++ if (TREE_CODE (newdecl) == ADDR_EXPR) ++ { ++ srdecl *d = find_decl (TREE_OPERAND (newdecl, 0)); ++ if (!d) ++ { ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ return; ++ } ++ if (d->type == type) ++ return; ++ ++ srtype *type1 = d->type; ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ type1->mark_escape (escape_cast_another_ptr, stmt); ++ return; ++ } ++ ++ srdecl *d = find_decl (newdecl); ++ if (!d) ++ { ++ if (TREE_CODE (newdecl) == INTEGER_CST) ++ { ++ type->mark_escape (escape_int_const, stmt); ++ return; ++ } ++ /* If we have a non void* or a decl (which is hard to track), ++ then mark the type as escaping. */ ++ if (!VOID_POINTER_P (TREE_TYPE (newdecl)) ++ || DECL_P (newdecl)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nunkown decl: "); ++ print_generic_expr (dump_file, newdecl); ++ fprintf (dump_file, " in type:\n"); ++ print_generic_expr (dump_file, TREE_TYPE (newdecl)); ++ fprintf (dump_file, "\n"); ++ } ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ return; ++ } ++ /* At this point there should only be unkown void* ssa names. */ ++ gcc_assert (TREE_CODE (newdecl) == SSA_NAME); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nrecording unkown decl: "); ++ print_generic_expr (dump_file, newdecl); ++ fprintf (dump_file, " as type:\n"); ++ type->simple_dump (dump_file); ++ fprintf (dump_file, "\n"); ++ } ++ d = current_function->record_decl (type, newdecl, -1); ++ worklist.safe_push (d); ++ return; ++ } ++ ++ /* Only add to the worklist if the decl is a SSA_NAME. */ ++ if (TREE_CODE (newdecl) == SSA_NAME) ++ worklist.safe_push (d); ++ if (d->type == type) ++ return; ++ ++ srtype *type1 = d->type; ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ type1->mark_escape (escape_cast_another_ptr, stmt); ++ ++} ++ ++/* ++ 2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl) ++ a) if the SSA_NAME is sourced from a pointer plus, record the pointer and ++ check to make sure the addition was a multiple of the size. ++ check the pointer type too. 
++ b) If the name is sourced from an allocation check the allocation ++ i) Add SSA_NAME (void*) to the worklist if allocated from realloc ++ c) if the name is from a param, make sure the param type was of the original type ++ d) if the name is from a cast/assignment, make sure it is used as that type or void* ++ i) If void* then push the ssa_name into worklist ++*/ ++void ++ipa_struct_reorg::check_definition (srdecl *decl, vec &worklist) ++{ ++ tree ssa_name = decl->decl; ++ srtype *type = decl->type; ++ ++ /* c) if the name is from a param, make sure the param type was ++ of the original type */ ++ if (SSA_NAME_IS_DEFAULT_DEF (ssa_name)) ++ { ++ tree var = SSA_NAME_VAR (ssa_name); ++ if (var ++ && TREE_CODE (var) == PARM_DECL ++ && VOID_POINTER_P (TREE_TYPE (ssa_name))) ++ type->mark_escape (escape_cast_void, NULL); ++ return; ++ } ++ gimple *stmt = SSA_NAME_DEF_STMT (ssa_name); ++ ++ /* ++ b) If the name is sourced from an allocation check the allocation ++ i) Add SSA_NAME (void*) to the worklist if allocated from realloc ++ */ ++ if (gimple_code (stmt) == GIMPLE_CALL) ++ { ++ /* For realloc, check the type of the argument. */ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) ++ check_type_and_push (gimple_call_arg (stmt, 0), type, worklist, stmt); ++ ++ if (!handled_allocation_stmt (stmt) ++ || !allocate_size (type, stmt)) ++ type->mark_escape (escape_return, stmt); ++ return; ++ } ++ /* If the SSA_NAME is sourced from an inline-asm, just mark the type as escaping. */ ++ if (gimple_code (stmt) == GIMPLE_ASM) ++ { ++ type->mark_escape (escape_inline_asm, stmt); ++ return; ++ } ++ ++ /* If the SSA_NAME is sourced from a PHI check add each name to the worklist and ++ check to make sure they are used correctly. */ ++ if (gimple_code (stmt) == GIMPLE_PHI) ++ { ++ for (unsigned i = 0; i < gimple_phi_num_args (stmt); i++) ++ check_type_and_push (gimple_phi_arg_def (stmt, i), type, worklist, stmt); ++ return; ++ } ++ ++ gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN); ++ /* ++ a) if the SSA_NAME is sourced from a pointer plus, record the pointer and ++ check to make sure the addition was a multiple of the size. ++ check the pointer type too. ++ */ ++ ++ tree rhs = gimple_assign_rhs1 (stmt); ++ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree num; ++ if (!is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (type->type))) ++ type->mark_escape (escape_non_multiply_size, stmt); ++ ++ if (TREE_CODE (rhs) == SSA_NAME) ++ check_type_and_push (rhs, type, worklist, stmt); ++ return; ++ } ++ ++ /* Casts between pointers and integer are escaping. */ ++ if (gimple_assign_cast_p (stmt)) ++ { ++ type->mark_escape (escape_cast_int, stmt); ++ return; ++ } ++ ++ /* ++ d) if the name is from a cast/assignment, make sure it is used as that type or void* ++ i) If void* then push the ssa_name into worklist ++ */ ++ gcc_assert (gimple_assign_single_p (stmt)); ++ check_other_side (decl, rhs, stmt, worklist); ++} ++ ++/* Mark the types used by the inline-asm as escaping. It is unkown what happens inside ++ an inline-asm. */ ++ ++void ++ipa_struct_reorg::mark_types_asm (gasm *astmt) ++{ ++ for (unsigned i = 0; i < gimple_asm_ninputs (astmt); i++) ++ { ++ tree v = TREE_VALUE (gimple_asm_input_op (astmt, i)); ++ /* If we have &b, just strip the & here. 
*/ ++ if (TREE_CODE (v) == ADDR_EXPR) ++ v = TREE_OPERAND (v, 0); ++ mark_expr_escape (v, escape_inline_asm, astmt); ++ } ++ for (unsigned i = 0; i < gimple_asm_noutputs (astmt); i++) ++ { ++ tree v = TREE_VALUE (gimple_asm_output_op (astmt, i)); ++ /* If we have &b, just strip the & here. */ ++ if (TREE_CODE (v) == ADDR_EXPR) ++ v = TREE_OPERAND (v, 0); ++ mark_expr_escape (v, escape_inline_asm, astmt); ++ } ++} ++ ++void ++ipa_struct_reorg::check_other_side (srdecl *decl, tree other, gimple *stmt, vec &worklist) ++{ ++ srtype *type = decl->type; ++ ++ if (TREE_CODE (other) == SSA_NAME ++ || DECL_P (other) ++ || TREE_CODE (other) == INTEGER_CST) ++ { ++ check_type_and_push (other, type, worklist, stmt); ++ return; ++ } ++ ++ tree t = TREE_TYPE (other); ++ if (!handled_type (t)) ++ { ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ return; ++ } ++ ++ srtype *t1 = find_type (inner_type (t)); ++ if (t1 == type) ++ { ++ tree base; ++ bool indirect; ++ srtype *type1; ++ srfield *field; ++ bool realpart, imagpart, address; ++ if (!get_type_field (other, base, indirect, type1, field, realpart, imagpart, address)) ++ type->mark_escape (escape_cast_another_ptr, stmt); ++ ++ return; ++ } ++ ++ if (t1) ++ t1->mark_escape (escape_cast_another_ptr, stmt); ++ ++ type->mark_escape (escape_cast_another_ptr, stmt); ++} ++ ++ ++void ++ipa_struct_reorg::check_use (srdecl *decl, gimple *stmt, vec &worklist) ++{ ++ srtype *type = decl->type; ++ ++ if (gimple_code (stmt) == GIMPLE_RETURN) ++ { ++ type->mark_escape (escape_return, stmt); ++ return; ++ } ++ /* If the SSA_NAME PHI check and add the src to the worklist and ++ check to make sure they are used correctly. */ ++ if (gimple_code (stmt) == GIMPLE_PHI) ++ { ++ check_type_and_push (gimple_phi_result (stmt), type, worklist, stmt); ++ return; ++ } ++ ++ if (gimple_code (stmt) == GIMPLE_ASM) ++ { ++ mark_types_asm (as_a (stmt)); ++ return; ++ } ++ ++ if (gimple_code (stmt) == GIMPLE_COND) ++ { ++ tree rhs1 = gimple_cond_lhs (stmt); ++ tree rhs2 = gimple_cond_rhs (stmt); ++ tree orhs = rhs1; ++ if (gimple_cond_code (stmt) != EQ_EXPR ++ && gimple_cond_code (stmt) != NE_EXPR) ++ { ++ mark_expr_escape (rhs1, escape_non_eq, stmt); ++ mark_expr_escape (rhs2, escape_non_eq, stmt); ++ } ++ if (rhs1 == decl->decl) ++ orhs = rhs2; ++ if (integer_zerop (orhs)) ++ return; ++ if (TREE_CODE (orhs) != SSA_NAME) ++ mark_expr_escape (rhs1, escape_non_eq, stmt); ++ check_type_and_push (orhs, type, worklist, stmt); ++ return; ++ } ++ ++ ++ /* Casts between pointers and integer are escaping. 
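A rough source-level sketch of the definition and use checks described above; the function and names are hypothetical, and the comments paraphrase the escape reasons used by the pass.

    #include <stdint.h>

    struct node { long key; long val; };

    long
    walk (struct node *base, long n)
    {
      struct node *p = base + n;      /* POINTER_PLUS by n * sizeof (struct node); an offset that
                                         is not a whole multiple of the size would be flagged
                                         escape_non_multiply_size                                 */
      uintptr_t bits = (uintptr_t) p; /* pointer/integer cast: escape_cast_int                    */
      long r = 0;
      if (p != base)                  /* EQ_EXPR / NE_EXPR comparisons are accepted               */
        r = base->val;
      if (p > base)                   /* any other comparison marks escape_non_eq                 */
        r += (long) (bits & 1);
      return r;
    }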
*/ ++ if (gimple_assign_cast_p (stmt)) ++ { ++ type->mark_escape (escape_cast_int, stmt); ++ return; ++ } ++ ++ /* We might have a_1 = ptr_2 == ptr_3; */ ++ if (is_gimple_assign (stmt) ++ && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison) ++ { ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree orhs = rhs1; ++ if (gimple_assign_rhs_code (stmt) != EQ_EXPR ++ && gimple_assign_rhs_code (stmt) != NE_EXPR) ++ { ++ mark_expr_escape (rhs1, escape_non_eq, stmt); ++ mark_expr_escape (rhs2, escape_non_eq, stmt); ++ } ++ if (rhs1 == decl->decl) ++ orhs = rhs2; ++ if (integer_zerop (orhs)) ++ return; ++ if (TREE_CODE (orhs) != SSA_NAME) ++ mark_expr_escape (rhs1, escape_non_eq, stmt); ++ check_type_and_push (orhs, type, worklist, stmt); ++ return; ++ } ++ ++ if (gimple_assign_single_p (stmt)) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs = gimple_assign_rhs1 (stmt); ++ /* Check if we have a_1 = b_2; that a_1 is in the correct type. */ ++ if (decl->decl == rhs) ++ { ++ check_other_side (decl, lhs, stmt, worklist); ++ return; ++ } ++ } ++ ++ if (is_gimple_assign (stmt) ++ && gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree lhs = gimple_assign_lhs (stmt); ++ tree num; ++ check_other_side (decl, lhs, stmt, worklist); ++ if (!is_result_of_mult (rhs2, &num, TYPE_SIZE_UNIT (type->type))) ++ type->mark_escape (escape_non_multiply_size, stmt); ++ } ++ ++} ++ ++/* ++ 2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl) ++ d) if the name is used in a cast/assignment, make sure it is used as that type or void* ++ i) If void* then push the ssa_name into worklist ++ e) if used in conditional check the other side ++ i) If the conditional is non NE/EQ then mark the type as non rejecting ++ f) Check if the use in a Pointer PLUS EXPR Is used by mulitplication of its size ++ */ ++void ++ipa_struct_reorg::check_uses (srdecl *decl, vec &worklist) ++{ ++ tree ssa_name = decl->decl; ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, ssa_name) ++ { ++ gimple *stmt = USE_STMT (use_p); ++ ++ if (is_gimple_debug (stmt)) ++ continue; ++ ++ check_use (decl, stmt, worklist); ++ } ++} ++ ++/* Record function corresponding to NODE. */ ++ ++srfunction * ++ipa_struct_reorg::record_function (cgraph_node *node) ++{ ++ function *fn; ++ tree parm, var; ++ unsigned int i; ++ srfunction *sfn; ++ escape_type escapes = does_not_escape; ++ ++ sfn = new srfunction (node); ++ functions.safe_push (sfn); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nRecording accesses and types from function: %s/%u\n", ++ node->name (), node->order); ++ ++ /* Nodes without a body are not interesting. Especially do not ++ visit clones at this point for now - we get duplicate decls ++ there for inline clones at least. 
*/ ++ if (!node->has_gimple_body_p () || node->inlined_to) ++ return sfn; ++ ++ node->get_body (); ++ fn = DECL_STRUCT_FUNCTION (node->decl); ++ ++ if (!fn) ++ return sfn; ++ ++ current_function = sfn; ++ ++ if (DECL_PRESERVE_P (node->decl)) ++ escapes = escape_marked_as_used; ++ else if (!node->local.local) ++ escapes = escape_visible_function; ++ else if (!node->local.can_change_signature) ++ escapes = escape_cannot_change_signature; ++ else if (!tree_versionable_function_p (node->decl)) ++ escapes = escape_noclonable_function; ++ else if (!opt_for_fn (node->decl, flag_ipa_struct_reorg)) ++ escapes = escape_non_optimize; ++ ++ basic_block bb; ++ gimple_stmt_iterator si; ++ ++ /* Record the static chain decl. */ ++ if (fn->static_chain_decl) ++ { ++ srdecl *sd = record_var (fn->static_chain_decl, ++ escapes, ++ -2); ++ if (sd) ++ { ++ /* Specify that this type is used by the static ++ chain so it cannot be split. */ ++ sd->type->chain_type = true; ++ sfn->add_arg (sd); ++ sd->type->add_function (sfn); ++ } ++ } ++ ++ /* Record the arguments. */ ++ for (parm = DECL_ARGUMENTS (node->decl), i = 0; ++ parm; ++ parm = DECL_CHAIN (parm), i++) ++ { ++ srdecl *sd = record_var (parm, escapes, i); ++ if (sd) ++ { ++ sfn->add_arg (sd); ++ sd->type->add_function (sfn); ++ } ++ } ++ ++ /* Mark the return type as escaping */ ++ { ++ tree return_type = TREE_TYPE (TREE_TYPE (node->decl)); ++ mark_type_as_escape (return_type, escape_return, NULL); ++ } ++ ++ /* If the cfg does not exist for the function, don't process the function. */ ++ if (!fn->cfg) ++ { ++ current_function = NULL; ++ return sfn; ++ } ++ ++ /* The following order is done for recording stage: ++ 0) Record all variables/SSA_NAMES that are of struct type ++ 1) Record MEM_REF/COMPONENT_REFs ++ a) Record SSA_NAMEs (void*) and record that as the accessed type. ++ */ ++ ++ push_cfun (fn); ++ ++ FOR_EACH_LOCAL_DECL (cfun, i, var) ++ { ++ if (TREE_CODE (var) != VAR_DECL) ++ continue; ++ ++ record_var (var); ++ } ++ ++ for (i = 1; i < num_ssa_names; ++i) ++ { ++ tree name = ssa_name (i); ++ if (!name ++ || has_zero_uses (name) ++ || virtual_operand_p (name)) ++ continue; ++ ++ record_var (name); ++ } ++ ++ /* Find the variables which are used via MEM_REF and are void* types. */ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) ++ { ++ gimple *stmt = gsi_stmt (si); ++ find_vars (stmt); ++ } ++ } ++ ++ auto_vec worklist; ++ for (unsigned i = 0; i < current_function->decls.length (); i++) ++ { ++ srdecl *decl = current_function->decls[i]; ++ if (TREE_CODE (decl->decl) == SSA_NAME) ++ { ++ decl->visited = false; ++ worklist.safe_push (decl); ++ } ++ } ++ ++ /* ++ 2) Check SSA_NAMEs for non type usages (source or use) (worlist of srdecl) ++ a) if the SSA_NAME is sourced from a pointer plus, record the pointer and ++ check to make sure the addition was a multiple of the size. ++ check the pointer type too. 
++ b) If the name is sourced from an allocation check the allocation ++ i) Add SSA_NAME (void*) to the worklist if allocated from realloc ++ c) if the name is from a param, make sure the param type was of the original type ++ d) if the name is used in a cast/assignment, make sure it is used as that type or void* ++ i) If void* then push the ssa_name into worklist ++ e) if used in conditional check the other side ++ i) If the conditional is non NE/EQ then mark the type as non rejecting ++ f) Check if the use in a POinter PLUS EXPR Is used by mulitplication of its size ++ */ ++ ++ while (!worklist.is_empty ()) ++ { ++ srdecl *decl = worklist.pop (); ++ if (decl->visited) ++ continue; ++ decl->visited = true; ++ check_definition (decl, worklist); ++ check_uses (decl, worklist); ++ } ++ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) ++ { ++ gimple *stmt = gsi_stmt (si); ++ maybe_record_stmt (node, stmt); ++ } ++ } ++ ++ pop_cfun (); ++ current_function = NULL; ++ return sfn; ++} ++ ++ ++/* Record all accesses for all types including global variables. */ ++ ++void ++ipa_struct_reorg::record_accesses (void) ++{ ++ varpool_node *var; ++ cgraph_node *cnode; ++ ++ /* Record global (non-auto) variables first. */ ++ FOR_EACH_VARIABLE (var) ++ { ++ if (!var->real_symbol_p ()) ++ continue; ++ ++ /* Record all variables including the accesses inside a variable. */ ++ escape_type escapes = does_not_escape; ++ if (var->externally_visible || !var->definition) ++ escapes = escape_via_global_var; ++ if (var->in_other_partition) ++ escapes = escape_via_global_var; ++ if (!var->externally_visible && var->definition) ++ var->get_constructor (); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Recording global variable: "); ++ print_generic_expr (dump_file, var->decl); ++ fprintf (dump_file, "\n"); ++ } ++ record_var (var->decl, escapes); ++ } ++ ++ FOR_EACH_FUNCTION (cnode) ++ { ++ if (!cnode->real_symbol_p ()) ++ continue; ++ ++ /* Record accesses inside a function. */ ++ if(cnode->definition) ++ record_function (cnode); ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "all types (before pruning):\n"); ++ dump_types (dump_file); ++ fprintf (dump_file, "all functions (before pruning):\n"); ++ dump_functions (dump_file); ++ } ++ done_recording = true; ++} ++ ++/* A helper function to detect cycles (recusive) types. ++ Return TRUE if TYPE was a rescusive type. */ ++ ++bool ++ipa_struct_reorg::walk_field_for_cycles (srtype *type) ++{ ++ unsigned i; ++ srfield *field; ++ ++ type->visited = true; ++ if (type->escaped_rescusive ()) ++ return true; ++ ++ if (type->has_escaped ()) ++ return false; ++ ++ FOR_EACH_VEC_ELT (type->fields, i, field) ++ { ++ if (!field->type) ++ ; ++ else if (field->type->visited ++ || walk_field_for_cycles (field->type)) ++ { ++ type->mark_escape (escape_rescusive_type, NULL); ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++/* Clear visited on all types. */ ++ ++void ++ipa_struct_reorg::clear_visited (void) ++{ ++ for (unsigned i = 0; i < types.length (); i++) ++ types[i]->visited = false; ++} ++ ++/* Detect recusive types and mark them as escaping. */ ++ ++void ++ipa_struct_reorg::detect_cycles (void) ++{ ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ if (types[i]->has_escaped ()) ++ continue; ++ ++ clear_visited (); ++ walk_field_for_cycles (types[i]); ++ } ++} ++ ++/* Propagate escaping to depdenent types. 
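As an illustration of the cycle rule, compare a hypothetical self-referential type with a flat one:

    /* Self-referential: walk_field_for_cycles finds the cycle through 'next'
       and marks the type with escape_rescusive_type.  */
    struct list_node
    {
      int value;
      struct list_node *next;
    };

    /* No cycle through its fields, so it stays a candidate unless something
       else makes it escape.  */
    struct flat_record
    {
      int id;
      double weight;
    };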
*/ ++ ++void ++ipa_struct_reorg::propagate_escape (void) ++{ ++ ++ unsigned i; ++ srtype *type; ++ bool changed = false; ++ ++ do ++ { ++ changed = false; ++ FOR_EACH_VEC_ELT (types, i, type) ++ { ++ for (tree field = TYPE_FIELDS (type->type); ++ field; ++ field = DECL_CHAIN (field)) ++ { ++ if (TREE_CODE (field) == FIELD_DECL ++ && handled_type (TREE_TYPE (field))) ++ { ++ tree t = inner_type (TREE_TYPE (field)); ++ srtype *type1 = find_type (t); ++ if (!type1) ++ continue; ++ if (type1->has_escaped () ++ && !type->has_escaped ()) ++ { ++ type->mark_escape (escape_dependent_type_escapes, NULL); ++ changed = true; ++ } ++ if (type->has_escaped () ++ && !type1->has_escaped ()) ++ { ++ type1->mark_escape (escape_dependent_type_escapes, NULL); ++ changed = true; ++ } ++ } ++ } ++ } ++ } while (changed); ++} ++ ++/* Prune the escaped types and their decls from what was recorded. */ ++ ++void ++ipa_struct_reorg::prune_escaped_types (void) ++{ ++ detect_cycles (); ++ propagate_escape (); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "all types (after prop but before pruning):\n"); ++ dump_types (dump_file); ++ fprintf (dump_file, "all functions (after prop but before pruning):\n"); ++ dump_functions (dump_file); ++ } ++ ++ if (dump_file) ++ dump_types_escaped (dump_file); ++ ++ ++ /* Prune the function arguments which escape ++ and functions which have no types as arguments. */ ++ for (unsigned i = 0; i < functions.length (); ) ++ { ++ srfunction *function = functions[i]; ++ ++ /* Prune function arguments of types that escape. */ ++ for (unsigned j = 0; j < function->args.length ();) ++ { ++ if (function->args[j]->type->has_escaped ()) ++ function->args.ordered_remove (j); ++ else ++ j++; ++ } ++ ++ /* Prune global variables that the function uses of types that escape. */ ++ for (unsigned j = 0; j < function->globals.length ();) ++ { ++ if (function->globals[j]->type->has_escaped ()) ++ function->globals.ordered_remove (j); ++ else ++ j++; ++ } ++ ++ /* Prune variables that the function uses of types that escape. */ ++ for (unsigned j = 0; j < function->decls.length ();) ++ { ++ srdecl *decl = function->decls[j]; ++ if (decl->type->has_escaped ()) ++ { ++ function->decls.ordered_remove (j); ++ delete decl; ++ } ++ else ++ j++; ++ } ++ ++ /* Prune functions which don't refer to any variables any more. */ ++ if (function->args.is_empty () ++ && function->decls.is_empty () ++ && function->globals.is_empty ()) ++ { ++ delete function; ++ functions.ordered_remove (i); ++ } ++ else ++ i++; ++ } ++ ++ /* Prune globals of types that escape, all references to those decls ++ will have been removed in the first loop. */ ++ for (unsigned j = 0; j < globals.decls.length ();) ++ { ++ srdecl *decl = globals.decls[j]; ++ if (decl->type->has_escaped ()) ++ { ++ globals.decls.ordered_remove (j); ++ delete decl; ++ } ++ else ++ j++; ++ } ++ ++ /* Prune types that escape, all references to those types ++ will have been removed in the above loops. */ ++ for (unsigned i = 0; i < types.length (); ) ++ { ++ srtype *type = types[i]; ++ if (type->has_escaped ()) ++ { ++ /* All references to this type should have been removed now. */ ++ delete type; ++ types.ordered_remove (i); ++ } ++ else ++ i++; ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "all types (after pruning):\n"); ++ dump_types (dump_file); ++ fprintf (dump_file, "all functions (after pruning):\n"); ++ dump_functions (dump_file); ++ } ++} ++ ++/* Analyze all of the types. 
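A minimal sketch of the dependent-type propagation described above, with hypothetical types:

    struct inner { int a; };

    /* If 'struct inner' has escaped for any reason, the wrapper below is marked
       with escape_dependent_type_escapes because one of its fields refers to it;
       the same propagation also runs in the other direction.  */
    struct outer
    {
      struct inner *child;
      long tag;
    };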
*/ ++ ++void ++ipa_struct_reorg::analyze_types (void) ++{ ++ for (unsigned i = 0; i < types.length (); i++) ++ { ++ if (!types[i]->has_escaped ()) ++ types[i]->analyze(); ++ } ++} ++ ++/* Create all new types we want to create. */ ++ ++bool ++ipa_struct_reorg::create_new_types (void) ++{ ++ int newtypes = 0; ++ clear_visited (); ++ for (unsigned i = 0; i < types.length (); i++) ++ newtypes += types[i]->create_new_type (); ++ ++ if (dump_file) ++ { ++ if (newtypes) ++ fprintf (dump_file, "\nNumber of structures to transform is %d\n", newtypes); ++ else ++ fprintf (dump_file, "\nNo structures to transform.\n"); ++ } ++ ++ return newtypes != 0; ++} ++ ++/* Create all the new decls except for the new arguments ++ which create_new_functions would have created. */ ++ ++void ++ipa_struct_reorg::create_new_decls (void) ++{ ++ globals.create_new_decls (); ++ for (unsigned i = 0; i < functions.length (); i++) ++ functions[i]->create_new_decls (); ++} ++ ++/* Create the new arguments for the function corresponding to NODE. */ ++ ++void ++ipa_struct_reorg::create_new_args (cgraph_node *new_node) ++{ ++ tree decl = new_node->decl; ++ vec params = ipa_get_vector_of_formal_parms (decl); ++ ipa_parm_adjustment_vec adjs; ++ adjs.create (params.length ()); ++ for (unsigned i = 0; i < params.length (); i++) ++ { ++ struct ipa_parm_adjustment adj; ++ tree parm = params[i]; ++ memset (&adj, 0, sizeof (adj)); ++ adj.base_index = i; ++ adj.base = parm; ++ srtype *t = find_type (inner_type (TREE_TYPE (parm))); ++ if (!t ++ || t->has_escaped () ++ || !t->has_new_type ()) ++ { ++ adj.op = IPA_PARM_OP_COPY; ++ adjs.safe_push (adj); ++ continue; ++ } ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Creating a new argument for: "); ++ print_generic_expr (dump_file, params[i]); ++ fprintf (dump_file, " in function: "); ++ print_generic_expr (dump_file, decl); ++ fprintf (dump_file, "\n"); ++ } ++ adj.arg_prefix = "struct_reorg"; ++ adj.op = IPA_PARM_OP_NONE; ++ for (unsigned j = 0; j < max_split && t->newtype[j]; j++) ++ { ++ adj.type = reconstruct_complex_type (TREE_TYPE (parm), t->newtype[j]); ++ adjs.safe_push (adj); ++ } ++ } ++ ipa_modify_formal_parameters (decl, adjs); ++ params.release (); ++ for (unsigned i = 0; i < adjs.length (); i++) ++ { ++ if (adjs[i].op != IPA_PARM_OP_NONE) ++ continue; ++ tree decl = adjs[i].base; ++ srdecl *d = find_decl (decl); ++ if (!d) ++ continue; ++ unsigned j = 0; ++ while (j < max_split && d->newdecl[j]) ++ j++; ++ d->newdecl[j] = adjs[i].new_decl; ++ } ++ adjs.release (); ++ ++ function *fn = DECL_STRUCT_FUNCTION (decl); ++ ++ if (!fn->static_chain_decl) ++ return; ++ srdecl *chain = find_decl (fn->static_chain_decl); ++ if (!chain) ++ return; ++ ++ srtype *type = chain->type; ++ tree orig_var = chain->decl; ++ const char *tname = NULL; ++ if (DECL_NAME (orig_var)) ++ tname = IDENTIFIER_POINTER (DECL_NAME (orig_var)); ++ gcc_assert (!type->newtype[1]); ++ tree new_name = NULL; ++ char *name = NULL; ++ if (tname) ++ { ++ name = concat (tname, ".reorg.0", NULL); ++ new_name = get_identifier (name); ++ free (name); ++ } ++ tree newtype1 = reconstruct_complex_type (TREE_TYPE (orig_var), type->newtype[0]); ++ chain->newdecl[0] = build_decl (DECL_SOURCE_LOCATION (orig_var), ++ PARM_DECL, new_name, newtype1); ++ copy_var_attributes (chain->newdecl[0], orig_var); ++ fn->static_chain_decl = chain->newdecl[0]; ++ ++} ++ ++/* Find the refered DECL in the current function or globals. 
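A hypothetical source-level picture of what the argument rewriting amounts to, assuming the fields of the original type are placed in two different clusters (max_split is 2); the split type and function names are illustrative only.

    /* Original type and function signature.  */
    struct pair { int hot; double cold; };
    extern long use_pair (struct pair *p, long i);

    /* Conceptual clone after splitting: each piece of the old pointer parameter
       becomes its own parameter, as built by create_new_args.  */
    struct pair_reorg_0 { int hot; };
    struct pair_reorg_1 { double cold; };
    extern long use_pair_struct_reorg (struct pair_reorg_0 *p0,
                                       struct pair_reorg_1 *p1, long i);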
++ If this is a global decl, record that as being used ++ in the current function. */ ++ ++srdecl * ++ipa_struct_reorg::find_decl (tree decl) ++{ ++ srdecl *d; ++ d = globals.find_decl (decl); ++ if (d) ++ { ++ /* Record the global usage in the current function. */ ++ if (!done_recording && current_function) ++ { ++ bool add = true; ++ /* No reason to add it to the current function if it is ++ already recorded as such. */ ++ for (unsigned i = 0; i < current_function->globals.length (); i++) ++ { ++ if (current_function->globals[i] == d) ++ { ++ add = false; ++ break; ++ } ++ } ++ if (add) ++ current_function->globals.safe_push (d); ++ } ++ return d; ++ } ++ if (current_function) ++ return current_function->find_decl (decl); ++ return NULL; ++} ++ ++/* Create new function clones for the cases where the arguments ++ need to be changed. */ ++ ++void ++ipa_struct_reorg::create_new_functions (void) ++{ ++ for (unsigned i = 0; i < functions.length (); i++) ++ { ++ srfunction *f = functions[i]; ++ bool anyargchanges = false; ++ cgraph_node *new_node; ++ cgraph_node *node = f->node; ++ int newargs = 0; ++ if (f->old) ++ continue; ++ ++ if (f->args.length () == 0) ++ continue; ++ ++ for (unsigned j = 0; j < f->args.length (); j++) ++ { ++ srdecl *d = f->args[j]; ++ srtype *t = d->type; ++ if (t->has_new_type ()) ++ { ++ newargs += t->newtype[1] != NULL; ++ anyargchanges = true; ++ } ++ } ++ if (!anyargchanges) ++ continue; ++ ++ if (dump_file) ++ { ++ fprintf (dump_file, "Creating a clone of function: "); ++ f->simple_dump (dump_file); ++ fprintf (dump_file, "\n"); ++ } ++ statistics_counter_event (NULL, "Create new function", 1); ++ new_node = node->create_version_clone_with_body (vNULL, NULL, ++ NULL, false, NULL, NULL, ++ "struct_reorg"); ++ new_node->make_local (); ++ f->newnode = new_node; ++ srfunction *n = record_function (new_node); ++ current_function = n; ++ n->old = f; ++ f->newf = n; ++ /* Create New arguments. */ ++ create_new_args (new_node); ++ current_function = NULL; ++ } ++} ++ ++bool ++ipa_struct_reorg::rewrite_lhs_rhs (tree lhs, tree rhs, tree newlhs[max_split], tree newrhs[max_split]) ++{ ++ bool l = rewrite_expr (lhs, newlhs); ++ bool r = rewrite_expr (rhs, newrhs); ++ ++ /* Handle NULL pointer specially. */ ++ if (l && !r && integer_zerop (rhs)) ++ { ++ r = true; ++ for (unsigned i = 0; i < max_split && newlhs[i]; i++) ++ newrhs[i] = fold_convert (TREE_TYPE (newlhs[i]), rhs); ++ } ++ ++ return l || r; ++} ++ ++bool ++ipa_struct_reorg::rewrite_expr (tree expr, tree newexpr[max_split], bool ignore_missing_decl) ++{ ++ tree base; ++ bool indirect; ++ srtype *t; ++ srfield *f; ++ bool realpart, imagpart; ++ bool address; ++ ++ tree newbase[max_split]; ++ memset (newexpr, 0, sizeof(tree[max_split])); ++ ++ if (TREE_CODE (expr) == CONSTRUCTOR) ++ { ++ srtype *t = find_type (TREE_TYPE (expr)); ++ if (!t) ++ return false; ++ gcc_assert (CONSTRUCTOR_NELTS (expr) == 0); ++ if (!t->has_new_type ()) ++ return false; ++ for (unsigned i = 0; i < max_split && t->newtype[i]; i++) ++ newexpr[i] = build_constructor (t->newtype[i], NULL); ++ return true; ++ } ++ ++ if (!get_type_field (expr, base, indirect, t, f, realpart, imagpart, address)) ++ return false; ++ ++ /* If the type is not changed, then just return false. */ ++ if (!t->has_new_type ()) ++ return false; ++ ++ /* NULL pointer handling is "special". 
*/ ++ if (integer_zerop (base)) ++ { ++ gcc_assert (indirect && !address); ++ for (unsigned i = 0; i < max_split && t->newtype[i]; i++) ++ { ++ tree newtype1 = reconstruct_complex_type (TREE_TYPE (base), t->newtype[i]); ++ newbase[i] = fold_convert (newtype1, base); ++ } ++ } ++ else ++ { ++ srdecl *d = find_decl (base); ++ ++ if (!d && dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Can't find decl:\n"); ++ print_generic_expr (dump_file, base); ++ fprintf (dump_file, "\ntype:\n"); ++ t->dump (dump_file); ++ } ++ if (!d && ignore_missing_decl) ++ return true; ++ gcc_assert (d); ++ memcpy (newbase, d->newdecl, sizeof(d->newdecl)); ++ } ++ ++ if (f == NULL) ++ { ++ memcpy (newexpr, newbase, sizeof(newbase)); ++ for (unsigned i = 0; i < max_split && newexpr[i]; i++) ++ { ++ if (address) ++ newexpr[i] = build_fold_addr_expr (newexpr[i]); ++ if (indirect) ++ newexpr[i] = build_simple_mem_ref (newexpr[i]); ++ if (imagpart) ++ newexpr[i] = build1 (IMAGPART_EXPR, TREE_TYPE (TREE_TYPE (newexpr[i])), newexpr[i]); ++ if (realpart) ++ newexpr[i] = build1 (REALPART_EXPR, TREE_TYPE (TREE_TYPE (newexpr[i])), newexpr[i]); ++ } ++ return true; ++ } ++ ++ tree newdecl = newbase[f->clusternum]; ++ for (unsigned i = 0; i < max_split && f->newfield[i]; i++) ++ { ++ tree newbase1 = newdecl; ++ if (address) ++ newbase1 = build_fold_addr_expr (newbase1); ++ if (indirect) ++ newbase1 = build_simple_mem_ref (newbase1); ++ newexpr[i] = build3 (COMPONENT_REF, TREE_TYPE (f->newfield[i]), ++ newbase1, f->newfield[i], NULL_TREE); ++ if (imagpart) ++ newexpr[i] = build1 (IMAGPART_EXPR, TREE_TYPE (TREE_TYPE (newexpr[i])), newexpr[i]); ++ if (realpart) ++ newexpr[i] = build1 (REALPART_EXPR, TREE_TYPE (TREE_TYPE (newexpr[i])), newexpr[i]); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "cluster: %d. decl = ", (int)f->clusternum); ++ print_generic_expr (dump_file, newbase1); ++ fprintf (dump_file, "\nnewexpr = "); ++ print_generic_expr (dump_file, newexpr[i]); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ return true; ++} ++ ++bool ++ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) ++{ ++ bool remove = false; ++ if (gimple_clobber_p (stmt)) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree newlhs[max_split]; ++ if (!rewrite_expr (lhs, newlhs)) ++ return false; ++ for (unsigned i = 0; i < max_split && newlhs[i]; i++) ++ { ++ tree clobber = build_constructor (TREE_TYPE (newlhs[i]), NULL); ++ TREE_THIS_VOLATILE (clobber) = true; ++ gimple *newstmt = gimple_build_assign (newlhs[i], clobber); ++ gsi_insert_before (gsi, newstmt, GSI_SAME_STMT); ++ remove = true; ++ } ++ return remove; ++ } ++ ++ if (gimple_assign_rhs_code (stmt) == EQ_EXPR ++ || gimple_assign_rhs_code (stmt) == NE_EXPR) ++ { ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree newrhs1[max_split]; ++ tree newrhs2[max_split]; ++ tree_code rhs_code = gimple_assign_rhs_code (stmt); ++ tree_code code = rhs_code == EQ_EXPR ? 
BIT_AND_EXPR : BIT_IOR_EXPR; ++ if (!rewrite_lhs_rhs (rhs1, rhs2, newrhs1, newrhs2)) ++ return false; ++ tree newexpr = NULL_TREE; ++ for (unsigned i = 0; i < max_split && newrhs1[i]; i++) ++ { ++ tree expr = gimplify_build2 (gsi, rhs_code, boolean_type_node, newrhs1[i], newrhs2[i]); ++ if (!newexpr) ++ newexpr = expr; ++ else ++ newexpr = gimplify_build2 (gsi, code, boolean_type_node, newexpr, expr); ++ } ++ ++ if (newexpr) ++ { ++ newexpr = fold_convert (TREE_TYPE (gimple_assign_lhs (stmt)), newexpr); ++ gimple_assign_set_rhs_from_tree (gsi, newexpr); ++ update_stmt (stmt); ++ } ++ return false; ++ } ++ ++ if (gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs1 = gimple_assign_rhs1 (stmt); ++ tree rhs2 = gimple_assign_rhs2 (stmt); ++ tree newlhs[max_split]; ++ tree newrhs[max_split]; ++ ++ if (!rewrite_lhs_rhs (lhs, rhs1, newlhs, newrhs)) ++ return false; ++ tree size = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))); ++ tree num; ++ /* Check if rhs2 is a multiplication of the size of the type. */ ++ if (!is_result_of_mult (rhs2, &num, size)) ++ internal_error ("the rhs of pointer was not a multiplicate and it slipped through."); ++ ++ num = gimplify_build1 (gsi, NOP_EXPR, sizetype, num); ++ for (unsigned i = 0; i < max_split && newlhs[i]; i++) ++ { ++ gimple *new_stmt; ++ ++ tree newsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (newlhs[i]))); ++ newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, newsize); ++ new_stmt = gimple_build_assign (newlhs[i], POINTER_PLUS_EXPR, newrhs[i], newsize); ++ gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); ++ remove = true; ++ } ++ return remove; ++ } ++ if (gimple_assign_rhs_class (stmt) == GIMPLE_SINGLE_RHS) ++ { ++ tree lhs = gimple_assign_lhs (stmt); ++ tree rhs = gimple_assign_rhs1 (stmt); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "rewriting stamtenet:\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ tree newlhs[max_split]; ++ tree newrhs[max_split]; ++ if (!rewrite_lhs_rhs (lhs, rhs, newlhs, newrhs)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nDid nothing to statement.\n"); ++ return false; ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nreplaced with:\n"); ++ for (unsigned i = 0; i < max_split && (newlhs[i] || newrhs[i]); i++) ++ { ++ gimple *newstmt = gimple_build_assign (newlhs[i] ? newlhs[i] : lhs, newrhs[i] ? newrhs[i] : rhs); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ print_gimple_stmt (dump_file, newstmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ gsi_insert_before (gsi, newstmt, GSI_SAME_STMT); ++ remove = true; ++ } ++ return remove; ++ } ++ ++ return remove; ++} ++ ++/* Rewrite function call statement STMT. Return TRUE if the statement ++ is to be removed. */ ++ ++bool ++ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) ++{ ++ /* Handled allocation calls are handled seperately from normal ++ function calls. */ ++ if (handled_allocation_stmt (stmt)) ++ { ++ tree lhs = gimple_call_lhs (stmt); ++ tree newrhs1[max_split]; ++ srdecl *decl = find_decl (lhs); ++ if (!decl || !decl->type) ++ return false; ++ srtype *type = decl->type; ++ tree num = allocate_size (type, stmt); ++ gcc_assert (num); ++ memset (newrhs1, 0, sizeof(newrhs1)); ++ ++ /* The realloc call needs to have its first argument rewritten. 
*/ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) ++ { ++ tree rhs1 = gimple_call_arg (stmt, 0); ++ if (integer_zerop (rhs1)) ++ { ++ for (unsigned i = 0; i < max_split; i++) ++ newrhs1[i] = rhs1; ++ } ++ else if (!rewrite_expr (rhs1, newrhs1)) ++ internal_error ("rewrite failed for realloc"); ++ } ++ ++ /* Go through each new lhs. */ ++ for (unsigned i = 0; i < max_split && decl->newdecl[i]; i++) ++ { ++ tree newsize = TYPE_SIZE_UNIT (type->type); ++ gimple *g; ++ /* Every allocation except for calloc needs the size multiplied out. */ ++ if (!gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) ++ newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, newsize); ++ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) ++ || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA)) ++ g = gimple_build_call (gimple_call_fndecl (stmt), ++ 1, newsize); ++ else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) ++ g = gimple_build_call (gimple_call_fndecl (stmt), ++ 2, num, newsize); ++ else if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) ++ g = gimple_build_call (gimple_call_fndecl (stmt), ++ 2, newrhs1[i], newsize); ++ else ++ gcc_assert (false); ++ gimple_call_set_lhs (g, decl->newdecl[i]); ++ gsi_insert_before (gsi, g, GSI_SAME_STMT); ++ } ++ return true; ++ } ++ ++ /* The function call free needs to be handled special. */ ++ if (gimple_call_builtin_p (stmt, BUILT_IN_FREE)) ++ { ++ tree expr = gimple_call_arg (stmt, 0); ++ tree newexpr[max_split]; ++ if (!rewrite_expr (expr, newexpr)) ++ return false; ++ ++ if (newexpr[1] == NULL) ++ { ++ gimple_call_set_arg (stmt, 0, newexpr[0]); ++ update_stmt (stmt); ++ return false; ++ } ++ ++ for (unsigned i = 0; i < max_split && newexpr[i]; i++) ++ { ++ gimple *g = gimple_build_call (gimple_call_fndecl (stmt), ++ 1, newexpr[i]); ++ gsi_insert_before (gsi, g, GSI_SAME_STMT); ++ } ++ return true; ++ } ++ ++ /* Otherwise, look up the function to see if we have cloned it ++ and rewrite the arguments. */ ++ tree fndecl = gimple_call_fndecl (stmt); ++ ++ /* Indirect calls are already marked as escaping so ignore. */ ++ if (!fndecl) ++ return false; ++ ++ cgraph_node *node = cgraph_node::get (fndecl); ++ gcc_assert (node); ++ srfunction *f = find_function (node); ++ ++ /* Did not find the function or had not cloned it return saying don't ++ change the function call. */ ++ if (!f || !f->newf) ++ return false; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Changing arguments for function call :\n"); ++ print_gimple_expr (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ } ++ ++ /* Move over to the new function. */ ++ f = f->newf; ++ ++ tree chain = gimple_call_chain (stmt); ++ unsigned nargs = gimple_call_num_args (stmt); ++ auto_vec vargs (nargs); ++ ++ if (chain) ++ { ++ tree newchains[max_split]; ++ if (rewrite_expr (chain, newchains)) ++ { ++ /* Chain decl's type cannot be split and but it can change. */ ++ gcc_assert (newchains[1] == NULL); ++ chain = newchains[0]; ++ } ++ } ++ ++ for (unsigned i = 0; i < nargs; i++) ++ vargs.quick_push (gimple_call_arg (stmt, i)); ++ ++ int extraargs = 0; ++ ++ for (unsigned i = 0; i < f->args.length (); i++) ++ { ++ srdecl *d = f->args[i]; ++ if (d->argumentnum == -2) ++ continue; ++ gcc_assert (d->argumentnum != -1); ++ tree arg = vargs[d->argumentnum + extraargs]; ++ tree newargs[max_split]; ++ if (!rewrite_expr (arg, newargs)) ++ continue; ++ ++ /* If this ARG has a replacement handle the replacement. 
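In source terms, the allocation and free handling above corresponds roughly to the following sketch; the split types and variable names are hypothetical.

    #include <stdlib.h>

    struct pair_reorg_0 { int hot; };
    struct pair_reorg_1 { double cold; };

    void
    alloc_demo (size_t n)
    {
      /* Before the rewrite:
           struct pair *p = malloc (n * sizeof (struct pair));
           ...
           free (p);
         After: one allocation and one free per replacement type, each sized by
         the new type (for calloc the element count stays a separate argument).  */
      struct pair_reorg_0 *p0 = malloc (n * sizeof (struct pair_reorg_0));
      struct pair_reorg_1 *p1 = malloc (n * sizeof (struct pair_reorg_1));
      /* ... accesses p[i].hot / p[i].cold become p0[i].hot / p1[i].cold ... */
      free (p0);
      free (p1);
    }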
*/ ++ for (unsigned j = 0; j < max_split && d->newdecl[j]; j++) ++ { ++ gcc_assert (newargs[j]); ++ /* If this is the first replacement of the arugment, ++ then just replace it. */ ++ if (j == 0) ++ vargs[d->argumentnum + extraargs] = newargs[j]; ++ else ++ { ++ /* More than one replacement, we need to insert into the array. */ ++ extraargs++; ++ vargs.safe_insert(d->argumentnum + extraargs, newargs[j]); ++ } ++ } ++ } ++ ++ gcall *new_stmt; ++ ++ new_stmt = gimple_build_call_vec (f->node->decl, vargs); ++ ++ if (gimple_call_lhs (stmt)) ++ gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt)); ++ ++ gimple_set_vuse (new_stmt, gimple_vuse (stmt)); ++ gimple_set_vdef (new_stmt, gimple_vdef (stmt)); ++ ++ if (gimple_has_location (stmt)) ++ gimple_set_location (new_stmt, gimple_location (stmt)); ++ gimple_call_copy_flags (new_stmt, stmt); ++ gimple_call_set_chain (new_stmt, chain); ++ ++ gimple_set_modified (new_stmt, true); ++ ++ if (gimple_vdef (new_stmt) ++ && TREE_CODE (gimple_vdef (new_stmt)) == SSA_NAME) ++ SSA_NAME_DEF_STMT (gimple_vdef (new_stmt)) = new_stmt; ++ ++ gsi_replace (gsi, new_stmt, false); ++ ++ /* We need to defer cleaning EH info on the new statement to ++ fixup-cfg. We may not have dominator information at this point ++ and thus would end up with unreachable blocks and have no way ++ to communicate that we need to run CFG cleanup then. */ ++ int lp_nr = lookup_stmt_eh_lp (stmt); ++ if (lp_nr != 0) ++ { ++ remove_stmt_from_eh_lp (stmt); ++ add_stmt_to_eh_lp (new_stmt, lp_nr); ++ } ++ ++ ++ return false; ++} ++ ++/* Rewrite the conditional statement STMT. Return TRUE if the ++ old statement is to be removed. */ ++ ++bool ++ipa_struct_reorg::rewrite_cond (gcond *stmt, gimple_stmt_iterator *gsi) ++{ ++ tree_code rhs_code = gimple_cond_code (stmt); ++ ++ /* Handle only equals or not equals conditionals. */ ++ if (rhs_code != EQ_EXPR ++ && rhs_code != NE_EXPR) ++ return false; ++ tree rhs1 = gimple_cond_lhs (stmt); ++ tree rhs2 = gimple_cond_rhs (stmt); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "COND: Rewriting\n"); ++ print_gimple_stmt (dump_file, stmt, 0); ++ fprintf (dump_file, "\n"); ++ print_generic_expr (dump_file, rhs1); ++ fprintf (dump_file, "\n"); ++ print_generic_expr (dump_file, rhs2); ++ fprintf (dump_file, "\n"); ++ } ++ ++ tree newrhs1[max_split]; ++ tree newrhs2[max_split]; ++ tree_code code = rhs_code == EQ_EXPR ? BIT_AND_EXPR : BIT_IOR_EXPR; ++ if (!rewrite_lhs_rhs (rhs1, rhs2, newrhs1, newrhs2)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nDid nothing to statement.\n"); ++ return false; ++ } ++ ++ tree newexpr = NULL_TREE; ++ for (unsigned i = 0; i < max_split && newrhs1[i]; i++) ++ { ++ tree expr = gimplify_build2 (gsi, rhs_code, boolean_type_node, newrhs1[i], newrhs2[i]); ++ if (!newexpr) ++ newexpr = expr; ++ else ++ newexpr = gimplify_build2 (gsi, code, boolean_type_node, newexpr, expr); ++ } ++ ++ if (newexpr) ++ { ++ gimple_cond_set_lhs (stmt, newexpr); ++ gimple_cond_set_rhs (stmt, boolean_true_node); ++ update_stmt (stmt); ++ } ++ return false; ++} ++ ++/* Rewrite debug statments if possible. Return TRUE if the statement ++ should be removed. 
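A hypothetical source-level view of the comparison rewriting above:

    struct pair_reorg_0 { int hot; };
    struct pair_reorg_1 { double cold; };

    /* Before:  if (p == q) ...  on pointers to the original type.
       After: the per-part comparisons are combined with BIT_AND_EXPR; a '!='
       test would be combined with BIT_IOR_EXPR instead.  */
    int
    same_object (struct pair_reorg_0 *p0, struct pair_reorg_1 *p1,
                 struct pair_reorg_0 *q0, struct pair_reorg_1 *q1)
    {
      return (p0 == q0) & (p1 == q1);
    }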
*/ ++ ++bool ++ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *) ++{ ++ bool remove = false; ++ if (gimple_debug_bind_p (stmt)) ++ { ++ tree var = gimple_debug_bind_get_var (stmt); ++ tree newvar[max_split]; ++ if (rewrite_expr (var, newvar, true)) ++ remove = true; ++ if (gimple_debug_bind_has_value_p (stmt)) ++ { ++ var = gimple_debug_bind_get_value (stmt); ++ if (TREE_CODE (var) == POINTER_PLUS_EXPR) ++ var = TREE_OPERAND (var, 0); ++ if (rewrite_expr (var, newvar, true)) ++ remove = true; ++ } ++ } ++ else if (gimple_debug_source_bind_p (stmt)) ++ { ++ tree var = gimple_debug_source_bind_get_var (stmt); ++ tree newvar[max_split]; ++ if (rewrite_expr (var, newvar, true)) ++ remove = true; ++ var = gimple_debug_source_bind_get_value (stmt); ++ if (TREE_CODE (var) == POINTER_PLUS_EXPR) ++ var = TREE_OPERAND (var, 0); ++ if (rewrite_expr (var, newvar, true)) ++ remove = true; ++ } ++ ++ return remove; ++} ++ ++/* Rewrite PHI nodes, return true if the PHI was replaced. */ ++ ++bool ++ipa_struct_reorg::rewrite_phi (gphi *phi) ++{ ++ tree newlhs[max_split]; ++ gphi *newphi[max_split]; ++ tree result = gimple_phi_result (phi); ++ gphi_iterator gsi; ++ ++ memset(newphi, 0, sizeof(newphi)); ++ ++ if (!rewrite_expr (result, newlhs)) ++ return false; ++ ++ if (newlhs[0] == NULL) ++ return false; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nrewriting PHI:"); ++ print_gimple_stmt (dump_file, phi, 0); ++ } ++ ++ for (unsigned i = 0; i < max_split && newlhs[i]; i++) ++ newphi[i] = create_phi_node (newlhs[i], gimple_bb (phi)); ++ ++ for(unsigned i = 0; i < gimple_phi_num_args (phi); i++) ++ { ++ tree newrhs[max_split]; ++ phi_arg_d rhs = *gimple_phi_arg (phi, i); ++ rewrite_expr (rhs.def, newrhs); ++ for (unsigned j = 0; j < max_split && newlhs[j]; j++) ++ { ++ SET_PHI_ARG_DEF (newphi[j], i, newrhs[j]); ++ gimple_phi_arg_set_location (newphi[j], i, rhs.locus); ++ update_stmt (newphi[j]); ++ } ++ } ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\ninto\n:"); ++ for (unsigned i = 0; i < max_split && newlhs[i]; i++) ++ { ++ print_gimple_stmt (dump_file, newphi[i], 0); ++ fprintf (dump_file, "\n"); ++ } ++ } ++ ++ gsi = gsi_for_phi (phi); ++ remove_phi_node (&gsi, false); ++ ++ return true; ++} ++ ++/* Rewrite gimple statement STMT, return true if the STATEMENT ++ is to be removed. */ ++ ++bool ++ipa_struct_reorg::rewrite_stmt (gimple *stmt, gimple_stmt_iterator *gsi) ++{ ++ switch (gimple_code (stmt)) ++ { ++ case GIMPLE_ASSIGN: ++ return rewrite_assign (as_a (stmt), gsi); ++ case GIMPLE_CALL: ++ return rewrite_call (as_a (stmt), gsi); ++ case GIMPLE_COND: ++ return rewrite_cond (as_a (stmt), gsi); ++ break; ++ case GIMPLE_GOTO: ++ case GIMPLE_SWITCH: ++ break; ++ case GIMPLE_DEBUG: ++ case GIMPLE_ASM: ++ break; ++ default: ++ break; ++ } ++ return false; ++} ++ ++/* Does the function F uses any decl which has changed. */ ++ ++bool ++ipa_struct_reorg::has_rewritten_type (srfunction *f) ++{ ++ for (unsigned i = 0; i < f->decls.length (); i++) ++ { ++ srdecl *d = f->decls[i]; ++ if (d->newdecl[0] != d->decl) ++ return true; ++ } ++ ++ for (unsigned i = 0; i < f->globals.length (); i++) ++ { ++ srdecl *d = f->globals[i]; ++ if (d->newdecl[0] != d->decl) ++ return true; ++ } ++ return false; ++ ++} ++ ++/* Rewrite the functions if needed, return ++ the TODOs requested. 
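Roughly, at the source level, the PHI rewriting amounts to the following hypothetical sketch:

    struct pair_reorg_0 { int hot; };
    struct pair_reorg_1 { double cold; };

    /* A PHI merging two pointers of the original type, p = c ? a : b, is
       recreated once per replacement pointer with its arguments rewritten
       piecewise.  */
    void
    select_demo (int c,
                 struct pair_reorg_0 *a0, struct pair_reorg_1 *a1,
                 struct pair_reorg_0 *b0, struct pair_reorg_1 *b1,
                 struct pair_reorg_0 **out0, struct pair_reorg_1 **out1)
    {
      *out0 = c ? a0 : b0;
      *out1 = c ? a1 : b1;
    }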
*/ ++ ++unsigned ++ipa_struct_reorg::rewrite_functions (void) ++{ ++ unsigned retval = 0; ++ ++ ++ /* Create new types, if we did not create any new types, ++ then don't rewrite any accesses. */ ++ if (!create_new_types ()) ++ return 0; ++ ++ if (functions.length ()) ++ { ++ retval = TODO_remove_functions; ++ create_new_functions (); ++ } ++ ++ create_new_decls (); ++ ++ for (unsigned i = 0; i < functions.length (); i++) ++ { ++ srfunction *f = functions[i]; ++ if (f->newnode) ++ continue; ++ ++ /* Function uses no rewriten types so don't cause a rewrite. */ ++ if (!has_rewritten_type (f)) ++ continue; ++ ++ cgraph_node *node = f->node; ++ basic_block bb; ++ ++ push_cfun (DECL_STRUCT_FUNCTION (node->decl)); ++ current_function = f; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nBefore rewrite:\n"); ++ dump_function_to_file (current_function_decl, dump_file, dump_flags | TDF_VOPS); ++ } ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); ) ++ { ++ if (rewrite_phi (si.phi ())) ++ si = gsi_start_phis (bb); ++ else ++ gsi_next (&si); ++ } ++ ++ for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); ) ++ { ++ gimple *stmt = gsi_stmt (si); ++ if (rewrite_stmt (stmt, &si)) ++ gsi_remove (&si, true); ++ else ++ gsi_next (&si); ++ } ++ } ++ ++ /* Debug statements need to happen after all other statements ++ have changed. */ ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); ) ++ { ++ gimple *stmt = gsi_stmt (si); ++ if (gimple_code (stmt) == GIMPLE_DEBUG ++ && rewrite_debug (stmt, &si)) ++ gsi_remove (&si, true); ++ else ++ gsi_next (&si); ++ } ++ } ++ ++ /* Release the old SSA_NAMES for old arguments. */ ++ if (f->old) ++ { ++ for (unsigned i = 0; i < f->args.length (); i++) ++ { ++ srdecl *d = f->args[i]; ++ if (d->newdecl[0] != d->decl) ++ { ++ tree ssa_name = ssa_default_def (cfun, d->decl); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "Found "); ++ print_generic_expr (dump_file, ssa_name); ++ fprintf (dump_file, " to be released.\n"); ++ } ++ release_ssa_name (ssa_name); ++ } ++ } ++ } ++ ++ update_ssa (TODO_update_ssa_only_virtuals); ++ ++ if (flag_tree_pta) ++ compute_may_aliases (); ++ ++ remove_unused_locals (); ++ ++ cgraph_edge::rebuild_edges (); ++ ++ free_dominance_info (CDI_DOMINATORS); ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nAfter rewrite:\n"); ++ dump_function_to_file (current_function_decl, dump_file, dump_flags | TDF_VOPS); ++ } ++ ++ pop_cfun (); ++ current_function = NULL; ++ } ++ ++ return retval | TODO_verify_all; ++} ++ ++unsigned int ++ipa_struct_reorg::execute (void) ++{ ++ /* FIXME: If there is a top-level inline-asm, the pass immediately returns. 
*/ ++ if (symtab->first_asm_symbol ()) ++ return 0; ++ record_accesses (); ++ prune_escaped_types (); ++ analyze_types (); ++ ++ return rewrite_functions (); ++} ++ ++const pass_data pass_data_ipa_struct_reorg = ++{ ++ SIMPLE_IPA_PASS, /* type */ ++ "struct_reorg", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_IPA_STRUCT_REORG, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ 0, /* todo_flags_finish */ ++}; ++ ++class pass_ipa_struct_reorg : public simple_ipa_opt_pass ++{ ++public: ++ pass_ipa_struct_reorg (gcc::context *ctxt) ++ : simple_ipa_opt_pass (pass_data_ipa_struct_reorg, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *); ++ virtual unsigned int execute (function *) { return ipa_struct_reorg ().execute(); } ++ ++}; // class pass_ipa_struct_reorg ++ ++bool ++pass_ipa_struct_reorg::gate (function *) ++{ ++ return (optimize ++ && flag_ipa_struct_reorg ++ /* Don't bother doing anything if the program has errors. */ ++ && !seen_error ()); ++} ++ ++} // anon namespace ++ ++simple_ipa_opt_pass * ++make_pass_ipa_struct_reorg (gcc::context *ctxt) ++{ ++ return new pass_ipa_struct_reorg (ctxt); ++} +diff -Nurp a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h +--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h 2020-06-16 22:56:07.732000000 -0400 +@@ -0,0 +1,235 @@ ++/* Struct-reorg optimizations. ++ Copyright (C) 2016-2017 Free Software Foundation, Inc. ++ Contributed by Andrew Pinski ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. 
*/ ++ ++#ifndef IPA_STRUCT_REORG_H ++#define IPA_STRUCT_REORG_H ++ ++namespace struct_reorg { ++ ++const int max_split = 2; ++ ++template ++struct auto_vec_del : auto_vec ++{ ++ ~auto_vec_del(); ++}; ++ ++template ++auto_vec_del::~auto_vec_del(void) ++{ ++ unsigned i; ++ T *t; ++ FOR_EACH_VEC_ELT (*this, i, t) ++ { ++ delete t; ++ } ++} ++ ++enum escape_type ++{ ++ does_not_escape, ++#define DEF_ESCAPE(ENUM, TEXT) ENUM, ++#include "escapes.def" ++ escape_max_escape ++}; ++ ++const char *escape_type_string[escape_max_escape - 1] = ++{ ++#define DEF_ESCAPE(ENUM, TEXT) TEXT, ++#include "escapes.def" ++}; ++ ++struct srfield; ++struct srtype; ++struct sraccess; ++struct srdecl; ++struct srfunction; ++ ++struct srfunction ++{ ++ cgraph_node *node; ++ auto_vec args; ++ auto_vec globals; ++ auto_vec_del decls; ++ srdecl *record_decl (srtype *, tree, int arg); ++ ++ srfunction *old; ++ cgraph_node *newnode; ++ srfunction *newf; ++ ++ // Constructors ++ srfunction (cgraph_node *n); ++ ++ // Methods ++ void add_arg (srdecl *arg); ++ void dump (FILE *file); ++ void simple_dump (FILE *file); ++ ++ bool check_args (void); ++ void create_new_decls (void); ++ srdecl *find_decl (tree); ++}; ++ ++struct srglobal : private srfunction ++{ ++ srglobal () ++ : srfunction (NULL) ++ { ++ } ++ ++ using srfunction::dump; ++ using srfunction::create_new_decls; ++ using srfunction::find_decl; ++ using srfunction::record_decl; ++ using srfunction::decls; ++}; ++ ++struct srtype ++{ ++ tree type; ++ auto_vec_del fields; ++ ++ // array of fields that use this type. ++ auto_vec field_sites; ++ ++ // array of functions which use directly the type ++ auto_vec functions; ++ ++ auto_vec_del accesses; ++ bool chain_type; ++ ++private: ++ escape_type escapes; ++public: ++ ++ tree newtype[max_split]; ++ bool visited; ++ ++ // Constructors ++ srtype(tree type); ++ ++ // Methods ++ void dump (FILE *file); ++ void simple_dump (FILE *file); ++ void add_function (srfunction *); ++ void add_access (sraccess *a) ++ { ++ accesses.safe_push (a); ++ } ++ void add_field_site (srfield *); ++ ++ srfield *find_field (unsigned HOST_WIDE_INT offset); ++ ++ bool create_new_type (void); ++ void analyze (void); ++ void mark_escape (escape_type, gimple *stmt); ++ bool has_escaped (void) ++ { ++ return escapes != does_not_escape; ++ } ++ const char *escape_reason (void) ++ { ++ if (!has_escaped()) ++ return NULL; ++ return escape_type_string[escapes-1]; ++ } ++ bool escaped_rescusive (void) ++ { ++ return escapes == escape_rescusive_type; ++ } ++ bool has_new_type (void) ++ { ++ return newtype[0] && newtype[0] != type; ++ } ++}; ++ ++struct srfield ++{ ++ unsigned HOST_WIDE_INT offset; ++ tree fieldtype; ++ tree fielddecl; ++ srtype *base; ++ srtype *type; ++ ++ unsigned clusternum; ++ ++ tree newfield[max_split]; ++ ++ // Constructors ++ srfield (tree field, srtype *base); ++ ++ // Methods ++ void dump (FILE *file); ++ void simple_dump (FILE *file); ++ ++ void create_new_fields (tree newtype[max_split], ++ tree newfields[max_split], ++ tree newlast[max_split]); ++}; ++ ++struct sraccess ++{ ++ gimple *stmt; ++ cgraph_node *node; ++ ++ srtype *type; ++ // NULL field means the whole type is accessed ++ srfield *field; ++ ++ // constructors ++ sraccess (gimple *s, cgraph_node *n, srtype *t, srfield *f = NULL) ++ : stmt (s), ++ node (n), ++ type (t), ++ field (f) ++ {} ++ ++ // Methods ++ void dump (FILE *file); ++}; ++ ++struct srdecl ++{ ++ srtype *type; ++ tree decl; ++ tree func; ++ /* -1 : not an argument ++ -2 : static chain */ ++ int 
argumentnum; ++ ++ bool visited; ++ ++ tree newdecl[max_split]; ++ ++ // Constructors ++ srdecl (srtype *type, tree decl, int argumentnum = -1); ++ ++ // Methods ++ void dump (FILE *file); ++ bool has_new_decl (void) ++ { ++ return newdecl[0] && newdecl[0] != decl; ++ } ++}; ++ ++ ++} // namespace struct_reorg ++ ++#endif +diff -Nurp a/gcc/Makefile.in b/gcc/Makefile.in +--- a/gcc/Makefile.in 2020-03-12 07:07:20.000000000 -0400 ++++ b/gcc/Makefile.in 2020-06-16 22:56:07.732000000 -0400 +@@ -1367,6 +1367,7 @@ OBJS = \ + incpath.o \ + init-regs.o \ + internal-fn.o \ ++ ipa-struct-reorg/ipa-struct-reorg.o \ + ipa-cp.o \ + ipa-devirt.o \ + ipa-fnsummary.o \ +diff -Nurp a/gcc/params.def b/gcc/params.def +--- a/gcc/params.def 2020-03-12 07:07:21.000000000 -0400 ++++ b/gcc/params.def 2020-06-16 22:56:07.732000000 -0400 +@@ -42,6 +42,16 @@ along with GCC; see the file COPYING3. + + Be sure to add an entry to invoke.texi summarizing the parameter. */ + ++/* The threshold ratio between current and hottest structure counts. ++ We say that if the ratio of the current structure count, ++ calculated by profiling, to the hottest structure count ++ in the program is less than this parameter, then structure ++ reorganization is not applied. The default is 10%. */ ++DEFPARAM (PARAM_STRUCT_REORG_COLD_STRUCT_RATIO, ++ "struct-reorg-cold-struct-ratio", ++ "The threshold ratio between current and hottest structure counts", ++ 10, 0, 100) ++ + /* When branch is predicted to be taken with probability lower than this + threshold (in percent), then it is considered well predictable. */ + DEFPARAM (PARAM_PREDICTABLE_BRANCH_OUTCOME, +diff -Nurp a/gcc/params.h b/gcc/params.h +--- a/gcc/params.h 2020-03-12 07:07:21.000000000 -0400 ++++ b/gcc/params.h 2020-06-16 22:56:07.732000000 -0400 +@@ -130,6 +130,8 @@ extern int default_param_value (compiler + extern void init_param_values (int *params); + + /* Macros for the various parameters. */ ++#define STRUCT_REORG_COLD_STRUCT_RATIO \ ++ PARAM_VALUE (PARAM_STRUCT_REORG_COLD_STRUCT_RATIO) + #define MAX_INLINE_INSNS_SINGLE \ + PARAM_VALUE (PARAM_MAX_INLINE_INSNS_SINGLE) + #define MAX_INLINE_INSNS \ +diff -Nurp a/gcc/passes.def b/gcc/passes.def +--- a/gcc/passes.def 2020-03-12 07:07:21.000000000 -0400 ++++ b/gcc/passes.def 2020-06-16 22:56:07.732000000 -0400 +@@ -169,6 +169,8 @@ along with GCC; see the file COPYING3. + INSERT_PASSES_AFTER (all_late_ipa_passes) + NEXT_PASS (pass_materialize_all_clones); + NEXT_PASS (pass_ipa_pta); ++ /* FIXME: this should a normal IP pass */ ++ NEXT_PASS (pass_ipa_struct_reorg); + NEXT_PASS (pass_omp_simd_clone); + TERMINATE_PASS_LIST (all_late_ipa_passes) + +diff -Nurp a/gcc/testsuite/gcc.c-torture/compile/20170404-1.c b/gcc/testsuite/gcc.c-torture/compile/20170404-1.c +--- a/gcc/testsuite/gcc.c-torture/compile/20170404-1.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.c-torture/compile/20170404-1.c 2020-06-16 22:56:07.732000000 -0400 +@@ -0,0 +1,19 @@ ++struct a ++{ ++ int t, t1; ++}; ++ ++static struct a *b; ++ ++void *xmalloc(int); ++ ++ ++void f(void) ++{ ++ b = xmalloc (sizeof(*b)); ++} ++ ++int g(void) ++{ ++ return b->t; ++} +diff -Nurp a/gcc/testsuite/gcc.c-torture/compile/nested-3.c b/gcc/testsuite/gcc.c-torture/compile/nested-3.c +--- a/gcc/testsuite/gcc.c-torture/compile/nested-3.c 2020-03-12 07:07:22.000000000 -0400 ++++ b/gcc/testsuite/gcc.c-torture/compile/nested-3.c 2020-06-16 22:56:07.736000000 -0400 +@@ -1,3 +1,4 @@ ++/* This used to crash Struct reorg. 
*/ + struct a + { + int t; +diff -Nurp a/gcc/testsuite/gcc.c-torture/compile/struct-reorg-1.c b/gcc/testsuite/gcc.c-torture/compile/struct-reorg-1.c +--- a/gcc/testsuite/gcc.c-torture/compile/struct-reorg-1.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.c-torture/compile/struct-reorg-1.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,18 @@ ++#include ++typedef struct { ++ long laststart_offset; ++ unsigned regnum; ++} compile_stack_elt_t; ++typedef struct { ++ compile_stack_elt_t *stack; ++ unsigned size; ++} compile_stack_type; ++void f (const char *p, const char *pend, int c) ++{ ++ compile_stack_type compile_stack; ++ while (p != pend) ++ if (c) ++ compile_stack.stack = realloc (compile_stack.stack, ++ (compile_stack.size << 1) ++ * sizeof (compile_stack_elt_t)); ++} +diff -Nurp a/gcc/testsuite/gcc.dg/pr33136-4.c b/gcc/testsuite/gcc.dg/pr33136-4.c +--- a/gcc/testsuite/gcc.dg/pr33136-4.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/pr33136-4.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,59 @@ ++/* PR tree-optimization/33136 */ ++/* { dg-do run } */ ++/* { dg-options "-O2" } */ ++ ++extern void abort (void); ++ ++struct S ++{ ++ int b; ++ int *c; ++}; ++static int d, e; ++ ++static struct S s; ++ ++static int * ++__attribute__((noinline, const)) ++foo (void) ++{ ++ return &s.b; ++} ++ ++int * ++__attribute__((noinline)) ++bar (int **f) ++{ ++ s.c = &d; ++ *f = &e; ++ /* As nothing ever takes the address of any int * field in struct S, ++ the write to *f can't alias with the s.c field. */ ++ return s.c; ++} ++ ++int ++__attribute__((noinline)) ++baz (int *x) ++{ ++ s.b = 1; ++ *x = 4; ++ /* Function foo takes address of an int field in struct S, ++ so *x can alias with the s.b field (and it does in this testcase). */ ++ return s.b; ++} ++ ++int ++__attribute__((noinline)) ++t (void) ++{ ++ int *f = (int *) 0; ++ return 10 * (bar (&f) != &d) + baz (foo ()); ++} ++ ++int ++main (void) ++{ ++ if (t () != 4) ++ abort (); ++ return 0; ++} +diff -Nurp a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +--- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,87 @@ ++# Copyright (C) 2007, 2008, 2009, 2010 ++# Free Software Foundation, Inc. ++ ++# This program is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3 of the License, or ++# (at your option) any later version. ++# ++# This program is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with this program; see the file COPYING3. If not see ++# . ++ ++# Test the functionality of programs compiled with profile-directed structure ++# rearrangement using -fprofile-generate followed by -fprofile-use. ++ ++load_lib gcc-dg.exp ++load_lib target-supports.exp ++ ++# Initialize `dg'. 
++dg-init ++torture-init ++ ++set STRUCT_REORG_TORTURE_OPTIONS [list \ ++ { -O1 } \ ++ { -O1 -g } \ ++ { -O2 } \ ++ { -O2 -g } \ ++ { -O3 -fomit-frame-pointer -funroll-loops -fpeel-loops -ftracer -finline-functions } \ ++ { -O3 -g } \ ++ { -Os } ] ++ ++ ++set-torture-options $STRUCT_REORG_TORTURE_OPTIONS {{}} $LTO_TORTURE_OPTIONS ++ ++gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/wo_prof_*.c]] "" "-fipa-struct-reorg -fdump-ipa-all -fwhole-program" ++ ++torture-finish ++dg-final ++ ++# Some targets don't support tree profiling. ++if { ![check_profiling_available ""] } { ++ return ++} ++ ++# The procedures in profopt.exp need these parameters. ++set tool gcc ++set prof_ext "gcda" ++ ++# Override the list defined in profopt.exp. ++set PROFOPT_OPTIONS [list {}] ++ ++if $tracelevel then { ++ strace $tracelevel ++} ++ ++# Load support procs. ++load_lib profopt.exp ++ ++# These are globals used by profopt-execute. The first is options ++# needed to generate profile data, the second is options to use the ++# profile data. ++set common "-O3 -fwhole-program" ++set profile_option [concat $common " -fprofile-generate"] ++set feedback_option [concat $common " -fprofile-use -fipa-struct-reorg -fdump-ipa-all"] ++ ++foreach src [lsort [glob -nocomplain $srcdir/$subdir/w_prof_*.c]] { ++ # If we're only testing specific files and this isn't one of them, skip it. ++ if ![runtest_file_p $runtests $src] then { ++ continue ++ } ++ profopt-execute $src ++} ++ ++set feedback_option [concat $feedback_option " --param struct-reorg-cold-struct-ratio=30"] ++ ++foreach src [lsort [glob -nocomplain $srcdir/$subdir/w_ratio_*.c]] { ++ # If we're only testing specific files and this isn't one of them, skip it. ++ if ![runtest_file_p $runtests $src] then { ++ continue ++ } ++ profopt-execute $src ++} +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c b/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_array_field.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,26 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct basic ++{ ++ int a; ++ int b[10]; ++} type_struct; ++ ++type_struct *str1; ++ ++int main() ++{ ++ int i; ++ ++ str1 = malloc (10 * sizeof (type_struct)); ++ ++ for (i=0; i<=9; i++) ++ str1[i].a = str1[i].b[0]; ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c b/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_array_through_pointer.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,38 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i; ++ str_t A[N]; ++ str_t *p = A; ++ ++ for (i = 0; i < N; i++) ++ p[i].a = 0; ++ ++ for (i = 0; i < N; i++) ++ if (p[i].a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } 
*/ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c b/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_double_malloc.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,29 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++ ++typedef struct test_struct ++{ ++ int a; ++ int b; ++} type_struct; ++ ++typedef type_struct **struct_pointer2; ++ ++struct_pointer2 str1; ++ ++int main() ++{ ++ int i, j; ++ ++ str1 = malloc (2 * sizeof (type_struct *)); ++ ++ for (i = 0; i <= 1; i++) ++ str1[i] = malloc (2 * sizeof (type_struct)); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c b/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_empty_str.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,44 @@ ++/* { dg-do run } */ ++ ++#include ++ ++struct S { int a; struct V *b; }; ++typedef struct { int c; } T; ++typedef struct { int d; int e; } U; ++ ++void * ++fn (void *x) ++{ ++ return x; ++} ++ ++int ++foo (struct S *s) ++{ ++ T x; ++ ++ T y = *(T *)fn (&x); ++ return y.c; ++} ++ ++int ++bar (struct S *s) ++{ ++ U x; ++ ++ U y = *(U *)fn (&x); ++ return y.d + s->a; ++} ++ ++int ++main () ++{ ++ struct S s; ++ ++ foo(&s) + bar (&s); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "No structures to transform" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_arg_to_local.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,44 @@ ++/* { dg-do run } */ ++ ++#include ++struct str ++{ ++ int a; ++ float b; ++}; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++foo (struct str * p_str) ++{ ++ static int sum = 0; ++ ++ sum = sum + p_str->a; ++ return sum; ++} ++ ++int ++main () ++{ ++ int i, sum; ++ struct str * p = malloc (N * sizeof (struct str)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ sum = foo (p+i); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ ++ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return-1.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,32 @@ ++/* { dg-do run } */ ++ ++#include ++ ++struct A { ++ int d; ++ int d1; ++}; ++ ++struct A a; ++ ++struct A *foo () __attribute__((noinline)); ++struct A *foo () ++{ ++ a.d = 5; ++ return &a; ++} ++ ++int ++main () ++{ ++ a.d = 0; ++ foo (); ++ ++ if (a.d != 5) ++ abort (); ++ ++ return 0; ++} ++ 
++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "has escaped. .Type escapes via a return" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_return.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,31 @@ ++/* { dg-do run } */ ++ ++#include ++ ++struct A { ++ int d; ++}; ++ ++struct A a; ++ ++struct A foo () __attribute__((noinline)); ++struct A foo () ++{ ++ a.d = 5; ++ return a; ++} ++ ++int ++main () ++{ ++ a.d = 0; ++ foo (); ++ ++ if (a.d != 5) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "has escaped: \"Type escapes via a return" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_str_init.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,31 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ int b; ++}str_t; ++ ++#define N 2 ++ ++str_t A[2] = {{1,1},{2,2}}; ++ ++int ++main () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ A[i].b = A[i].a; ++ ++ for (i = 0; i < N; i++) ++ if (A[i].b != A[i].a) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "has escaped...Type is used in an array" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_array.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,33 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#define N 1000 ++ ++typedef struct ++{ ++ str_t A[N]; ++ int c; ++}str_with_substr_t; ++ ++str_with_substr_t a; ++ ++int ++main () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ a.A[i].b = 0; ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_pointer.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,48 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 16000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/16) ++#endif ++#else ++#define N 1000 ++#endif ++ ++typedef struct ++{ ++ str_t * sub_str; ++ int c; ++}str_with_substr_t; ++ ++int foo; ++ ++int ++main (void) ++{ ++ int i; ++ str_with_substr_t A[N]; ++ str_t a[N]; ++ ++ for (i=0; i < N; i++) ++ A[i].sub_str = &(a[i]); ++ ++ for (i=0; 
i < N; i++) ++ A[i].sub_str->a = 5; ++ ++ foo = A[56].sub_str->a; ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "has escaped...Type is used in an array" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_escape_substr_value.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++ ++typedef struct ++{ ++ str_t sub_str; ++ int c; ++}str_with_substr_t; ++ ++int ++main () ++{ ++ int i; ++ str_with_substr_t A[N]; ++ ++ for (i = 0; i < N; i++) ++ A[i].sub_str.a = 5; ++ ++ for (i = 0; i < N; i++) ++ if (A[i].sub_str.a != 5) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "has escaped...Type is used in an array" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_global_array.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,32 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#define N 1000 ++str_t A[N]; ++ ++int ++main () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ { ++ A[i].a = 0; ++ } ++ ++ for (i = 0; i < N; i++) ++ if (A[i].a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_global_var.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,45 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++str_t *p; ++ ++int ++main () ++{ ++ int i, sum; ++ ++ p = malloc (N * sizeof (str_t)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ p[i].b = i; ++ ++ for (i = 0; i < N; i++) ++ p[i].b = p[i].a + 1; ++ ++ for (i = 0; i < N; i++) ++ if (p[i].b != p[i].a + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c b/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_local_array.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,40 
@@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i; ++ str_t A[N]; ++ ++ for (i = 0; i < N; i++) ++ { ++ A[i].a = 0; ++ } ++ ++ for (i = 0; i < N; i++) ++ if (A[i].a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_local_var.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,43 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i, sum; ++ ++ str_t * p = malloc (N * sizeof (str_t)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ p[i].b = i; ++ ++ for (i = 0; i < N; i++) ++ p[i].b = p[i].a + 1; ++ ++ for (i = 0; i < N; i++) ++ if (p[i].b != p[i].a + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var-1.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,47 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ long i, num; ++ ++ num = rand(); ++ num = num > N ? N : num; ++ str_t * p = malloc (num * sizeof (str_t)); ++ ++ if (p == 0) ++ return 0; ++ ++ for (i = 1; i <= num; i++) ++ p[i-1].b = i; ++ ++ for (i = 1; i <= num; i++) ++ p[i-1].a = p[i-1].b + 1; ++ ++ for (i = 0; i < num; i++) ++ if (p[i].a != p[i].b + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_malloc_size_var.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,47 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i, num; ++ ++ num = rand(); ++ num = num > N ? 
N : num; ++ str_t * p = malloc (num * sizeof (str_t)); ++ ++ if (p == 0) ++ return 0; ++ ++ for (i = 0; i < num; i++) ++ p[i].b = i; ++ ++ for (i = 0; i < num; i++) ++ p[i].a = p[i].b + 1; ++ ++ for (i = 0; i < num; i++) ++ if (p[i].a != p[i].b + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_mult_field_peeling.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,42 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++ int c; ++ float d; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 1600 ++#define N 100 ++#else ++#define N (STACK_SIZE/16) ++#endif ++#else ++#define N 100 ++#endif ++ ++int ++main () ++{ ++ int i; ++ str_t *p = malloc (N * sizeof (str_t)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ p[i].a = 5; ++ ++ for (i = 0; i < N; i++) ++ if (p[i].a != 5) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* The structure str_t is erroneously peeled into 4 structures instead of 2. */ ++/* { dg-final { scan-ipa-dump "the number of new types is 2" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_single_str_global.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,34 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++typedef struct ++{ ++ int a; ++ int b; ++}str_t; ++ ++#define N 3 ++ ++str_t str; ++ ++int ++main () ++{ ++ int i; ++ int res = 1<<(1< ++typedef struct ++{ ++ int a; ++ int b; ++}str_t; ++ ++#define N 3 ++ ++int ++main () ++{ ++ int i; ++ int res = 1<<(1< ++typedef struct ++{ ++ int a; ++ int *b; ++}str_t; ++ ++#define N 3 ++ ++str_t *p; ++ ++int ++main () ++{ ++ str_t str; ++ int i; ++ int res = 1 << (1 << N); ++ p = &str; ++ str.a = 2; ++ ++ p->b = &(p->a); ++ ++ for (i=0; i < N; i++) ++ p->a = *(p->b)*(*(p->b)); ++ ++ if (p->a != res) ++ abort (); ++ ++ /* POSIX ignores all but the 8 low-order bits, but other ++ environments may not. 
*/ ++ return (p->a & 255); ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "has escaped...Type escapes a cast to a different" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c b/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c +--- a/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/wo_prof_two_strs.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,67 @@ ++/* { dg-do compile } */ ++/* { dg-do run } */ ++ ++#include ++ ++typedef struct ++{ ++ int a; ++ float b; ++}str_t1; ++ ++typedef struct ++{ ++ int c; ++ float d; ++}str_t2; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 16000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/16) ++#endif ++#else ++#define N 1000 ++#endif ++ ++str_t1 *p1; ++str_t2 *p2; ++int num; ++ ++void ++foo (void) ++{ ++ int i; ++ ++ for (i=0; i < num; i++) ++ p2[i].c = 2; ++} ++ ++int ++main () ++{ ++ int i, r; ++ ++ r = rand (); ++ num = r > N ? N : r; ++ p1 = malloc (num * sizeof (str_t1)); ++ p2 = malloc (num * sizeof (str_t2)); ++ ++ if (p1 == NULL || p2 == NULL) ++ return 0; ++ ++ for (i = 0; i < num; i++) ++ p1[i].a = 1; ++ ++ foo (); ++ ++ for (i = 0; i < num; i++) ++ if (p1[i].a != 1 || p2[i].c != 2) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c +--- a/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_array.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,29 @@ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#define N 1000 ++str_t A[N]; ++ ++int ++main () ++{ ++ int i; ++ ++ for (i = 0; i < N; i++) ++ { ++ A[i].a = 0; ++ } ++ ++ for (i = 0; i < N; i++) ++ if (A[i].a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c +--- a/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_global_var.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,42 @@ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++str_t *p; ++ ++int ++main () ++{ ++ int i, sum; ++ ++ p = malloc (N * sizeof (str_t)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ p[i].b = i; ++ ++ for (i = 0; i < N; i++) ++ p[i].a = p[i].b + 1; ++ ++ for (i = 0; i < N; i++) ++ if (p[i].a != p[i].b + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c +--- a/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c 1969-12-31 19:00:00.000000000 -0500 ++++ 
b/gcc/testsuite/gcc.dg/struct/w_prof_local_array.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,37 @@ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i; ++ str_t A[N]; ++ ++ for (i = 0; i < N; i++) ++ { ++ A[i].a = 0; ++ } ++ ++ for (i = 0; i < N; i++) ++ if (A[i].a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c +--- a/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_local_var.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,40 @@ ++#include ++typedef struct ++{ ++ int a; ++ float b; ++}str_t; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 8000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/8) ++#endif ++#else ++#define N 1000 ++#endif ++ ++int ++main () ++{ ++ int i, sum; ++ ++ str_t * p = malloc (N * sizeof (str_t)); ++ if (p == NULL) ++ return 0; ++ for (i = 0; i < N; i++) ++ p[i].b = i; ++ ++ for (i = 0; i < N; i++) ++ p[i].a = p[i].b + 1; ++ ++ for (i = 0; i < N; i++) ++ if (p[i].a != p[i].b + 1) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c +--- a/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/w_prof_single_str_global.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,31 @@ ++#include ++typedef struct ++{ ++ int a; ++ int b; ++}str_t; ++ ++#define N 3 ++ ++str_t str; ++ ++int ++main () ++{ ++ int i; ++ int res = 1<<(1< ++ ++typedef struct ++{ ++ int a; ++ float b; ++}str_t1; ++ ++typedef struct ++{ ++ int c; ++ float d; ++}str_t2; ++ ++#ifdef STACK_SIZE ++#if STACK_SIZE > 16000 ++#define N 1000 ++#else ++#define N (STACK_SIZE/16) ++#endif ++#else ++#define N 1000 ++#endif ++ ++str_t1 *p1; ++str_t2 *p2; ++int num; ++ ++void ++foo (void) ++{ ++ int i; ++ ++ for (i=0; i < num; i++) ++ p2[i].c = 2; ++} ++ ++int ++main () ++{ ++ int i, r; ++ ++ r = rand (); ++ num = r > N ? 
N : r; ++ p1 = malloc (num * sizeof (str_t1)); ++ p2 = malloc (num * sizeof (str_t2)); ++ ++ if (p1 == NULL || p2 == NULL) ++ return 0; ++ ++ for (i = 0; i < num; i++) ++ p1[i].a = 1; ++ ++ foo (); ++ ++ for (i = 0; i < num; i++) ++ if (p1[i].a != 1 || p2[i].c != 2) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 2" "struct_reorg" } } */ +diff -Nurp a/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c +--- a/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/gcc.dg/struct/w_ratio_cold_str.c 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,43 @@ ++#include ++typedef struct ++{ ++ int a; ++ int b; ++}str_t1; ++ ++typedef struct ++{ ++ float a; ++ float b; ++}str_t2; ++ ++#define N1 1000 ++#define N2 100 ++str_t1 A1[N1]; ++str_t2 A2[N2]; ++ ++int ++main () ++{ ++ int i; ++ ++ for (i = 0; i < N1; i++) ++ A1[i].a = 0; ++ ++ for (i = 0; i < N2; i++) ++ A2[i].a = 0; ++ ++ for (i = 0; i < N1; i++) ++ if (A1[i].a != 0) ++ abort (); ++ ++ for (i = 0; i < N2; i++) ++ if (A2[i].a != 0) ++ abort (); ++ ++ return 0; ++} ++ ++/*--------------------------------------------------------------------------*/ ++/* Arrays are not handled. */ ++/* { dg-final-use { scan-ipa-dump "Number of structures to transform is 1" "struct_reorg" { xfail *-*-* } } } */ +diff -Nurp a/gcc/testsuite/g++.dg/torture/pr38355.C b/gcc/testsuite/g++.dg/torture/pr38355.C +--- a/gcc/testsuite/g++.dg/torture/pr38355.C 1969-12-31 19:00:00.000000000 -0500 ++++ b/gcc/testsuite/g++.dg/torture/pr38355.C 2020-06-16 22:56:07.736000000 -0400 +@@ -0,0 +1,25 @@ ++// { dg-do run } ++// { dg-options "-fwhole-program -fipa-struct-reorg" } ++template struct A ++{ ++ char c; ++ void foo(int); ++ void bar(int i) { foo(i+1); } ++}; ++ ++template struct B : virtual A<0> {}; ++ ++template inline void baz(B& b, int i) ++{ ++ if (i) b.bar(0); ++} ++ ++extern template class A<0>; ++extern template void baz(B<0>&, int); ++ ++int main() ++{ ++ B<0> b; ++ baz(b, 0); ++ return 0; ++} +diff -Nurp a/gcc/timevar.def b/gcc/timevar.def +--- a/gcc/timevar.def 2020-03-12 07:07:23.000000000 -0400 ++++ b/gcc/timevar.def 2020-06-16 22:56:07.736000000 -0400 +@@ -77,6 +77,7 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , " + DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics") + DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting") + DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats") ++DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization") + DEFTIMEVAR (TV_IPA_OPT , "ipa various optimizations") + DEFTIMEVAR (TV_IPA_LTO_DECOMPRESS , "lto stream inflate") + DEFTIMEVAR (TV_IPA_LTO_COMPRESS , "lto stream deflate") +diff -Nurp a/gcc/tree-pass.h b/gcc/tree-pass.h +--- a/gcc/tree-pass.h 2020-03-12 07:07:23.000000000 -0400 ++++ b/gcc/tree-pass.h 2020-06-16 22:56:07.736000000 -0400 +@@ -504,6 +504,7 @@ extern ipa_opt_pass_d *make_pass_ipa_dev + extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt); + extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt); ++extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_pta (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_ipa_tm (gcc::context *ctxt); + extern simple_ipa_opt_pass *make_pass_target_clone (gcc::context *ctxt); diff --git 
a/ivopts-1.patch b/ivopts-1.patch index 2c5e62c..6f2e041 100644 --- a/ivopts-1.patch +++ b/ivopts-1.patch @@ -1,3 +1,6 @@ +re-PR-tree-optimization-90240-ICE-in-try_improve_iv_.patch: +commit 98d8f142132ac670da2dc99cce530048343ab948 + diff -urpN a/gcc/testsuite/gfortran.dg/graphite/pr90240.f b/gcc/testsuite/gfortran.dg/graphite/pr90240.f new file mode 100644 --- /dev/null diff --git a/ivopts-2.patch b/ivopts-2.patch index c9cbec1..9bd0b68 100644 --- a/ivopts-2.patch +++ b/ivopts-2.patch @@ -1,3 +1,6 @@ +re-PR-tree-optimization-90078-ICE-with-deep-template.patch: +commit 8363a2f1f7c47d7b3d1760ce631a6824e91c0d80 + diff -urpN a/gcc/testsuite/g++.dg/tree-ssa/pr90078.C b/gcc/testsuite/g++.dg/tree-ssa/pr90078.C new file mode 100644 --- /dev/null diff --git a/loop-finite-bugfix.patch b/loop-finite-bugfix.patch index c159a8b..a290507 100644 --- a/loop-finite-bugfix.patch +++ b/loop-finite-bugfix.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-c-94392-only-enable-ffinite-loops-for-C.patch +75efe9cb1f8938a713ce540dc3b27bc2afcd3fae + diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c index 6b6c754ad86..58ba0948e79 100644 --- a/gcc/c-family/c-opts.c diff --git a/loop-finite.patch b/loop-finite.patch index 945ea28..cc2543e 100644 --- a/loop-finite.patch +++ b/loop-finite.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-PR-tree-optimization-89713-Assume-loop-with-an-exit-.patch +c29c92c789d93848cc1c929838771bfc68cb272c + diff --git a/gcc/common.opt b/gcc/common.opt index e1404165feb..a1544d06824 100644 --- a/gcc/common.opt diff --git a/loop-split.patch b/loop-split.patch index c689060..d99db75 100644 --- a/loop-split.patch +++ b/loop-split.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
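An illustrative sketch, not part of loop-split.patch: the backported change listed just below splits a loop at a "semi-invariant" condition, i.e. one that, once it changes value, keeps that value for the remaining iterations. A minimal, hypothetical example of the shape it targets:

    int
    foo (int *a, int n, int b)
    {
      int s = 0;
      for (int i = 0; i < n; i++)
        {
          if (b < 16)   /* semi-invariant: flips to false once, then stays false */
            b++;
          s += a[i];
        }
      return s;
    }

After splitting, one copy of the loop runs while b < 16 and a second, branch-free copy handles the remaining iterations.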
+ +0001-Loop-split-on-semi-invariant-conditional-statement.patch +095f78c62157124ad479a3f98b6995ced090b807 + diff --git a/gcc/params.def b/gcc/params.def index 942447d77e6..df7d1f7c5e7 100644 --- a/gcc/params.def diff --git a/medium-code-mode.patch b/medium-code-mode.patch new file mode 100644 index 0000000..9133683 --- /dev/null +++ b/medium-code-mode.patch @@ -0,0 +1,426 @@ +diff -Nurp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +--- a/gcc/config/aarch64/aarch64.c 2020-07-16 14:54:30.588000000 +0800 ++++ b/gcc/config/aarch64/aarch64.c 2020-07-16 15:06:33.000000000 +0800 +@@ -2030,6 +2030,32 @@ aarch64_load_symref_appropriately (rtx d + emit_insn (gen_add_losym (dest, tmp_reg, imm)); + return; + } ++ case SYMBOL_MEDIUM_ABSOLUTE: ++ { ++ rtx tmp_reg = dest; ++ machine_mode mode = GET_MODE (dest); ++ ++ gcc_assert (mode == Pmode || mode == ptr_mode); ++ if (can_create_pseudo_p ()) ++ tmp_reg = gen_reg_rtx (mode); ++ ++ if (mode == DImode) ++ { ++ emit_insn ( ++ gen_load_symbol_medium_di (dest, tmp_reg, imm)); ++ } ++ else ++ { ++ emit_insn ( ++ gen_load_symbol_medium_si (dest, tmp_reg, imm)); ++ } ++ if (REG_P (dest)) ++ { ++ set_unique_reg_note ( ++ get_last_insn (), REG_EQUIV, copy_rtx (imm)); ++ } ++ return; ++ } + + case SYMBOL_TINY_ABSOLUTE: + emit_insn (gen_rtx_SET (dest, imm)); +@@ -2152,6 +2178,64 @@ aarch64_load_symref_appropriately (rtx d + return; + } + ++ case SYMBOL_MEDIUM_GOT_4G: ++ { ++ rtx tmp_reg = dest; ++ machine_mode mode = GET_MODE (dest); ++ if (can_create_pseudo_p ()) ++ { ++ tmp_reg = gen_reg_rtx (mode); ++ } ++ rtx insn; ++ rtx mem; ++ rtx s = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_"); ++ ++ if (mode == DImode) ++ { ++ emit_insn ( ++ gen_load_symbol_medium_di (tmp_reg, dest, s)); ++ } ++ else ++ { ++ emit_insn ( ++ gen_load_symbol_medium_si (tmp_reg, dest, s)); ++ } ++ if (REG_P (dest)) ++ { ++ set_unique_reg_note ( ++ get_last_insn (), REG_EQUIV, copy_rtx (s)); ++ } ++ ++ if (mode == ptr_mode) ++ { ++ if (mode == DImode) ++ { ++ emit_insn (gen_get_gotoff_di (dest, imm)); ++ insn = gen_ldr_got_medium_di ( ++ dest, tmp_reg, dest); ++ } ++ else ++ { ++ emit_insn (gen_get_gotoff_si (dest, imm)); ++ insn = gen_ldr_got_medium_si ( ++ dest, tmp_reg, dest); ++ } ++ mem = XVECEXP (SET_SRC (insn), 0, 0); ++ } ++ else ++ { ++ gcc_assert (mode == Pmode); ++ emit_insn (gen_get_gotoff_di (dest, imm)); ++ insn = gen_ldr_got_medium_sidi (dest, tmp_reg, dest); ++ mem = XVECEXP (XEXP (SET_SRC (insn), 0), 0, 0); ++ } ++ ++ gcc_assert (GET_CODE (mem) == MEM); ++ MEM_READONLY_P (mem) = 1; ++ MEM_NOTRAP_P (mem) = 1; ++ emit_insn (insn); ++ return; ++ } + case SYMBOL_SMALL_TLSGD: + { + rtx_insn *insns; +@@ -3372,11 +3456,12 @@ aarch64_expand_mov_immediate (rtx dest, + + return; + +- case SYMBOL_SMALL_TLSGD: +- case SYMBOL_SMALL_TLSDESC: ++ case SYMBOL_SMALL_TLSGD: ++ case SYMBOL_SMALL_TLSDESC: + case SYMBOL_SMALL_TLSIE: + case SYMBOL_SMALL_GOT_28K: + case SYMBOL_SMALL_GOT_4G: ++ case SYMBOL_MEDIUM_GOT_4G: + case SYMBOL_TINY_GOT: + case SYMBOL_TINY_TLSIE: + if (const_offset != 0) +@@ -3395,6 +3480,7 @@ aarch64_expand_mov_immediate (rtx dest, + case SYMBOL_TLSLE24: + case SYMBOL_TLSLE32: + case SYMBOL_TLSLE48: ++ case SYMBOL_MEDIUM_ABSOLUTE: + aarch64_load_symref_appropriately (dest, imm, sty); + return; + +@@ -10334,6 +10420,13 @@ cost_plus: + if (speed) + *cost += extra_cost->alu.arith; + } ++ else if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM ++ || aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC) ++ { ++ /* 4 movs adr sub add 2movs ldr. 
*/ ++ if (speed) ++ *cost += 7*extra_cost->alu.arith; ++ } + + if (flag_pic) + { +@@ -10341,6 +10434,8 @@ cost_plus: + *cost += COSTS_N_INSNS (1); + if (speed) + *cost += extra_cost->ldst.load; ++ if (aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC) ++ *cost += 2*extra_cost->alu.arith; + } + return true; + +@@ -11395,6 +11490,7 @@ initialize_aarch64_tls_size (struct gcc_ + if (aarch64_tls_size > 32) + aarch64_tls_size = 32; + break; ++ case AARCH64_CMODEL_MEDIUM: + case AARCH64_CMODEL_LARGE: + /* The maximum TLS size allowed under large is 16E. + FIXME: 16E should be 64bit, we only support 48bit offset now. */ +@@ -12187,6 +12283,9 @@ initialize_aarch64_code_model (struct gc + aarch64_cmodel = AARCH64_CMODEL_SMALL_PIC; + #endif + break; ++ case AARCH64_CMODEL_MEDIUM: ++ aarch64_cmodel = AARCH64_CMODEL_MEDIUM_PIC; ++ break; + case AARCH64_CMODEL_LARGE: + sorry ("code model %qs with %<-f%s%>", "large", + opts->x_flag_pic > 1 ? "PIC" : "pic"); +@@ -12205,6 +12304,7 @@ static void + aarch64_option_save (struct cl_target_option *ptr, struct gcc_options *opts) + { + ptr->x_aarch64_override_tune_string = opts->x_aarch64_override_tune_string; ++ ptr->x_aarch64_data_threshold = opts->x_aarch64_data_threshold; + ptr->x_aarch64_branch_protection_string + = opts->x_aarch64_branch_protection_string; + } +@@ -12220,6 +12320,7 @@ aarch64_option_restore (struct gcc_optio + opts->x_explicit_arch = ptr->x_explicit_arch; + selected_arch = aarch64_get_arch (ptr->x_explicit_arch); + opts->x_aarch64_override_tune_string = ptr->x_aarch64_override_tune_string; ++ opts->x_aarch64_data_threshold = ptr->x_aarch64_data_threshold; + opts->x_aarch64_branch_protection_string + = ptr->x_aarch64_branch_protection_string; + if (opts->x_aarch64_branch_protection_string) +@@ -13067,6 +13168,8 @@ aarch64_classify_symbol (rtx x, HOST_WID + + case AARCH64_CMODEL_SMALL_SPIC: + case AARCH64_CMODEL_SMALL_PIC: ++ case AARCH64_CMODEL_MEDIUM_PIC: ++ case AARCH64_CMODEL_MEDIUM: + case AARCH64_CMODEL_SMALL: + return SYMBOL_SMALL_ABSOLUTE; + +@@ -13100,6 +13203,7 @@ aarch64_classify_symbol (rtx x, HOST_WID + return SYMBOL_TINY_ABSOLUTE; + + case AARCH64_CMODEL_SMALL: ++ AARCH64_SMALL_ROUTINE: + /* Same reasoning as the tiny code model, but the offset cap here is + 4G. */ + if ((SYMBOL_REF_WEAK (x) +@@ -13121,7 +13225,48 @@ aarch64_classify_symbol (rtx x, HOST_WID + ? SYMBOL_SMALL_GOT_28K : SYMBOL_SMALL_GOT_4G); + return SYMBOL_SMALL_ABSOLUTE; + ++ case AARCH64_CMODEL_MEDIUM: ++ { ++ tree decl_local = SYMBOL_REF_DECL (x); ++ if (decl_local != NULL ++ && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local))) ++ { ++ HOST_WIDE_INT size = tree_to_uhwi ( ++ DECL_SIZE_UNIT (decl_local)); ++ /* If the data is smaller than the threshold, goto ++ the small code model. Else goto the large code ++ model. 
*/ ++ if (size >= HOST_WIDE_INT (aarch64_data_threshold)) ++ goto AARCH64_LARGE_ROUTINE; ++ } ++ goto AARCH64_SMALL_ROUTINE; ++ } ++ ++ case AARCH64_CMODEL_MEDIUM_PIC: ++ { ++ tree decl_local = SYMBOL_REF_DECL (x); ++ if (decl_local != NULL ++ && tree_fits_uhwi_p (DECL_SIZE_UNIT (decl_local))) ++ { ++ HOST_WIDE_INT size = tree_to_uhwi ( ++ DECL_SIZE_UNIT (decl_local)); ++ if (size < HOST_WIDE_INT (aarch64_data_threshold)) ++ { ++ if (!aarch64_symbol_binds_local_p (x)) ++ { ++ return SYMBOL_SMALL_GOT_4G; ++ } ++ return SYMBOL_SMALL_ABSOLUTE; ++ } ++ } ++ if (!aarch64_symbol_binds_local_p (x)) ++ { ++ return SYMBOL_MEDIUM_GOT_4G; ++ } ++ return SYMBOL_MEDIUM_ABSOLUTE; ++ } + case AARCH64_CMODEL_LARGE: ++ AARCH64_LARGE_ROUTINE: + /* This is alright even in PIC code as the constant + pool reference is always PC relative and within + the same translation unit. */ +@@ -15364,6 +15509,8 @@ aarch64_asm_preferred_eh_data_format (in + case AARCH64_CMODEL_SMALL: + case AARCH64_CMODEL_SMALL_PIC: + case AARCH64_CMODEL_SMALL_SPIC: ++ case AARCH64_CMODEL_MEDIUM: ++ case AARCH64_CMODEL_MEDIUM_PIC: + /* text+got+data < 4Gb. 4-byte signed relocs are sufficient + for everything. */ + type = DW_EH_PE_sdata4; +@@ -18454,7 +18601,8 @@ aarch64_empty_mask_is_expensive (unsigne + bool + aarch64_use_pseudo_pic_reg (void) + { +- return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC; ++ return aarch64_cmodel == AARCH64_CMODEL_SMALL_SPIC ++ || aarch64_cmodel == AARCH64_CMODEL_MEDIUM_PIC ; + } + + /* Implement TARGET_UNSPEC_MAY_TRAP_P. */ +@@ -18464,6 +18612,7 @@ aarch64_unspec_may_trap_p (const_rtx x, + { + switch (XINT (x, 1)) + { ++ case UNSPEC_GOTMEDIUMPIC4G: + case UNSPEC_GOTSMALLPIC: + case UNSPEC_GOTSMALLPIC28K: + case UNSPEC_GOTTINYPIC: +diff -Nurp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h +--- a/gcc/config/aarch64/aarch64.h 2020-07-16 14:54:30.592000000 +0800 ++++ b/gcc/config/aarch64/aarch64.h 2020-07-16 14:55:05.672000000 +0800 +@@ -33,6 +33,10 @@ + + #define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas () + ++/* Default threshold 64-bit relocation data ++ with aarch64 medium memory model. */ ++#define AARCH64_DEFAULT_LARGE_DATA_THRESHOLD 65536 ++ + /* Target machine storage layout. 
*/ + + #define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ +diff -Nurp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md +--- a/gcc/config/aarch64/aarch64.md 2020-07-16 14:54:30.588000000 +0800 ++++ b/gcc/config/aarch64/aarch64.md 2020-07-16 14:55:05.676000000 +0800 +@@ -209,6 +209,11 @@ + UNSPEC_RSQRTS + UNSPEC_NZCV + UNSPEC_XPACLRI ++ UNSPEC_MOV_MEDIUM_SYMBOL ++ UNSPEC_GET_LAST_PC ++ UNSPEC_GOTMEDIUMPIC4G ++ UNSPEC_GET_GOTOFF ++ UNSPEC_LOAD_SYMBOL_MEDIUM + UNSPEC_LD1_SVE + UNSPEC_ST1_SVE + UNSPEC_LD1RQ +@@ -6548,6 +6553,39 @@ + [(set_attr "type" "load_4")] + ) + ++(define_insn "get_gotoff_" ++ [(set (match_operand:GPI 0 "register_operand" "=r") ++ (unspec:GPI [(match_operand 1 "aarch64_valid_symref" "S")] ++ UNSPEC_GET_GOTOFF))] ++ "" ++ "movz\\t%x0, :gotoff_g1:%A1\;movk\\t%x0, :gotoff_g0_nc:%A1" ++ [(set_attr "type" "multiple") ++ (set_attr "length" "8")] ++) ++ ++(define_insn "ldr_got_medium_" ++ [(set (match_operand:PTR 0 "register_operand" "=r") ++ (unspec:PTR [(mem:PTR (lo_sum:PTR ++ (match_operand:PTR 1 "register_operand" "r") ++ (match_operand:PTR 2 "register_operand" "r")))] ++ UNSPEC_GOTMEDIUMPIC4G))] ++ "" ++ "ldr\\t%0, [%1, %2]" ++ [(set_attr "type" "load_4")] ++) ++ ++(define_insn "ldr_got_medium_sidi" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (zero_extend:DI ++ (unspec:SI [(mem:SI (lo_sum:DI ++ (match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "register_operand" "r")))] ++ UNSPEC_GOTMEDIUMPIC4G)))] ++ "TARGET_ILP32" ++ "ldr\\t%0, [%1, %2]" ++ [(set_attr "type" "load_4")] ++) ++ + (define_insn "ldr_got_small_28k_" + [(set (match_operand:PTR 0 "register_operand" "=r") + (unspec:PTR [(mem:PTR (lo_sum:PTR +@@ -6709,6 +6747,23 @@ + (set_attr "length" "12")] + ) + ++(define_insn "load_symbol_medium_" ++ [(set (match_operand:GPI 0 "register_operand" "=r") ++ (unspec:GPI [(match_operand 2 "aarch64_valid_symref" "S")] ++ UNSPEC_LOAD_SYMBOL_MEDIUM)) ++ (clobber (match_operand:GPI 1 "register_operand" "=r"))] ++ "" ++ "movz\\t%x0, :prel_g3:%A2\;\\ ++ movk\\t%x0, :prel_g2_nc:%A2\;\\ ++ movk\\t%x0, :prel_g1_nc:%A2\;\\ ++ movk\\t%x0, :prel_g0_nc:%A2\;\\ ++ adr\\t%x1, .\;\\ ++ sub\\t%x1, %x1, 0x4\;\\ ++ add\\t%x0, %x0, %x1" ++ [(set_attr "type" "multiple") ++ (set_attr "length" "28")] ++) ++ + (define_expand "tlsdesc_small_" + [(unspec:PTR [(match_operand 0 "aarch64_valid_symref")] UNSPEC_TLSDESC)] + "TARGET_TLS_DESC" +diff -Nurp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt +--- a/gcc/config/aarch64/aarch64.opt 2020-07-16 14:54:30.580000000 +0800 ++++ b/gcc/config/aarch64/aarch64.opt 2020-07-16 14:55:05.676000000 +0800 +@@ -27,6 +27,10 @@ enum aarch64_processor explicit_tune_cor + TargetVariable + enum aarch64_arch explicit_arch = aarch64_no_arch + ++;; -mlarge-data-threshold= ++TargetSave ++int x_aarch64_data_threshold ++ + TargetSave + const char *x_aarch64_override_tune_string + +@@ -61,8 +65,15 @@ EnumValue + Enum(cmodel) String(small) Value(AARCH64_CMODEL_SMALL) + + EnumValue ++Enum(cmodel) String(medium) Value(AARCH64_CMODEL_MEDIUM) ++ ++EnumValue + Enum(cmodel) String(large) Value(AARCH64_CMODEL_LARGE) + ++mlarge-data-threshold= ++Target RejectNegative Joined UInteger Var(aarch64_data_threshold) Init(AARCH64_DEFAULT_LARGE_DATA_THRESHOLD) ++-mlarge-data-threshold= Data greater than given threshold will be assume that it should be relocated using 64-bit relocation. ++ + mbig-endian + Target Report RejectNegative Mask(BIG_END) + Assume target CPU is configured as big endian. 
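An illustrative sketch, not part of medium-code-mode.patch: with the -mcmodel=medium and -mlarge-data-threshold= options added above, symbols are classified by size -- data at or above the threshold (default 65536 bytes, AARCH64_DEFAULT_LARGE_DATA_THRESHOLD) is addressed through the new 64-bit relocation sequences, while smaller data keeps the small code model's addressing. The names and sizes below are hypothetical:

    static char small_buf[4096];     /* below the threshold: small-model addressing */
    static char big_buf[1 << 20];    /* above the threshold: 64-bit relocations     */

    char
    get (long i)
    {
      return small_buf[i & 4095] + big_buf[i];
    }

Built with something like: gcc -O2 -mcmodel=medium -mlarge-data-threshold=65536 file.c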
+diff -Nurp a/gcc/config/aarch64/aarch64-opts.h b/gcc/config/aarch64/aarch64-opts.h +--- a/gcc/config/aarch64/aarch64-opts.h 2020-07-16 14:54:30.584000000 +0800 ++++ b/gcc/config/aarch64/aarch64-opts.h 2020-07-16 14:55:05.676000000 +0800 +@@ -66,6 +66,10 @@ enum aarch64_code_model { + /* -fpic for small memory model. + GOT size to 28KiB (4K*8-4K) or 3580 entries. */ + AARCH64_CMODEL_SMALL_SPIC, ++ /* Using movk insn sequence to do 64bit PC relative relocation. */ ++ AARCH64_CMODEL_MEDIUM, ++ /* Using movk insn sequence to do 64bit PC relative got relocation. */ ++ AARCH64_CMODEL_MEDIUM_PIC, + /* No assumptions about addresses of code and data. + The PIC variant is not yet implemented. */ + AARCH64_CMODEL_LARGE +diff -Nurp a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +--- a/gcc/config/aarch64/aarch64-protos.h 2020-07-16 14:54:30.584000000 +0800 ++++ b/gcc/config/aarch64/aarch64-protos.h 2020-07-16 14:55:05.676000000 +0800 +@@ -95,9 +95,11 @@ + */ + enum aarch64_symbol_type + { ++ SYMBOL_MEDIUM_ABSOLUTE, + SYMBOL_SMALL_ABSOLUTE, + SYMBOL_SMALL_GOT_28K, + SYMBOL_SMALL_GOT_4G, ++ SYMBOL_MEDIUM_GOT_4G, + SYMBOL_SMALL_TLSGD, + SYMBOL_SMALL_TLSDESC, + SYMBOL_SMALL_TLSIE, diff --git a/reduction-chain-slp-option.patch b/reduction-chain-slp-option.patch new file mode 100644 index 0000000..1b10c5c --- /dev/null +++ b/reduction-chain-slp-option.patch @@ -0,0 +1,52 @@ +diff -Nurp a/gcc/common.opt b/gcc/common.opt +--- a/gcc/common.opt 2020-06-20 23:53:56.124000000 +0800 ++++ b/gcc/common.opt 2020-06-22 23:02:18.808000000 +0800 +@@ -2858,6 +2858,10 @@ ftree-slp-vectorize + Common Report Var(flag_tree_slp_vectorize) Optimization EnabledBy(ftree-vectorize) + Enable basic block vectorization (SLP) on trees. + ++ftree-vect-analyze-slp-group ++Common Report Var(flag_tree_slp_group) Init(0) ++Disable SLP vectorization for reduction chain on tree. ++ + fvect-cost-model= + Common Joined RejectNegative Enum(vect_cost_model) Var(flag_vect_cost_model) Init(VECT_COST_MODEL_DEFAULT) Optimization + -fvect-cost-model=[unlimited|dynamic|cheap] Specifies the cost model for vectorization. +diff -Nurp a/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-12.c 2020-06-22 23:04:08.260000000 +0800 +@@ -0,0 +1,20 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize -fdump-tree-vect-details -funsafe-math-optimizations -fno-tree-reassoc -ftree-vect-analyze-slp-group" } */ ++void f(double *a, double *res, double m) { ++ double res1, res0; ++ res1 = 0; ++ res0 = 0; ++ for (int i = 0; i < 1000; i+=8) { ++ res0 += a[i] * m; ++ res1 += a[i+1] * m; ++ res0 += a[i+2] * m; ++ res1 += a[i+3] * m; ++ res0 += a[i+4] * m; ++ res1 += a[i+5] * m; ++ res0 += a[i+6] * m; ++ res1 += a[i+7] * m; ++ } ++ res[0] += res0; ++ res[1] += res1; ++} ++/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ +diff -Nurp a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c +--- a/gcc/tree-vect-slp.c 2020-06-21 01:07:56.516000000 +0800 ++++ b/gcc/tree-vect-slp.c 2020-06-22 23:02:54.540000000 +0800 +@@ -2327,8 +2327,9 @@ vect_analyze_slp (vec_info *vinfo, unsig + { + /* Find SLP sequences starting from reduction chains. */ + FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element) +- if (! vect_analyze_slp_instance (vinfo, bst_map, first_element, ++ if (flag_tree_slp_group ++ || ! 
vect_analyze_slp_instance (vinfo, bst_map, first_element, + max_tree_size)) + { + /* Dissolve reduction chain group. */ + stmt_vec_info vinfo = first_element; diff --git a/reductions-slp-enhancement.patch b/reductions-slp-enhancement.patch new file mode 100644 index 0000000..de426a3 --- /dev/null +++ b/reductions-slp-enhancement.patch @@ -0,0 +1,59 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-tree-vect-slp.c-vect_analyze_slp-When-reduction-grou.patch +0214d31a48f867b9b00134cea7223d35ed7865aa + +diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-9.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-9.c +new file mode 100644 +index 00000000000..bee642ee999 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-9.c +@@ -0,0 +1,25 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target vect_int_mult } */ ++ ++int ++bar (int *x, int a, int b, int n) ++{ ++ x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__); ++ int sum1 = 0; ++ int sum2 = 0; ++ for (int i = 0; i < n; ++i) ++ { ++ /* Reduction chain vectorization fails here because of the ++ different operations but we can still vectorize both ++ reductions as SLP reductions, saving IVs. */ ++ sum1 += x[2*i] - a; ++ sum1 += x[2*i+1] * b; ++ sum2 += x[2*i] - b; ++ sum2 += x[2*i+1] * a; ++ } ++ return sum1 + sum2; ++} ++ ++/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */ ++/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ ++/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ +diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c +index e1061ede061..0af51197a84 100644 +--- a/gcc/tree-vect-slp.c ++++ b/gcc/tree-vect-slp.c +@@ -2271,14 +2271,18 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) + { + /* Dissolve reduction chain group. */ + stmt_vec_info vinfo = first_element; ++ stmt_vec_info last = NULL; + while (vinfo) + { + stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo); + REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL; + REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL; ++ last = vinfo; + vinfo = next; + } + STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def; ++ /* It can be still vectorized as part of an SLP reduction. */ ++ loop_vinfo->reductions.safe_push (last); + } + } diff --git a/remove-array-index-inliner-hint.patch b/remove-array-index-inliner-hint.patch index e0c09fb..416cd9a 100644 --- a/remove-array-index-inliner-hint.patch +++ b/remove-array-index-inliner-hint.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-ipa-fnsummary.c-ipa_dump_hints-Do-not-dump-array_ind.patch +a20f263ba1a76af40eb4e6734529739a2a30ed65 + diff -uprN a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi diff --git a/simplify-removing-subregs.patch b/simplify-removing-subregs.patch new file mode 100644 index 0000000..199ff7a --- /dev/null +++ b/simplify-removing-subregs.patch @@ -0,0 +1,117 @@ +diff -Nurp a/gcc/expr.c b/gcc/expr.c +--- a/gcc/expr.c 2020-08-05 20:33:04.068000000 +0800 ++++ b/gcc/expr.c 2020-08-05 20:33:21.420000000 +0800 +@@ -3770,6 +3770,78 @@ emit_move_insn (rtx x, rtx y) + gcc_assert (mode != BLKmode + && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode)); + ++ /* If we have a copy that looks like one of the following patterns: ++ (set (subreg:M1 (reg:M2 ...)) (subreg:M1 (reg:M2 ...))) ++ (set (subreg:M1 (reg:M2 ...)) (mem:M1 ADDR)) ++ (set (mem:M1 ADDR) (subreg:M1 (reg:M2 ...))) ++ (set (subreg:M1 (reg:M2 ...)) (constant C)) ++ where mode M1 is equal in size to M2, try to detect whether the ++ mode change involves an implicit round trip through memory. ++ If so, see if we can avoid that by removing the subregs and ++ doing the move in mode M2 instead. */ ++ ++ rtx x_inner = NULL_RTX; ++ rtx y_inner = NULL_RTX; ++ ++ #define CANDIDATE_SUBREG_P(subreg) \ ++ (REG_P (SUBREG_REG (subreg)) \ ++ && known_eq (GET_MODE_SIZE (GET_MODE (SUBREG_REG (subreg))), \ ++ GET_MODE_SIZE (GET_MODE (subreg))) \ ++ && optab_handler (mov_optab, GET_MODE (SUBREG_REG (subreg))) \ ++ != CODE_FOR_nothing) ++ ++ #define CANDIDATE_MEM_P(innermode, mem) \ ++ (!targetm.can_change_mode_class ((innermode), GET_MODE (mem), ALL_REGS) \ ++ && !push_operand ((mem), GET_MODE (mem)) \ ++ /* Not a candiate if innermode requires too much alignment. */ \ ++ && (MEM_ALIGN (mem) >= GET_MODE_ALIGNMENT (innermode) \ ++ || targetm.slow_unaligned_access (GET_MODE (mem), \ ++ MEM_ALIGN (mem)) \ ++ || !targetm.slow_unaligned_access ((innermode), \ ++ MEM_ALIGN (mem)))) ++ ++ if (SUBREG_P (x) && CANDIDATE_SUBREG_P (x)) ++ x_inner = SUBREG_REG (x); ++ ++ if (SUBREG_P (y) && CANDIDATE_SUBREG_P (y)) ++ y_inner = SUBREG_REG (y); ++ ++ if (x_inner != NULL_RTX ++ && y_inner != NULL_RTX ++ && GET_MODE (x_inner) == GET_MODE (y_inner) ++ && !targetm.can_change_mode_class (GET_MODE (x_inner), mode, ALL_REGS)) ++ { ++ x = x_inner; ++ y = y_inner; ++ mode = GET_MODE (x_inner); ++ } ++ else if (x_inner != NULL_RTX ++ && MEM_P (y) ++ && CANDIDATE_MEM_P (GET_MODE (x_inner), y)) ++ { ++ x = x_inner; ++ y = adjust_address (y, GET_MODE (x_inner), 0); ++ mode = GET_MODE (x_inner); ++ } ++ else if (y_inner != NULL_RTX ++ && MEM_P (x) ++ && CANDIDATE_MEM_P (GET_MODE (y_inner), x)) ++ { ++ x = adjust_address (x, GET_MODE (y_inner), 0); ++ y = y_inner; ++ mode = GET_MODE (y_inner); ++ } ++ else if (x_inner != NULL_RTX ++ && CONSTANT_P (y) ++ && !targetm.can_change_mode_class (GET_MODE (x_inner), ++ mode, ALL_REGS) ++ && (y_inner = simplify_subreg (GET_MODE (x_inner), y, mode, 0))) ++ { ++ x = x_inner; ++ y = y_inner; ++ mode = GET_MODE (x_inner); ++ } ++ + if (CONSTANT_P (y)) + { + if (optimize +diff -Nurp a/gcc/testsuite/gcc.target/aarch64/pr95254.c b/gcc/testsuite/gcc.target/aarch64/pr95254.c +--- a/gcc/testsuite/gcc.target/aarch64/pr95254.c 1970-01-01 08:00:00.000000000 +0800 ++++ b/gcc/testsuite/gcc.target/aarch64/pr95254.c 2020-08-05 20:33:21.424000000 +0800 +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-slp-vectorize -march=armv8.2-a+sve -msve-vector-bits=256" } */ ++ ++typedef short 
__attribute__((vector_size (8))) v4hi; ++ ++typedef union U4HI { v4hi v; short a[4]; } u4hi; ++ ++short b[4]; ++ ++void pass_v4hi (v4hi v) ++{ ++ int i; ++ u4hi u; ++ u.v = v; ++ for (i = 0; i < 4; i++) ++ b[i] = u.a[i]; ++}; ++ ++/* { dg-final { scan-assembler-not "ptrue" } } */ +diff -Nurp a/gcc/testsuite/gcc.target/i386/pr67609.c b/gcc/testsuite/gcc.target/i386/pr67609.c +--- a/gcc/testsuite/gcc.target/i386/pr67609.c 2020-08-05 20:33:04.628000000 +0800 ++++ b/gcc/testsuite/gcc.target/i386/pr67609.c 2020-08-05 20:33:21.424000000 +0800 +@@ -1,7 +1,7 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -msse2" } */ + /* { dg-require-effective-target lp64 } */ +-/* { dg-final { scan-assembler "movdqa" } } */ ++/* { dg-final { scan-assembler "movq\t%xmm0" } } */ + + #include + __m128d reg; diff --git a/skip-debug-insns-when-computing-inline-costs.patch b/skip-debug-insns-when-computing-inline-costs.patch index 6155590..2f09c27 100644 --- a/skip-debug-insns-when-computing-inline-costs.patch +++ b/skip-debug-insns-when-computing-inline-costs.patch @@ -1,3 +1,9 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-PR91176-Skip-debug-insns-when-computing-inline-costs.patch +d3ed5b56646511a52db9992f4024969bfc9a13f9 + diff -uprN a/gcc/ipa-fnsummary.c b/gcc/ipa-fnsummary.c --- a/gcc/ipa-fnsummary.c +++ b/gcc/ipa-fnsummary.c diff --git a/tighten-range-for-generating-csel.patch b/tighten-range-for-generating-csel.patch new file mode 100644 index 0000000..8e628f8 --- /dev/null +++ b/tighten-range-for-generating-csel.patch @@ -0,0 +1,132 @@ +This backport contains 1 patch from gcc main stream tree. +The commit id of these patchs list as following in the order of time. + +0001-cselim-Don-t-assume-it-is-safe-to-cstore-replace-a-s.patch +cf39dccf9284d2fd9f9aa7050760adea110c8d88 + +diff -uprN a/gcc/testsuite/gcc.c-torture/execute/pr94734.c b/gcc/testsuite/gcc.c-torture/execute/pr94734.c +new file mode 100644 +--- /dev/null ++++ b/gcc/testsuite/gcc.c-torture/execute/pr94734.c +@@ -0,0 +1,59 @@ ++/* PR tree-optimization/94734 */ ++ ++__attribute__((noipa)) int ++foo (int n) ++{ ++ int arr[16], s = 0; ++ for (int i = 0; i < n; i++) ++ { ++ if (i < 16) ++ arr[i] = i; ++ } ++ for (int i = 0; i < 16; i++) ++ s += arr[i]; ++ return s; ++} ++ ++__attribute__((noipa)) int ++bar (int n, int x, unsigned long y, unsigned long z) ++{ ++ int arr[16], s = 0; ++ arr[4] = 42; ++ for (int i = 0; i < n; i++) ++ { ++ if (x == (i & 0x25)) ++ arr[y] = i; ++ } ++ return arr[z]; ++} ++ ++__attribute__((noipa)) int ++baz (int n, int x, unsigned long z) ++{ ++ int arr[16], s = 0; ++ arr[12] = 42; ++ for (int i = 0; i < n; i++) ++ { ++ if (x == (i & 0x25)) ++ arr[7] = i; ++ } ++ return arr[z]; ++} ++ ++int ++main () ++{ ++ if (foo (10374) != 15 * 16 / 2) ++ __builtin_abort (); ++ if (bar (25, 0x25, (unsigned long) 0xdeadbeefbeefdeadULL, 4) != 42) ++ __builtin_abort (); ++ if (bar (25, 4, 15, 15) != 22) ++ __builtin_abort (); ++ if (baz (25, 0x25, 12) != 42) ++ __builtin_abort (); ++ if (baz (25, 4, 7) != 22) ++ __builtin_abort (); ++ if (baz (25, 4, 12) != 42) ++ __builtin_abort (); ++ return 0; ++} +diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-1.c +@@ -9,4 +9,4 @@ unsigned test(unsigned k, unsigned b) { + return a[0]+a[1]; + } + +-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" 
} } */ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ +diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-2.c +@@ -11,4 +11,4 @@ unsigned test(unsigned k, unsigned b) { + return a[0]+a[1]; + } + +-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ +diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-5.c +@@ -13,4 +13,4 @@ int test(int b, int k) { + return a.data[0] + a.data[1]; + } + +-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ +diff -uprN a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c +--- a/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/pr89430-6.c +@@ -16,4 +16,4 @@ int test(int b, int k) { + return a.data[0].x + a.data[1].x; + } + +-/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" } } */ ++/* { dg-final { scan-tree-dump "Conditional store replacement" "cselim" { xfail *-*-* } } } */ +diff -uprN a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c +--- a/gcc/tree-ssa-phiopt.c ++++ b/gcc/tree-ssa-phiopt.c +@@ -45,6 +45,7 @@ along with GCC; see the file COPYING3. If not see + #include "tree-inline.h" + #include "params.h" + #include "case-cfn-macros.h" ++#include "tree-eh.h" + + static unsigned int tree_ssa_phiopt_worker (bool, bool, bool); + static bool two_value_replacement (basic_block, basic_block, edge, gphi *, +@@ -2237,10 +2238,13 @@ cond_store_replacement (basic_block middle_bb, basic_block join_bb, + whose value is not available readily, which we want to avoid. */ + if (!nontrap->contains (lhs)) + { +- /* If LHS is a local variable without address-taken, we could ++ /* If LHS is an access to a local variable without address-taken ++ (or when we allow data races) and known not to trap, we could + always safely move down the store. */ + tree base = get_base_address (lhs); +- if (!auto_var_p (base) || TREE_ADDRESSABLE (base)) ++ if (!auto_var_p (base) ++ || (TREE_ADDRESSABLE (base) && !flag_store_data_races) ++ || tree_could_trap_p (lhs)) + return false; + } diff --git a/vectorization-enhancement.patch b/vectorization-enhancement.patch new file mode 100644 index 0000000..3c7f0af --- /dev/null +++ b/vectorization-enhancement.patch @@ -0,0 +1,20239 @@ +This backport contains 128 patchs from gcc main stream tree. +The commit id of these patchs list as following in the order of time. 
+ +0001-Aarch64-SVE-Dot-product-support.patch +9feeafd7f95ea9f7211908c137c60074b3a52da2 + +0002-tree-vect-stmts.c-get_group_load_store_type-Avoid-pe.patch +419c5f99876d9ee517f6b646dd785cdcaf5cb6fe + +0003-re-PR-tree-optimization-90358-526.blender_r-train-ru.patch +898758504fa87d9f5e72c2c8b32139b413276a10 + +0004-tree-vect-slp.c-vect_build_slp_tree_2-Bump-size-when.patch +9f708a844853eb2fe87e696d27de14cbd68896f8 + +0005-cfgloop.h-struct-loop-Add-simdlen-member.patch +f63445e56c265757ebd50dc12fcd01773341b49f + +0006-Current-vectoriser-doesn-t-support-masked-loads-for-.patch +997636716c5dde7d59d026726a6f58918069f122 + +0007-tree-vrp.h-value_range_base-nonzero_p-New.patch +f2b00d2ba461d6dafdeccf6d93828b349b5e7f76 + +0008-AArch64-PR-tree-optimization-90332-Implement-vec_ini.patch +41dab855dce20d5d7042c9330dd8124d0ece19c0 + +0009-Fix-a-thinko-in-tree-ssa-loop.c.patch +cc261f66c268107b120add99942d729b3a489452 + +0010-re-PR-tree-optimization-90883-Generated-code-is-wors.patch +3fe0ddc88334f9afd622458653a6d103948994bd + +0011-re-PR-tree-optimization-90883-Generated-code-is-wors.patch +08c1638dab9becfafc65064891c1c59f5711c27f + +0012-Remove-quite-obvious-dead-assignments.patch +45309d286c80ecad8b7a4efba0e9aba35d847af6 + +0013-Fix-various-issues-seen-with-clang-static-analyzer.patch +ef874db611879d5004e1d834543e55d31f2bfe1c + +0014-re-PR-tree-optimization-91033-ICE-in-vect_analyze_lo.patch +a7b3509eb6aa51d696be5edba6f4e451ceff03a0 + +0015-re-PR-tree-optimization-91069-Miscompare-of-453.povr.patch +75da268e1a563a1a52389cd2ecee12d07c45a655 + +0016-tree-vrp.c-extract_range_from_multiplicative_op-Add-.patch +e2cfa983c31fa7886f496a47feb8714297ca0063 + +0017-re-PR-tree-optimization-91257-Compile-time-and-memor.patch +a55d6091230ae8d0d6f6c20dcc55158f6705090e + +0018-re-PR-tree-optimization-91257-Compile-time-and-memor.patch +ce52e0ffb4f1ea7bd4fb99aea5dda75d260e438f + +0019-Enforce-canonicalization-in-value_range.patch: +c7cf3a9bb00b6d64ba0c0e0761f000758e9428a6 + +0020-tree-vectorizer.h-get_initial_def_for_reduction-Remo.patch +5fdd6038147e4ba30c8c01332dae8ab0d717bc14 + +0021-tree-parloops.c-report_ploop_op-Copy-from-report_vec.patch +31de92e39bbeffb9f1641d292e94b48f70809ae1 + +0022-tree-vect-loop.c-vect_is_simple_reduction-Remove-ope.patch +901083b9bdf69a7b1382f9682c6fd1d5759667dd + +0023-Enforce-correct-COND_EXPR-order-for-EXTRACT_LAST_RED.patch +c449d3ae28ff4e133114fb67dbf7dcc7a95ca5d5 + +0024-tree-vect-loop.c-vect_is_slp_reduction-Remove.patch +b3c4d0dd309b7027f6e0f0b9a84829fcd53f7d64 + +0025-re-PR-tree-optimization-91822-FAIL-gcc.dg-pr88031.c-.patch +6e222b2a3aede20f3093802d1649e75848e3bd2b + +0026-re-PR-target-91269-unaligned-floating-point-register.patch +d63eadac7db10d4846bdffa93fd164cb035fb102 + +0027-tree-vect-loop.c-get_initial_def_for_reduction-Simpl.patch +d469a71e5a0eb512b522248841c56496abca8cd6 + +0028-tree-vectorizer.h-_stmt_vec_info-const_cond_reduc_co.patch +a7701dd16103048432ec8051e4773760c0e2cf90 + +0029-re-PR-tree-optimization-91896-ICE-in-vect_get_vec_de.patch +fadb01364d36a50836201bc9a6a03e525d267967 + +0030-tree-vect-loop.c-vect_analyze_loop_operations-Also-c.patch +9593e8e5e391e77bb065d4689b7511bed6a640a3 + +0031-tree-vect-loop.c-vect_analyze_loop_operations-Analyz.patch +1b4dbccc1f828fa00e6acc8b88d24301c65552df + +0032-Fix-reduc_index-1-handling-for-COND_REDUCTION-PR9190.patch +18908a56e18f15f84a91a4529923dd0878b2294f + +0033-tree-vectorizer.h-_stmt_vec_info-reduc_fn-New.patch +29f26978866f32bddd656847441a3a953ffd7a21 + +0034-gimple.c-gimple_get_lhs-For-PHIs-return-the-result.patch 
+61362d9d18916bd5b694385982cf4a02b7537b0e + +0035-tree-vect-loop.c-vectorizable_reduction-Move-variabl.patch +c7ea76ea5629e9f0357de49847274cf80e35f2f8 + +0036-tree-if-conv.c-tree_if_conversion-Move-call-to-ifcvt.patch +f30b3d2891cef9803badb3f85d739c0fcfafd585 + +0037-tree-vectorizer.h-stmt_vec_info_type-cycle_phi_info_.patch +291fa23ac04e317877c1e102937532f080180bb2 + +0038-re-PR-tree-optimization-91940-__builtin_bswap16-loop.patch +9ff9a0a5e6edd8729f559bf86ca06f781c4da246 + +0039-tree-vectorizer.h-vect_transform_reduction-Declare.patch +9f4d9a366b3299c276043ab987234c7bed7d29f2 + +0040-re-PR-target-91982-gcc.target-aarch64-sve-clastb_-.c.patch +48528394eafa9d1db9f956570f910c76d429a3e5 + +0041-re-PR-tree-optimization-91532-SVE-Redundant-predicat.patch +b238b34ea47222ffca7addc5fe4e8c052ade88b3 + +0042-tree-vectorizer.h-_stmt_vec_info-v_reduc_type-Remove.patch +69f8c1aef5cdcc54d5cb2ca4f99f4f26c2f822a9 + +0043-tree-vectorizer.h-_stmt_vec_info-reduc_vectype_in-Ne.patch +f78347996e02a8a767a525bfb764e769afe29d67 + +0044-tree-vect-loop.c-vect_is_simple_reduction-Simplify-a.patch +4a8841c0413d52261a8d024577381582d07a866a + +0045-re-PR-tree-optimization-92069-ice-in-vect_analyze_sc.patch +7bd8bec53f0e43c7a7852c54650746e65324514b + +0046-Deal-with-incoming-POLY_INT_CST-ranges-PR92033.patch +96eb7d7a642085f651e9940f0ee75568d7c4441d + +0047-tree-vect-loop.c-vect_valid_reduction_input_p-Remove.patch +aab8c2fd6542a52663243eec160b80bdd61516d5 + +0048-tree-vect-loop.c-needs_fold_left_reduction_p-Export.patch +aa9dffac731d0359a0e7a925ff8f4a1bef182eac + +0049-vect-Refactor-versioning-threshold.patch +a421fe9e610b5dbfce1913cd724c8ba193addd47 + +0050-vect-Outline-code-into-new-function-determine_peel_f.patch +31b35fd503e1c6713839db24044812d237aba5f1 + +0051-vect-Be-consistent-in-versioning-threshold-use.patch +f261d4808cc28a2dfd47fe06c97364c0869bb78f + +0052-tree-vect-loop.c-check_reduction_path-Compute-reduct.patch +58baf7ab85cbb1068a651c96f7d56e2902ead6cc + +0053-tree-vectorizer.h-_stmt_vec_info-cond_reduc_code-Rem.patch +c11cccc0285f02f117a1e80924fb7673b6486ce9 + +0054-re-PR-target-86753-gcc.target-aarch64-sve-vcond_-45-.patch +cc1facefe3b4e3b067d95291a7dba834b830ff18 + +0055-Avoid-recomputing-data-references-in-BB-SLP.patch +fa0c8df71d4f0476834db0b7cd88524878b46cf7 + +0056-Move-code-out-of-vect_slp_analyze_bb_1.patch +1d778697b37aec23db5b6003dfe08d2d78bd9424 + +0057-Avoid-setting-current_vector_size-in-get_vec_alignme.patch +da157e2ee9e12348df78246ee33b244b7cc334df + +0058-Pass-a-vec_info-to-vect_supportable_shift.patch +a5c3185a503fbdbc1bf05efe8ab9d12850a211c1 + +0059-Pass-a-vec_info-to-vect_supportable_direct_optab_p.patch +dcab2a0d1d4b2c0b4bba6f5e3834ec0678a2a5c8 + +0060-Pass-a-vec_info-to-get_mask_type_for_scalar_type.patch +1bd5196c9b1a0cd7280adadd6d788f81a82ca023 + +0061-Pass-a-vec_info-to-get_vectype_for_scalar_type.patch +7ed54790da87bbb4a134020a9fb8bd1b72fd0acb + +0062-Pass-a-vec_info-to-duplicate_and_interleave.patch +cdbe6e9bb4ae2882f77f94993783085fa342a9f9 + +0063-Pass-a-vec_info-to-can_duplicate_and_interleave_p.patch +43fdde5738ea0554fa000987e9769add027f4876 + +0064-Pass-a-vec_info-to-simple_integer_narrowing.patch +6c261c667801eee46a6221d3681d17493c0bbd65 + +0065-Pass-a-vec_info-to-supportable_narrowing_operation.patch +db8374a63fd0ea84f72ac76cc899be44df36df6a + +0066-Pass-a-loop_vec_info-to-vect_maybe_permute_loop_mask.patch +b0dab10e71b03441beefbbf951c0812056413cd3 + +0067-Pass-a-vec_info-to-vect_halve_mask_nunits.patch +830e90dab3dee5c8129c7760ff09ab112c2cd271 + 
+0068-Pass-a-vec_info-to-vect_double_mask_nunits.patch +8d1473958808fe4714ec24991ac83ee6cbf45397 + +0069-Replace-current_vector_size-with-vec_info-vector_siz.patch +ba7f76dd6bbf038948bbe516764a8bb0c851f750 + +0070-tree-vectorizer.h-_slp_tree-ops-New-member.patch +30c0d1e3cf8b03992e08cfd00ccf1fcb638d3c03 + +0071-re-PR-tree-optimization-92162-ICE-in-vect_create_epi.patch +53b15ca96116544a7a3ca8bc5f4e1649b74f3d45 + +0072-Fix-use-after-free-in-vector_size-change.patch +87121696fb2ddbec5f33daa359234850f7fd306d + +0073-re-PR-tree-optimization-92173-ICE-in-optab_for_tree_.patch +9107d6526b938eba8168025c0d90d06ad3634e69 + +0074-re-PR-tree-optimization-92173-ICE-in-optab_for_tree_.patch +6c7b0df8029d01e05577668333660d0bc58a3023 + +0075-AArch64-Don-t-apply-mode_for_int_vector-to-scalars.patch +d7814449f229cecdee48afe381519a61ea7e3378 + +0076-re-PR-tree-optimization-65930-Reduction-with-sign-ch.patch +82e8e335f917b9ce40801838c06f7945cf88da43 + +0077-re-PR-tree-optimization-92205-ICE-in-vect_get_vec_de.patch +e227594789d909fbad56f6036910938678738f92 + +0078-tree-vect-slp.c-vect_get_and_check_slp_defs-For-redu.patch +4352288a3df915575a2b820f702242908740106f + +0079-tree-vect-loop.c-vectorizable_reduction-Verify-STMT_.patch +ea133b14f48ed5730748a7e02e322fb07ccc2d85 + +0080-Fix-reductions-for-fully-masked-loops.patch +89d0345ad7b8d84045813972ee60557a6b511c57 + +0081-tree-vect-loop.c-vect_create_epilog_for_reduction-Us.patch +e0c4f7fbd6a4ee8e3a1468514044bd941fa28522 + +0082-re-PR-tree-optimization-92241-ice-in-vect_mark_patte.patch +97c6bea819ec0a773041308e62a7c05c33f093b0 + +0083-re-PR-tree-optimization-65930-Reduction-with-sign-ch.patch +b7ff7cef5005721e78d6936bed3ae1c059b4e8d2 + +0084-Fix-reduc_index-calculation-in-vectorizable_conditio.patch +1d149b7260bcc4c0c6367b3aea47a8b91a1cf345 + +0085-vect-PR-88915-Vectorize-epilogues-when-versioning-lo.patch +97c146036750e7cb3966d292572ec158a78f356e + +0086-re-PR-tree-optimization-65930-Reduction-with-sign-ch.patch +b4673569c2a8b974e3f84ffaa547941c5d40cfe5 + +0087-Come-up-with-an-abstraction.patch +7f4a8ee03d404c560dcb75ba684fd57ffbc77e85 + +0088-re-PR-tree-optimization-92275-ICE-error-definition-i.patch +b81f2dafdbd2c5aa49213b35dc12d4610834e39e + +0089-vect-Make-vect-epilogues-nomask-1-default.patch +1297712fb4af6c6bfd827e0f0a9695b14669f87d + +0090-vect-Clean-up-orig_loop_vinfo-from-vect_analyze_loop.patch +494d6c28c53d0852bb6468b1f1ca189159775fcc + +0091-re-PR-tree-optimization-92371-ICE-in-info_for_reduct.patch +02bf7e6fa219f939b3225c54fbe8bab2133b1aeb + +0092-vect-PR92317-fix-skip_epilogue-creation-for-epilogue.patch +2e7a4f579b1157754ea20a03431b4fa80cd4567a + +0093-Restructure-vect_analyze_loop.patch +72d6aeecd95ec49fff1d258e4631167a03351cbb + +0094-Check-the-VF-is-small-enough-for-an-epilogue-loop.patch +8ec5b16a9a3dbd6d825596c22f1bc32646de28fe + +0095-tree-vect-loop.c-vectorizable_reduction-Remember-red.patch +06af1f1a0def9de076ec629ea634122f15882ce6 + +0096-Don-t-vectorise-single-iteration-epilogues.patch +4b205bf82d06c4d9d0ae7b78e54c712d79d5b021 + +0097-re-PR-tree-optimization-92405-ICE-in-vect_get_vec_de.patch +084d390246c2172853f9e12ce04aef23cba79590 + +0098-re-PR-tree-optimization-92324-ICE-in-expand_direct_o.patch +f1e1ed3314b7c6308f64cbbcf6d1916e239c8e35 + +0099-vect-Disable-vectorization-of-epilogues-for-loops-wi.patch +b602712b3ea2a0729a2eda61bd9ee795aba6138f + +0100-Use-correct-vector-type-in-neutral_op_for_slp_reduct.patch +d308ca27c71e43625b378dc6c2774105867d4fa7 + +0101-vect-Account-for-epilogue-s-peeling-for-gaps-when-ch.patch 
+87b47251924c7539a9a8e191587d118a14496473 + +0102-Add-a-targetm.vectorize.related_mode-hook.patch +f09552335030433018fd5f7f6b9848339b5ca2da + +0103-Replace-mode_for_int_vector-with-related_int_vector_.patch +d083ee47a9828236016841356fc7207e7c90bbbd + +0104-Add-build_truth_vector_type_for_mode.patch +0a0ef2387cc1561d537d8d949aef9479ef17ba35 + +0105-Remove-build_-same_sized_-truth_vector_type.patch +e8738f4e9686203451fd11f05b268b8a31b95ebd + +0106-Pass-the-data-vector-mode-to-get_mask_mode.patch +10116ec1c147a76522cafba6b6a5b4ed1cb37b77 + +0107-Use-build_vector_type_for_mode-in-get_vectype_for_sc.patch +95da266b86fcdeff84fcadc5e3cde3d0027e571d + +0108-Use-consistent-compatibility-checks-in-vectorizable_.patch +0203c4f3bfb3e3242635b0cee0b9deedb4070a62 + +0109-Use-consistent-compatibility-checks-in-vectorizable_.patch +e021fb865564b62a10adb1e98f75b5ea05058047 + +0110-Replace-vec_info-vector_size-with-vec_info-vector_mo.patch +1c84a2d25ecd4c03dde745f36a4762dd45f97c85 + +0111-Make-less-use-of-get_same_sized_vectype.patch +2df4150075c03f8a292c40afd3bb25febb673578 + +0112-Require-equal-type-sizes-for-vectorised-calls.patch +7f52eb891b738337d5cf82c7c440a5eea8c7b0c9 + +0113-Support-vectorisation-with-mixed-vector-sizes.patch +df7c22831f1e48dba49479c5960c1c180d8eab2c + +0114-Avoid-retrying-with-the-same-vector-modes.patch +a55d8232df3dd4f7a3f5b70025074c3919b802a6 + +0115-AArch64-Support-vectorising-with-multiple-vector-siz.patch +74166aabeb7f22990476b1169bba031b8323ee92 + +0116-Allow-mixed-vector-sizes-within-a-single-vectorised-.patch +05101d1b575a57ca26e4275e971da85a0dd1d52a + +0117-Vectorise-conversions-between-differently-sized-inte.patch +9c437a108a14b9bdc44659c131b0da944e5ffeab + +0118-Consider-building-nodes-from-scalars-in-vect_slp_ana.patch +60838d634634a70d65a126166c944b159ac7649c + +0119-Optionally-pick-the-cheapest-loop_vec_info.patch +bcc7e346bf9b5dc77797ea949d6adc740deb30ca + +0120-Move-canonicalisation-of-dr_with_seg_len_pair_ts.patch +1fb2b0f69ee849142b669ba1b82264ce6d0f75f9 + +0121-Delay-swapping-data-refs-in-prune_runtime_alias_test.patch +97602450b04e94aff034381bf6ee4236b95727ed + +0122-Add-flags-to-dr_with_seg_len_pair_t.patch +e9acf80c96d681917d930869b7cbfb7d2fa54d51 + +0123-Record-whether-a-dr_with_seg_len-contains-mixed-step.patch +52c29905259363ce2b78dd7aa8a25cf531cddb3a + +0124-Dump-the-list-of-merged-alias-pairs.patch +cad984b289e2b3aca786314c673339eb0500fefa + +0125-Print-the-type-of-alias-check-in-a-dump-message.patch +b4d1b635737a4780e5be247f8be9550eaf83dae5 + +0126-Use-a-single-comparison-for-index-based-alias-checks.patch +f9d6338bd15ce1fae36bf25d3a0545e9678ddc58 + +0127-Optimise-WAR-and-WAW-alias-checks.patch +8489e1f45b50600c01eb8ed8c5d0ca914ded281c + +0128-Avoid-quadratic-behaviour-in-prune_runtime_alias_tes.patch +ea1ff9e46c7ec5e49ec671616cfcf405ef665054 + +diff --git a/gcc/asan.c b/gcc/asan.c +index 3b800b26b69..605d04f87f7 100644 +--- a/gcc/asan.c ++++ b/gcc/asan.c +@@ -1713,8 +1713,8 @@ asan_emit_allocas_unpoison (rtx top, rtx bot, rtx_insn *before) + rtx ret = init_one_libfunc ("__asan_allocas_unpoison"); + top = convert_memory_address (ptr_mode, top); + bot = convert_memory_address (ptr_mode, bot); +- ret = emit_library_call_value (ret, NULL_RTX, LCT_NORMAL, ptr_mode, +- top, ptr_mode, bot, ptr_mode); ++ emit_library_call (ret, LCT_NORMAL, ptr_mode, ++ top, ptr_mode, bot, ptr_mode); + + do_pending_stack_adjust (); + rtx_insn *insns = get_insns (); +diff --git a/gcc/bt-load.c b/gcc/bt-load.c +index a7d9d53954e..f68879ca49a 100644 +--- a/gcc/bt-load.c ++++ 
b/gcc/bt-load.c +@@ -1169,7 +1169,6 @@ move_btr_def (basic_block new_def_bb, int btr, btr_def *def, bitmap live_range, + + if (def->other_btr_uses_before_def) + { +- insp = BB_END (b); + for (insp = BB_END (b); ! INSN_P (insp); insp = PREV_INSN (insp)) + gcc_assert (insp != BB_HEAD (b)); + +diff --git a/gcc/builtins.c b/gcc/builtins.c +index ed11f79ff0b..910e614a4d1 100644 +--- a/gcc/builtins.c ++++ b/gcc/builtins.c +@@ -1653,11 +1653,8 @@ expand_builtin_apply_args_1 (void) + /* Save the structure value address unless this is passed as an + "invisible" first argument. */ + if (struct_incoming_value) +- { +- emit_move_insn (adjust_address (registers, Pmode, size), +- copy_to_reg (struct_incoming_value)); +- size += GET_MODE_SIZE (Pmode); +- } ++ emit_move_insn (adjust_address (registers, Pmode, size), ++ copy_to_reg (struct_incoming_value)); + + /* Return the address of the block. */ + return copy_addr_to_reg (XEXP (registers, 0)); +@@ -1806,7 +1803,6 @@ expand_builtin_apply (rtx function, rtx arguments, rtx argsize) + emit_move_insn (struct_value, value); + if (REG_P (struct_value)) + use_reg (&call_fusage, struct_value); +- size += GET_MODE_SIZE (Pmode); + } + + /* All arguments and registers used for the call are set up by now! */ +diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c +index c0582a54c93..cb999cbf82f 100644 +--- a/gcc/c/c-typeck.c ++++ b/gcc/c/c-typeck.c +@@ -5424,7 +5424,7 @@ build_conditional_expr (location_t colon_loc, tree ifexp, bool ifexp_bcp, + tree elem_type = TREE_TYPE (vectype); + tree zero = build_int_cst (elem_type, 0); + tree zero_vec = build_vector_from_val (vectype, zero); +- tree cmp_type = build_same_sized_truth_vector_type (vectype); ++ tree cmp_type = truth_type_for (vectype); + ifexp = build2 (NE_EXPR, cmp_type, ifexp, zero_vec); + } + +@@ -11327,7 +11327,7 @@ build_vec_cmp (tree_code code, tree type, + { + tree zero_vec = build_zero_cst (type); + tree minus_one_vec = build_minus_one_cst (type); +- tree cmp_type = build_same_sized_truth_vector_type (type); ++ tree cmp_type = truth_type_for (type); + tree cmp = build2 (code, cmp_type, arg0, arg1); + return build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec); + } +diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c +index e252975f546..4ae8e3b3297 100644 +--- a/gcc/cfgexpand.c ++++ b/gcc/cfgexpand.c +@@ -3029,7 +3029,6 @@ expand_asm_stmt (gasm *stmt) + } + } + } +- unsigned nclobbers = clobber_rvec.length(); + + /* First pass over inputs and outputs checks validity and sets + mark_addressable if needed. */ +@@ -3301,7 +3300,7 @@ expand_asm_stmt (gasm *stmt) + gcc_assert (constraints.length() == noutputs + ninputs); + + /* But it certainly can adjust the clobbers. */ +- nclobbers = clobber_rvec.length(); ++ unsigned nclobbers = clobber_rvec.length (); + + /* Third pass checks for easy conflicts. */ + /* ??? Why are we doing this on trees instead of rtx. 
*/ +@@ -5979,11 +5978,11 @@ construct_init_block (void) + { + first_block = e->dest; + redirect_edge_succ (e, init_block); +- e = make_single_succ_edge (init_block, first_block, flags); ++ make_single_succ_edge (init_block, first_block, flags); + } + else +- e = make_single_succ_edge (init_block, EXIT_BLOCK_PTR_FOR_FN (cfun), +- EDGE_FALLTHRU); ++ make_single_succ_edge (init_block, EXIT_BLOCK_PTR_FOR_FN (cfun), ++ EDGE_FALLTHRU); + + update_bb_for_insn (init_block); + return init_block; +diff --git a/gcc/cfghooks.c b/gcc/cfghooks.c +index a1d603a207e..a18b6490bdd 100644 +--- a/gcc/cfghooks.c ++++ b/gcc/cfghooks.c +@@ -253,8 +253,6 @@ verify_flow_info (void) + err = 1; + } + +- last_bb_seen = ENTRY_BLOCK_PTR_FOR_FN (cfun); +- + /* Clean up. */ + free (last_visited); + free (edge_checksum); +diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h +index b78d87d22f1..98bf6d2adda 100644 +--- a/gcc/cfgloop.h ++++ b/gcc/cfgloop.h +@@ -174,6 +174,9 @@ struct GTY ((chain_next ("%h.next"))) loop { + of the loop can be safely evaluated concurrently. */ + int safelen; + ++ /* Preferred vectorization factor for the loop if non-zero. */ ++ int simdlen; ++ + /* Constraints are generally set by consumers and affect certain + semantics of niter analyzer APIs. Currently the APIs affected are + number_of_iterations_exit* functions and their callers. One typical +diff --git a/gcc/cfgloopmanip.c b/gcc/cfgloopmanip.c +index ea4b914c15b..8fc697ecf5d 100644 +--- a/gcc/cfgloopmanip.c ++++ b/gcc/cfgloopmanip.c +@@ -364,7 +364,6 @@ remove_path (edge e, bool *irred_invalidated, + + for (i = 0; i < nrem; i++) + { +- bb = rem_bbs[i]; + FOR_EACH_EDGE (ae, ei, rem_bbs[i]->succs) + if (ae->dest != EXIT_BLOCK_PTR_FOR_FN (cfun) + && !bitmap_bit_p (seen, ae->dest->index)) +@@ -1016,6 +1015,7 @@ copy_loop_info (struct loop *loop, struct loop *target) + target->nb_iterations_estimate = loop->nb_iterations_estimate; + target->estimate_state = loop->estimate_state; + target->safelen = loop->safelen; ++ target->simdlen = loop->simdlen; + target->constraints = loop->constraints; + target->can_be_parallel = loop->can_be_parallel; + target->warned_aggressive_loop_optimizations +diff --git a/gcc/cfgrtl.c b/gcc/cfgrtl.c +index 08e534f2485..b5f15907bde 100644 +--- a/gcc/cfgrtl.c ++++ b/gcc/cfgrtl.c +@@ -2958,7 +2958,6 @@ rtl_verify_bb_layout (void) + basic_block last_bb_seen = ENTRY_BLOCK_PTR_FOR_FN (cfun), curr_bb = NULL; + + num_bb_notes = 0; +- last_bb_seen = ENTRY_BLOCK_PTR_FOR_FN (cfun); + + for (x = rtx_first; x; x = NEXT_INSN (x)) + { +diff --git a/gcc/cgraph.c b/gcc/cgraph.c +index a16f4668b3c..bed6838d22b 100644 +--- a/gcc/cgraph.c ++++ b/gcc/cgraph.c +@@ -2717,8 +2717,6 @@ bool + cgraph_node::set_pure_flag (bool pure, bool looping) + { + struct set_pure_flag_info info = {pure, looping, false}; +- if (!pure) +- looping = false; + call_for_symbol_thunks_and_aliases (set_pure_flag_1, &info, !pure, true); + return info.changed; + } +diff --git a/gcc/combine.c b/gcc/combine.c +index 567aa2c3715..b9d674c96cc 100644 +--- a/gcc/combine.c ++++ b/gcc/combine.c +@@ -6591,7 +6591,6 @@ simplify_if_then_else (rtx x) + || reg_mentioned_p (true_rtx, false_rtx) + || rtx_equal_p (false_rtx, XEXP (cond, 0)))) + { +- true_code = reversed_comparison_code (cond, NULL); + SUBST (XEXP (x, 0), reversed_comparison (cond, GET_MODE (cond))); + SUBST (XEXP (x, 1), false_rtx); + SUBST (XEXP (x, 2), true_rtx); +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index e3852c5d182..28f93a70801 100644 +--- 
a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -3183,7 +3183,7 @@ + ;; In this insn, operand 1 should be low, and operand 2 the high part of the + ;; dest vector. + +-(define_insn "*aarch64_combinez" ++(define_insn "@aarch64_combinez" + [(set (match_operand: 0 "register_operand" "=w,w,w") + (vec_concat: + (match_operand:VDC 1 "general_operand" "w,?r,m") +@@ -3197,7 +3197,7 @@ + (set_attr "arch" "simd,fp,simd")] + ) + +-(define_insn "*aarch64_combinez_be" ++(define_insn "@aarch64_combinez_be" + [(set (match_operand: 0 "register_operand" "=w,w,w") + (vec_concat: + (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero") +@@ -5926,6 +5926,15 @@ + DONE; + }) + ++(define_expand "vec_init" ++ [(match_operand:VQ_NO2E 0 "register_operand" "") ++ (match_operand 1 "" "")] ++ "TARGET_SIMD" ++{ ++ aarch64_expand_vector_init (operands[0], operands[1]); ++ DONE; ++}) ++ + (define_insn "*aarch64_simd_ld1r" + [(set (match_operand:VALL_F16 0 "register_operand" "=w") + (vec_duplicate:VALL_F16 +@@ -6937,3 +6946,21 @@ + "pmull2\\t%0.1q, %1.2d, %2.2d" + [(set_attr "type" "crypto_pmull")] + ) ++ ++;; Sign- or zero-extend a 64-bit integer vector to a 128-bit vector. ++(define_insn "2" ++ [(set (match_operand:VQN 0 "register_operand" "=w") ++ (ANY_EXTEND:VQN (match_operand: 1 "register_operand" "w")))] ++ "TARGET_SIMD" ++ "xtl\t%0., %1." ++ [(set_attr "type" "neon_shift_imm_long")] ++) ++ ++;; Truncate a 128-bit integer vector to a 64-bit vector. ++(define_insn "trunc2" ++ [(set (match_operand: 0 "register_operand" "=w") ++ (truncate: (match_operand:VQN 1 "register_operand" "w")))] ++ "TARGET_SIMD" ++ "xtn\t%0., %1." ++ [(set_attr "type" "neon_shift_imm_narrow_q")] ++) +diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md +index 3f39c4c5b63..02d33b7276f 100644 +--- a/gcc/config/aarch64/aarch64-sve.md ++++ b/gcc/config/aarch64/aarch64-sve.md +@@ -3132,3 +3132,19 @@ + DONE; + } + ) ++ ++;; Unpredicated DOT product. ++(define_insn "dot_prod" ++ [(set (match_operand:SVE_SDI 0 "register_operand" "=w, ?&w") ++ (plus:SVE_SDI ++ (unspec:SVE_SDI ++ [(match_operand: 1 "register_operand" "w, w") ++ (match_operand: 2 "register_operand" "w, w")] ++ DOTPROD) ++ (match_operand:SVE_SDI 3 "register_operand" "0, w")))] ++ "TARGET_SVE" ++ "@ ++ dot\\t%0., %1., %2. ++ movprfx\t%0, %3\;dot\\t%0., %1., %2." ++ [(set_attr "movprfx" "*,yes")] ++) +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index 2ff0bc0a686..128c250dffe 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -1549,17 +1549,37 @@ aarch64_sve_pred_mode (unsigned int elem_nbytes) + /* Implement TARGET_VECTORIZE_GET_MASK_MODE. */ + + static opt_machine_mode +-aarch64_get_mask_mode (poly_uint64 nunits, poly_uint64 nbytes) ++aarch64_get_mask_mode (machine_mode mode) + { +- if (TARGET_SVE && known_eq (nbytes, BYTES_PER_SVE_VECTOR)) ++ unsigned int vec_flags = aarch64_classify_vector_mode (mode); ++ if (vec_flags & VEC_SVE_DATA) ++ return aarch64_sve_pred_mode (GET_MODE_UNIT_SIZE (mode)); ++ ++ return default_get_mask_mode (mode); ++} ++ ++/* Implement TARGET_VECTORIZE_RELATED_MODE. */ ++ ++static opt_machine_mode ++aarch64_vectorize_related_mode (machine_mode vector_mode, ++ scalar_mode element_mode, ++ poly_uint64 nunits) ++{ ++ unsigned int vec_flags = aarch64_classify_vector_mode (vector_mode); ++ ++ /* Prefer to use 1 128-bit vector instead of 2 64-bit vectors. 
*/ ++ if ((vec_flags & VEC_ADVSIMD) ++ && known_eq (nunits, 0U) ++ && known_eq (GET_MODE_BITSIZE (vector_mode), 64U) ++ && maybe_ge (GET_MODE_BITSIZE (element_mode) ++ * GET_MODE_NUNITS (vector_mode), 128U)) + { +- unsigned int elem_nbytes = vector_element_size (nbytes, nunits); +- machine_mode pred_mode; +- if (aarch64_sve_pred_mode (elem_nbytes).exists (&pred_mode)) +- return pred_mode; ++ machine_mode res = aarch64_simd_container_mode (element_mode, 128); ++ if (VECTOR_MODE_P (res)) ++ return res; + } + +- return default_get_mask_mode (nunits, nbytes); ++ return default_vectorize_related_mode (vector_mode, element_mode, nunits); + } + + /* Implement TARGET_PREFERRED_ELSE_VALUE. For binary operations, +@@ -10897,7 +10917,9 @@ aarch64_emit_approx_sqrt (rtx dst, rtx src, bool recp) + /* Caller assumes we cannot fail. */ + gcc_assert (use_rsqrt_p (mode)); + +- machine_mode mmsk = mode_for_int_vector (mode).require (); ++ machine_mode mmsk = (VECTOR_MODE_P (mode) ++ ? related_int_vector_mode (mode).require () ++ : int_mode_for_mode (mode).require ()); + rtx xmsk = gen_reg_rtx (mmsk); + if (!recp) + /* When calculating the approximate square root, compare the +@@ -14226,13 +14248,34 @@ aarch64_preferred_simd_mode (scalar_mode mode) + + /* Return a list of possible vector sizes for the vectorizer + to iterate over. */ +-static void +-aarch64_autovectorize_vector_sizes (vector_sizes *sizes) ++static unsigned int ++aarch64_autovectorize_vector_modes (vector_modes *modes, bool) + { + if (TARGET_SVE) +- sizes->safe_push (BYTES_PER_SVE_VECTOR); +- sizes->safe_push (16); +- sizes->safe_push (8); ++ modes->safe_push (VNx16QImode); ++ ++ /* Try using 128-bit vectors for all element types. */ ++ modes->safe_push (V16QImode); ++ ++ /* Try using 64-bit vectors for 8-bit elements and 128-bit vectors ++ for wider elements. */ ++ modes->safe_push (V8QImode); ++ ++ /* Try using 64-bit vectors for 16-bit elements and 128-bit vectors ++ for wider elements. ++ ++ TODO: We could support a limited form of V4QImode too, so that ++ we use 32-bit vectors for 8-bit elements. */ ++ modes->safe_push (V4HImode); ++ ++ /* Try using 64-bit vectors for 32-bit elements and 128-bit vectors ++ for 64-bit elements. ++ ++ TODO: We could similarly support limited forms of V2QImode and V2HImode ++ for this case. */ ++ modes->safe_push (V2SImode); ++ ++ return 0; + } + + /* Implement TARGET_MANGLE_TYPE. */ +@@ -15191,6 +15234,45 @@ aarch64_expand_vector_init (rtx target, rtx vals) + rtx v0 = XVECEXP (vals, 0, 0); + bool all_same = true; + ++ /* This is a special vec_init where N is not an element mode but a ++ vector mode with half the elements of M. We expect to find two entries ++ of mode N in VALS and we must put their concatentation into TARGET. */ ++ if (XVECLEN (vals, 0) == 2 && VECTOR_MODE_P (GET_MODE (XVECEXP (vals, 0, 0)))) ++ { ++ gcc_assert (known_eq (GET_MODE_SIZE (mode), ++ 2 * GET_MODE_SIZE (GET_MODE (XVECEXP (vals, 0, 0))))); ++ rtx lo = XVECEXP (vals, 0, 0); ++ rtx hi = XVECEXP (vals, 0, 1); ++ machine_mode narrow_mode = GET_MODE (lo); ++ gcc_assert (GET_MODE_INNER (narrow_mode) == inner_mode); ++ gcc_assert (narrow_mode == GET_MODE (hi)); ++ ++ /* When we want to concatenate a half-width vector with zeroes we can ++ use the aarch64_combinez[_be] patterns. Just make sure that the ++ zeroes are in the right half. 
*/ ++ if (BYTES_BIG_ENDIAN ++ && aarch64_simd_imm_zero (lo, narrow_mode) ++ && general_operand (hi, narrow_mode)) ++ emit_insn (gen_aarch64_combinez_be (narrow_mode, target, hi, lo)); ++ else if (!BYTES_BIG_ENDIAN ++ && aarch64_simd_imm_zero (hi, narrow_mode) ++ && general_operand (lo, narrow_mode)) ++ emit_insn (gen_aarch64_combinez (narrow_mode, target, lo, hi)); ++ else ++ { ++ /* Else create the two half-width registers and combine them. */ ++ if (!REG_P (lo)) ++ lo = force_reg (GET_MODE (lo), lo); ++ if (!REG_P (hi)) ++ hi = force_reg (GET_MODE (hi), hi); ++ ++ if (BYTES_BIG_ENDIAN) ++ std::swap (lo, hi); ++ emit_insn (gen_aarch64_simd_combine (narrow_mode, target, lo, hi)); ++ } ++ return; ++ } ++ + /* Count the number of variable elements to initialise. */ + for (int i = 0; i < n_elts; ++i) + { +@@ -16684,7 +16766,7 @@ aarch64_evpc_sve_tbl (struct expand_vec_perm_d *d) + if (d->testing_p) + return true; + +- machine_mode sel_mode = mode_for_int_vector (d->vmode).require (); ++ machine_mode sel_mode = related_int_vector_mode (d->vmode).require (); + rtx sel = vec_perm_indices_to_rtx (sel_mode, d->perm); + if (d->one_vector_p) + emit_unspec2 (d->target, UNSPEC_TBL, d->op0, force_reg (sel_mode, sel)); +@@ -17064,9 +17146,7 @@ void + aarch64_expand_sve_vcond (machine_mode data_mode, machine_mode cmp_mode, + rtx *ops) + { +- machine_mode pred_mode +- = aarch64_get_mask_mode (GET_MODE_NUNITS (cmp_mode), +- GET_MODE_SIZE (cmp_mode)).require (); ++ machine_mode pred_mode = aarch64_get_mask_mode (cmp_mode).require (); + rtx pred = gen_reg_rtx (pred_mode); + if (FLOAT_MODE_P (cmp_mode)) + { +@@ -19363,9 +19443,9 @@ aarch64_libgcc_floating_mode_supported_p + #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ + aarch64_builtin_vectorized_function + +-#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES +-#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ +- aarch64_autovectorize_vector_sizes ++#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES ++#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ ++ aarch64_autovectorize_vector_modes + + #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV + #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV \ +@@ -19398,6 +19478,8 @@ aarch64_libgcc_floating_mode_supported_p + #define TARGET_VECTORIZE_VEC_PERM_CONST \ + aarch64_vectorize_vec_perm_const + ++#undef TARGET_VECTORIZE_RELATED_MODE ++#define TARGET_VECTORIZE_RELATED_MODE aarch64_vectorize_related_mode + #undef TARGET_VECTORIZE_GET_MASK_MODE + #define TARGET_VECTORIZE_GET_MASK_MODE aarch64_get_mask_mode + #undef TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE +diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md +index 6caeeac8086..c7ccd5bf6fe 100644 +--- a/gcc/config/aarch64/iterators.md ++++ b/gcc/config/aarch64/iterators.md +@@ -663,6 +663,9 @@ + (QI "b") (HI "h") + (SI "s") (DI "d")]) + ++;; Like Vetype, but map to types that are a quarter of the element size. ++(define_mode_attr Vetype_fourth [(VNx4SI "b") (VNx2DI "h")]) ++ + ;; Equivalent of "size" for a vector element. + (define_mode_attr Vesize [(VNx16QI "b") + (VNx8HI "h") (VNx8HF "h") +@@ -765,6 +768,7 @@ + ;; Half modes of all vector modes, in lower-case. + (define_mode_attr Vhalf [(V8QI "v4qi") (V16QI "v8qi") + (V4HI "v2hi") (V8HI "v4hi") ++ (V8HF "v4hf") + (V2SI "si") (V4SI "v2si") + (V2DI "di") (V2SF "sf") + (V4SF "v2sf") (V2DF "df")]) +@@ -800,6 +804,8 @@ + (V2DI "V2SI") + (DI "SI") (SI "HI") + (HI "QI")]) ++(define_mode_attr Vnarrowq [(V8HI "v8qi") (V4SI "v4hi") ++ (V2DI "v2si")]) + + ;; Narrowed quad-modes for VQN (Used for XTN2). 
+ (define_mode_attr VNARROWQ2 [(V8HI "V16QI") (V4SI "V8HI") +@@ -1029,8 +1035,10 @@ + (V2SF "p") (V4SF "v") + (V4HF "v") (V8HF "v")]) + +-(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi")]) +-(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI")]) ++(define_mode_attr vsi2qi [(V2SI "v8qi") (V4SI "v16qi") ++ (VNx4SI "vnx16qi") (VNx2DI "vnx8hi")]) ++(define_mode_attr VSI2QI [(V2SI "V8QI") (V4SI "V16QI") ++ (VNx4SI "VNx16QI") (VNx2DI "VNx8HI")]) + + + ;; Register suffix for DOTPROD input types from the return type. +diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c +index f7ff95a0edf..325dd3cea9a 100644 +--- a/gcc/config/arc/arc.c ++++ b/gcc/config/arc/arc.c +@@ -477,16 +477,17 @@ arc_preferred_simd_mode (scalar_mode mode) + } + + /* Implements target hook +- TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */ ++ TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. */ + +-static void +-arc_autovectorize_vector_sizes (vector_sizes *sizes) ++static unsigned int ++arc_autovectorize_vector_modes (vector_modes *modes, bool) + { + if (TARGET_PLUS_QMACW) + { +- sizes->quick_push (8); +- sizes->quick_push (4); ++ modes->quick_push (V4HImode); ++ modes->quick_push (V2HImode); + } ++ return 0; + } + + +@@ -596,8 +597,8 @@ static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode); + #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE + #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arc_preferred_simd_mode + +-#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES +-#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES arc_autovectorize_vector_sizes ++#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES ++#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES arc_autovectorize_vector_modes + + #undef TARGET_CAN_USE_DOLOOP_P + #define TARGET_CAN_USE_DOLOOP_P arc_can_use_doloop_p +diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c +index cdfc0f9e72f..1a4a4b7bc58 100644 +--- a/gcc/config/arm/arm.c ++++ b/gcc/config/arm/arm.c +@@ -288,7 +288,7 @@ static bool arm_builtin_support_vector_misalignment (machine_mode mode, + static void arm_conditional_register_usage (void); + static enum flt_eval_method arm_excess_precision (enum excess_precision_type); + static reg_class_t arm_preferred_rename_class (reg_class_t rclass); +-static void arm_autovectorize_vector_sizes (vector_sizes *); ++static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool); + static int arm_default_branch_cost (bool, bool); + static int arm_cortex_a5_branch_cost (bool, bool); + static int arm_cortex_m_branch_cost (bool, bool); +@@ -519,9 +519,9 @@ static const struct attribute_spec arm_attribute_table[] = + #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p + #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE + #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode +-#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES +-#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ +- arm_autovectorize_vector_sizes ++#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES ++#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ ++ arm_autovectorize_vector_modes + + #undef TARGET_MACHINE_DEPENDENT_REORG + #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg +@@ -28446,14 +28446,15 @@ arm_vector_alignment (const_tree type) + return align; + } + +-static void +-arm_autovectorize_vector_sizes (vector_sizes *sizes) ++static unsigned int ++arm_autovectorize_vector_modes (vector_modes *modes, bool) + { + if (!TARGET_NEON_VECTORIZE_DOUBLE) + { +- sizes->safe_push (16); +- sizes->safe_push (8); ++ modes->safe_push (V16QImode); ++ 
modes->safe_push (V8QImode); + } ++ return 0; + } + + static bool +diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c +index 99fa45edcd4..eb06ff9e05b 100644 +--- a/gcc/config/gcn/gcn.c ++++ b/gcc/config/gcn/gcn.c +@@ -3800,8 +3800,7 @@ gcn_expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, + a vector. */ + + opt_machine_mode +-gcn_vectorize_get_mask_mode (poly_uint64 ARG_UNUSED (nunits), +- poly_uint64 ARG_UNUSED (length)) ++gcn_vectorize_get_mask_mode (machine_mode) + { + /* GCN uses a DImode bit-mask. */ + return DImode; +diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c +index 1bca5a7eea6..5a0f8a0eb72 100644 +--- a/gcc/config/i386/i386.c ++++ b/gcc/config/i386/i386.c +@@ -9647,7 +9647,6 @@ ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v, + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + CUMULATIVE_ARGS next_cum; + tree fntype; +- int max; + + gcc_assert (!no_rtl); + +@@ -9663,10 +9662,6 @@ ix86_setup_incoming_vararg_bounds (cumulative_args_t cum_v, + if (stdarg_p (fntype)) + ix86_function_arg_advance (pack_cumulative_args (&next_cum), mode, type, + true); +- +- max = cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD; +- if (max > X86_64_REGPARM_MAX) +- max = X86_64_REGPARM_MAX; + } + + +@@ -11806,7 +11801,6 @@ choose_basereg (HOST_WIDE_INT cfa_offset, rtx &base_reg, + { + base_reg = hard_frame_pointer_rtx; + base_offset = toffset; +- len = tlen; + } + } + } +@@ -39699,12 +39693,10 @@ ix86_preferred_reload_class (rtx x, reg_class_t regclass) + static reg_class_t + ix86_preferred_output_reload_class (rtx x, reg_class_t regclass) + { +- machine_mode mode = GET_MODE (x); +- + /* Restrict the output reload class to the register bank that we are doing + math on. If we would like not to return a subset of CLASS, reject this + alternative: if reload cannot do this, it will still use its choice. */ +- mode = GET_MODE (x); ++ machine_mode mode = GET_MODE (x); + if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) + return MAYBE_SSE_CLASS_P (regclass) ? ALL_SSE_REGS : NO_REGS; + +@@ -45666,14 +45658,13 @@ ix86_expand_rounddf_32 (rtx operand0, rtx operand1) + 0, OPTAB_DIRECT); + + /* Compensate. */ +- tmp = gen_reg_rtx (mode); + /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */ + tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false); +- emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); ++ emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, tmp, one))); + xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); + /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */ + tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false); +- emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, one, tmp))); ++ emit_insn (gen_rtx_SET (tmp, gen_rtx_AND (mode, tmp, one))); + xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); + + /* res = copysign (xa2, operand1) */ +@@ -50238,27 +50229,42 @@ ix86_split_reduction (machine_mode mode) + vectors. If AVX512F is enabled then try vectorizing with 512bit, + 256bit and 128bit vectors. 
*/ + +-static void +-ix86_autovectorize_vector_sizes (vector_sizes *sizes) ++static unsigned int ++ix86_autovectorize_vector_modes (vector_modes *modes, bool all) + { + if (TARGET_AVX512F && !TARGET_PREFER_AVX256) + { +- sizes->safe_push (64); +- sizes->safe_push (32); +- sizes->safe_push (16); ++ modes->safe_push (V64QImode); ++ modes->safe_push (V32QImode); ++ modes->safe_push (V16QImode); ++ } ++ else if (TARGET_AVX512F && all) ++ { ++ modes->safe_push (V32QImode); ++ modes->safe_push (V16QImode); ++ modes->safe_push (V64QImode); + } + else if (TARGET_AVX && !TARGET_PREFER_AVX128) + { +- sizes->safe_push (32); +- sizes->safe_push (16); ++ modes->safe_push (V32QImode); ++ modes->safe_push (V16QImode); ++ } ++ else if (TARGET_AVX && all) ++ { ++ modes->safe_push (V16QImode); ++ modes->safe_push (V32QImode); + } ++ ++ return 0; + } + + /* Implemenation of targetm.vectorize.get_mask_mode. */ + + static opt_machine_mode +-ix86_get_mask_mode (poly_uint64 nunits, poly_uint64 vector_size) ++ix86_get_mask_mode (machine_mode data_mode) + { ++ unsigned vector_size = GET_MODE_SIZE (data_mode); ++ unsigned nunits = GET_MODE_NUNITS (data_mode); + unsigned elem_size = vector_size / nunits; + + /* Scalar mask case. */ +@@ -51849,9 +51855,9 @@ ix86_run_selftests (void) + #undef TARGET_VECTORIZE_SPLIT_REDUCTION + #define TARGET_VECTORIZE_SPLIT_REDUCTION \ + ix86_split_reduction +-#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES +-#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ +- ix86_autovectorize_vector_sizes ++#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES ++#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ ++ ix86_autovectorize_vector_modes + #undef TARGET_VECTORIZE_GET_MASK_MODE + #define TARGET_VECTORIZE_GET_MASK_MODE ix86_get_mask_mode + #undef TARGET_VECTORIZE_INIT_COST +diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md +index 18cc39ae521..8c961f12a42 100644 +--- a/gcc/config/i386/sse.md ++++ b/gcc/config/i386/sse.md +@@ -16441,10 +16441,9 @@ + (unspec:VF_128_256 + [(match_operand:VF_128_256 1 "register_operand" "0,0,x") + (match_operand:VF_128_256 2 "vector_operand" "YrBm,*xBm,xm") +- (subreg:VF_128_256 +- (lt: +- (match_operand: 3 "register_operand" "Yz,Yz,x") +- (match_operand: 4 "const0_operand" "C,C,C")) 0)] ++ (lt:VF_128_256 ++ (match_operand: 3 "register_operand" "Yz,Yz,x") ++ (match_operand: 4 "const0_operand" "C,C,C"))] + UNSPEC_BLENDV))] + "TARGET_SSE4_1" + "#" +diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c +index d758fbf1be6..1008947209e 100644 +--- a/gcc/config/mips/mips.c ++++ b/gcc/config/mips/mips.c +@@ -13457,13 +13457,14 @@ mips_preferred_simd_mode (scalar_mode mode) + return word_mode; + } + +-/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES. */ ++/* Implement TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES. */ + +-static void +-mips_autovectorize_vector_sizes (vector_sizes *sizes) ++static unsigned int ++mips_autovectorize_vector_modes (vector_modes *modes, bool) + { + if (ISA_HAS_MSA) +- sizes->safe_push (16); ++ modes->safe_push (V16QImode); ++ return 0; + } + + /* Implement TARGET_INIT_LIBFUNCS. 
*/ +@@ -22676,9 +22677,9 @@ mips_starting_frame_offset (void) + + #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE + #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE mips_preferred_simd_mode +-#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES +-#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \ +- mips_autovectorize_vector_sizes ++#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES ++#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ ++ mips_autovectorize_vector_modes + + #undef TARGET_INIT_BUILTINS + #define TARGET_INIT_BUILTINS mips_init_builtins +diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c +index 87d60078bb0..8f046de424c 100644 +--- a/gcc/config/rs6000/rs6000.c ++++ b/gcc/config/rs6000/rs6000.c +@@ -15457,7 +15457,7 @@ static tree + fold_build_vec_cmp (tree_code code, tree type, + tree arg0, tree arg1) + { +- tree cmp_type = build_same_sized_truth_vector_type (type); ++ tree cmp_type = truth_type_for (type); + tree zero_vec = build_zero_cst (type); + tree minus_one_vec = build_minus_one_cst (type); + tree cmp = fold_build2 (code, cmp_type, arg0, arg1); +diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c +index db3f94978ec..c35666dec83 100644 +--- a/gcc/config/s390/s390.c ++++ b/gcc/config/s390/s390.c +@@ -6588,7 +6588,7 @@ s390_expand_vec_compare_cc (rtx target, enum rtx_code code, + case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break; + default: gcc_unreachable (); + } +- scratch_mode = mode_for_int_vector (GET_MODE (cmp1)).require (); ++ scratch_mode = related_int_vector_mode (GET_MODE (cmp1)).require (); + + if (inv_p) + all_p = !all_p; +@@ -6694,7 +6694,7 @@ s390_expand_vcond (rtx target, rtx then, rtx els, + + /* We always use an integral type vector to hold the comparison + result. */ +- result_mode = mode_for_int_vector (cmp_mode).require (); ++ result_mode = related_int_vector_mode (cmp_mode).require (); + result_target = gen_reg_rtx (result_mode); + + /* We allow vector immediates as comparison operands that +diff --git a/gcc/cp/call.c b/gcc/cp/call.c +index f365a5a7f7b..23a54f3c332 100644 +--- a/gcc/cp/call.c ++++ b/gcc/cp/call.c +@@ -5161,7 +5161,7 @@ build_conditional_expr_1 (const op_location_t &loc, + + if (!COMPARISON_CLASS_P (arg1)) + { +- tree cmp_type = build_same_sized_truth_vector_type (arg1_type); ++ tree cmp_type = truth_type_for (arg1_type); + arg1 = build2 (NE_EXPR, cmp_type, arg1, build_zero_cst (arg1_type)); + } + return build3_loc (loc, VEC_COND_EXPR, arg2_type, arg1, arg2, arg3); +diff --git a/gcc/cp/class.c b/gcc/cp/class.c +index 6b57184e081..5b0a60d61cc 100644 +--- a/gcc/cp/class.c ++++ b/gcc/cp/class.c +@@ -4760,8 +4760,6 @@ adjust_clone_args (tree decl) + tree orig_decl_parms = TYPE_ARG_TYPES (TREE_TYPE (decl)); + tree decl_parms, clone_parms; + +- clone_parms = orig_clone_parms; +- + /* Skip the 'this' parameter. 
*/ + orig_clone_parms = TREE_CHAIN (orig_clone_parms); + orig_decl_parms = TREE_CHAIN (orig_decl_parms); +@@ -8581,7 +8579,6 @@ dump_class_hierarchy_r (FILE *stream, + tree base_binfo; + int i; + +- indented = maybe_indent_hierarchy (stream, indent, 0); + fprintf (stream, "%s (0x" HOST_WIDE_INT_PRINT_HEX ") ", + type_as_string (BINFO_TYPE (binfo), TFF_PLAIN_IDENTIFIER), + (HOST_WIDE_INT) (uintptr_t) binfo); +@@ -8602,7 +8599,6 @@ dump_class_hierarchy_r (FILE *stream, + fprintf (stream, " virtual"); + fprintf (stream, "\n"); + +- indented = 0; + if (BINFO_PRIMARY_P (binfo)) + { + indented = maybe_indent_hierarchy (stream, indent + 3, indented); +diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c +index 39d55589ef3..5c82c2272c2 100644 +--- a/gcc/cp/decl.c ++++ b/gcc/cp/decl.c +@@ -6387,7 +6387,7 @@ build_aggr_init_full_exprs (tree decl, tree init, int flags) + static tree + check_initializer (tree decl, tree init, int flags, vec **cleanups) + { +- tree type = TREE_TYPE (decl); ++ tree type; + tree init_code = NULL; + tree core_type; + +diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c +index e1c02d7b718..60fe58e0313 100644 +--- a/gcc/cp/parser.c ++++ b/gcc/cp/parser.c +@@ -10485,7 +10485,7 @@ cp_parser_lambda_expression (cp_parser* parser) + if (ok) + maybe_add_lambda_conv_op (type); + +- type = finish_struct (type, /*attributes=*/NULL_TREE); ++ finish_struct (type, /*attributes=*/NULL_TREE); + + in_discarded_stmt = discarded; + +diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c +index 4787747b6ff..ff7921533cb 100644 +--- a/gcc/cp/pt.c ++++ b/gcc/cp/pt.c +@@ -7459,8 +7459,7 @@ unify_bound_ttp_args (tree tparms, tree targs, tree parm, tree& arg, + { + /* In keeping with P0522R0, adjust P's template arguments + to apply to A's template; then flatten it again. */ +- tree nparmvec = parmvec; +- nparmvec = coerce_ttp_args_for_tta (arg, parmvec, tf_none); ++ tree nparmvec = coerce_ttp_args_for_tta (arg, parmvec, tf_none); + nparmvec = expand_template_argument_pack (nparmvec); + + if (unify (tparms, targs, nparmvec, argvec, +@@ -7887,7 +7886,6 @@ convert_template_argument (tree parm, + invalid, but static members are OK. In any + case, grab the underlying fields/functions + and issue an error later if required. 
*/ +- orig_arg = TREE_VALUE (arg); + TREE_TYPE (arg) = unknown_type_node; + } + +diff --git a/gcc/cp/rtti.c b/gcc/cp/rtti.c +index 3ca2b5e7b88..9aea6b939ec 100644 +--- a/gcc/cp/rtti.c ++++ b/gcc/cp/rtti.c +@@ -209,8 +209,8 @@ build_headof (tree exp) + offset = build_vtbl_ref (cp_build_fold_indirect_ref (exp), + index); + +- type = cp_build_qualified_type (ptr_type_node, +- cp_type_quals (TREE_TYPE (exp))); ++ cp_build_qualified_type (ptr_type_node, ++ cp_type_quals (TREE_TYPE (exp))); + return fold_build_pointer_plus (exp, offset); + } + +diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c +index 2169f8c4efd..c42fd731cd2 100644 +--- a/gcc/cp/typeck.c ++++ b/gcc/cp/typeck.c +@@ -4293,7 +4293,7 @@ build_vec_cmp (tree_code code, tree type, + { + tree zero_vec = build_zero_cst (type); + tree minus_one_vec = build_minus_one_cst (type); +- tree cmp_type = build_same_sized_truth_vector_type(type); ++ tree cmp_type = truth_type_for (type); + tree cmp = build2 (code, cmp_type, arg0, arg1); + return build3 (VEC_COND_EXPR, type, cmp, minus_one_vec, zero_vec); + } +@@ -9189,8 +9189,6 @@ convert_for_initialization (tree exp, tree type, tree rhs, int flags, + if (exp == error_mark_node) + return error_mark_node; + +- rhstype = non_reference (rhstype); +- + type = complete_type (type); + + if (DIRECT_INIT_EXPR_P (type, rhs)) +diff --git a/gcc/cselib.c b/gcc/cselib.c +index 84c17c23f6d..108b2588cf9 100644 +--- a/gcc/cselib.c ++++ b/gcc/cselib.c +@@ -2518,13 +2518,12 @@ cselib_record_sets (rtx_insn *insn) + int n_sets = 0; + int i; + struct cselib_set sets[MAX_SETS]; +- rtx body = PATTERN (insn); + rtx cond = 0; + int n_sets_before_autoinc; + int n_strict_low_parts = 0; + struct cselib_record_autoinc_data data; + +- body = PATTERN (insn); ++ rtx body = PATTERN (insn); + if (GET_CODE (body) == COND_EXEC) + { + cond = COND_EXEC_TEST (body); +diff --git a/gcc/d/d-codegen.cc b/gcc/d/d-codegen.cc +index 2abff92fc88..6f5499b08ee 100644 +--- a/gcc/d/d-codegen.cc ++++ b/gcc/d/d-codegen.cc +@@ -1397,7 +1397,7 @@ build_boolop (tree_code code, tree arg0, tree arg1) + /* Build a vector comparison. + VEC_COND_EXPR ; */ + tree type = TREE_TYPE (arg0); +- tree cmptype = build_same_sized_truth_vector_type (type); ++ tree cmptype = truth_type_for (type); + tree cmp = fold_build2_loc (input_location, code, cmptype, arg0, arg1); + + return fold_build3_loc (input_location, VEC_COND_EXPR, type, cmp, +diff --git a/gcc/df-scan.c b/gcc/df-scan.c +index 08d7af33371..84c2e54c855 100644 +--- a/gcc/df-scan.c ++++ b/gcc/df-scan.c +@@ -229,7 +229,6 @@ void + df_scan_alloc (bitmap all_blocks ATTRIBUTE_UNUSED) + { + struct df_scan_problem_data *problem_data; +- unsigned int insn_num = get_max_uid () + 1; + basic_block bb; + + /* Given the number of pools, this is really faster than tearing +@@ -257,7 +256,6 @@ df_scan_alloc (bitmap all_blocks ATTRIBUTE_UNUSED) + bitmap_obstack_initialize (&problem_data->reg_bitmaps); + bitmap_obstack_initialize (&problem_data->insn_bitmaps); + +- insn_num += insn_num / 4; + df_grow_reg_info (); + + df_grow_insn_info (); +diff --git a/gcc/doc/poly-int.texi b/gcc/doc/poly-int.texi +index 1023e823cb3..d60bb02aabf 100644 +--- a/gcc/doc/poly-int.texi ++++ b/gcc/doc/poly-int.texi +@@ -803,6 +803,18 @@ the assertion is known to hold. + @item constant_lower_bound (@var{a}) + Assert that @var{a} is nonnegative and return the smallest value it can have. 
+ ++@item constant_lower_bound_with_limit (@var{a}, @var{b}) ++Return the least value @var{a} can have, given that the context in ++which @var{a} appears guarantees that the answer is no less than @var{b}. ++In other words, the caller is asserting that @var{a} is greater than or ++equal to @var{b} even if @samp{known_ge (@var{a}, @var{b})} doesn't hold. ++ ++@item constant_upper_bound_with_limit (@var{a}, @var{b}) ++Return the greatest value @var{a} can have, given that the context in ++which @var{a} appears guarantees that the answer is no greater than @var{b}. ++In other words, the caller is asserting that @var{a} is less than or equal ++to @var{b} even if @samp{known_le (@var{a}, @var{b})} doesn't hold. ++ + @item lower_bound (@var{a}, @var{b}) + Return a value that is always less than or equal to both @var{a} and @var{b}. + It will be the greatest such value for some indeterminate values +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 8c8978bb13a..73db70867b4 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -6016,27 +6016,71 @@ against lower halves of vectors recursively until the specified mode is + reached. The default is @var{mode} which means no splitting. + @end deftypefn + +-@deftypefn {Target Hook} void TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (vector_sizes *@var{sizes}) +-If the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is not +-the only one that is worth considering, this hook should add all suitable +-vector sizes to @var{sizes}, in order of decreasing preference. The first +-one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}. ++@deftypefn {Target Hook} {unsigned int} TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES (vector_modes *@var{modes}, bool @var{all}) ++If using the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} ++is not the only approach worth considering, this hook should add one mode to ++@var{modes} for each useful alternative approach. These modes are then ++passed to @code{TARGET_VECTORIZE_RELATED_MODE} to obtain the vector mode ++for a given element mode. ++ ++The modes returned in @var{modes} should use the smallest element mode ++possible for the vectorization approach that they represent, preferring ++integer modes over floating-poing modes in the event of a tie. The first ++mode should be the @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} for its ++element mode. ++ ++If @var{all} is true, add suitable vector modes even when they are generally ++not expected to be worthwhile. ++ ++The hook returns a bitmask of flags that control how the modes in ++@var{modes} are used. The flags are: ++@table @code ++@item VECT_COMPARE_COSTS ++Tells the loop vectorizer to try all the provided modes and pick the one ++with the lowest cost. By default the vectorizer will choose the first ++mode that works. ++@end table + + The hook does not need to do anything if the vector returned by + @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant +-for autovectorization. The default implementation does nothing. +-@end deftypefn +- +-@deftypefn {Target Hook} opt_machine_mode TARGET_VECTORIZE_GET_MASK_MODE (poly_uint64 @var{nunits}, poly_uint64 @var{length}) +-A vector mask is a value that holds one boolean result for every element +-in a vector. This hook returns the machine mode that should be used to +-represent such a mask when the vector in question is @var{length} bytes +-long and contains @var{nunits} elements. The hook returns an empty +-@code{opt_machine_mode} if no such mode exists. 
+- +-The default implementation returns the mode of an integer vector that +-is @var{length} bytes long and that contains @var{nunits} elements, +-if such a mode exists. ++for autovectorization. The default implementation adds no modes and ++returns 0. ++@end deftypefn ++ ++@deftypefn {Target Hook} opt_machine_mode TARGET_VECTORIZE_RELATED_MODE (machine_mode @var{vector_mode}, scalar_mode @var{element_mode}, poly_uint64 @var{nunits}) ++If a piece of code is using vector mode @var{vector_mode} and also wants ++to operate on elements of mode @var{element_mode}, return the vector mode ++it should use for those elements. If @var{nunits} is nonzero, ensure that ++the mode has exactly @var{nunits} elements, otherwise pick whichever vector ++size pairs the most naturally with @var{vector_mode}. Return an empty ++@code{opt_machine_mode} if there is no supported vector mode with the ++required properties. ++ ++There is no prescribed way of handling the case in which @var{nunits} ++is zero. One common choice is to pick a vector mode with the same size ++as @var{vector_mode}; this is the natural choice if the target has a ++fixed vector size. Another option is to choose a vector mode with the ++same number of elements as @var{vector_mode}; this is the natural choice ++if the target has a fixed number of elements. Alternatively, the hook ++might choose a middle ground, such as trying to keep the number of ++elements as similar as possible while applying maximum and minimum ++vector sizes. ++ ++The default implementation uses @code{mode_for_vector} to find the ++requested mode, returning a mode with the same size as @var{vector_mode} ++when @var{nunits} is zero. This is the correct behavior for most targets. ++@end deftypefn ++ ++@deftypefn {Target Hook} opt_machine_mode TARGET_VECTORIZE_GET_MASK_MODE (machine_mode @var{mode}) ++Return the mode to use for a vector mask that holds one boolean ++result for each element of vector mode @var{mode}. The returned mask mode ++can be a vector of integers (class @code{MODE_VECTOR_INT}), a vector of ++booleans (class @code{MODE_VECTOR_BOOL}) or a scalar integer (class ++@code{MODE_INT}). Return an empty @code{opt_machine_mode} if no such ++mask mode exists. ++ ++The default implementation returns a @code{MODE_VECTOR_INT} with the ++same size and number of elements as @var{mode}, if such a mode exists. + @end deftypefn + + @deftypefn {Target Hook} bool TARGET_VECTORIZE_EMPTY_MASK_IS_EXPENSIVE (unsigned @var{ifn}) +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index fe1194ef91a..bc362dca0f5 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -4172,7 +4172,9 @@ address; but often a machine-dependent strategy can generate better code. 
+ + @hook TARGET_VECTORIZE_SPLIT_REDUCTION + +-@hook TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES ++@hook TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES ++ ++@hook TARGET_VECTORIZE_RELATED_MODE + + @hook TARGET_VECTORIZE_GET_MASK_MODE + +diff --git a/gcc/dojump.c b/gcc/dojump.c +index 8626689463e..bac37a357a9 100644 +--- a/gcc/dojump.c ++++ b/gcc/dojump.c +@@ -668,8 +668,6 @@ do_jump_by_parts_greater_rtx (scalar_int_mode mode, int unsignedp, rtx op0, + code = LE; + if_true_label = if_false_label; + if_false_label = drop_through_label; +- drop_through_if_true = false; +- drop_through_if_false = true; + prob = prob.invert (); + } + +diff --git a/gcc/early-remat.c b/gcc/early-remat.c +index 122891c1edb..0396f16babf 100644 +--- a/gcc/early-remat.c ++++ b/gcc/early-remat.c +@@ -1123,7 +1123,6 @@ early_remat::record_equiv_candidates (unsigned int cand1_index, + ec->representative = cand1_index; + cand1->equiv_class = ec; + } +- cand1 = &m_candidates[ec->representative]; + cand2->equiv_class = ec; + bitmap_set_bit (ec->members, cand2_index); + if (cand2_index > ec->representative) +diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c +index 15dffa58a2e..78104603c76 100644 +--- a/gcc/emit-rtl.c ++++ b/gcc/emit-rtl.c +@@ -3993,7 +3993,7 @@ try_split (rtx pat, rtx_insn *trial, int last) + before = PREV_INSN (trial); + after = NEXT_INSN (trial); + +- tem = emit_insn_after_setloc (seq, trial, INSN_LOCATION (trial)); ++ emit_insn_after_setloc (seq, trial, INSN_LOCATION (trial)); + + delete_insn (trial); + +diff --git a/gcc/expmed.c b/gcc/expmed.c +index c5f5499c013..34cdfbf151a 100644 +--- a/gcc/expmed.c ++++ b/gcc/expmed.c +@@ -1662,12 +1662,10 @@ extract_bit_field_1 (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum, + poly_uint64 nunits; + if (!multiple_p (GET_MODE_BITSIZE (GET_MODE (op0)), + GET_MODE_UNIT_BITSIZE (tmode), &nunits) +- || !mode_for_vector (inner_mode, nunits).exists (&new_mode) +- || !VECTOR_MODE_P (new_mode) ++ || !related_vector_mode (tmode, inner_mode, ++ nunits).exists (&new_mode) + || maybe_ne (GET_MODE_SIZE (new_mode), +- GET_MODE_SIZE (GET_MODE (op0))) +- || GET_MODE_INNER (new_mode) != GET_MODE_INNER (tmode) +- || !targetm.vector_mode_supported_p (new_mode)) ++ GET_MODE_SIZE (GET_MODE (op0)))) + new_mode = VOIDmode; + } + poly_uint64 pos; +diff --git a/gcc/expr.c b/gcc/expr.c +index fa15b7eceae..5e3700fe15f 100644 +--- a/gcc/expr.c ++++ b/gcc/expr.c +@@ -249,6 +249,31 @@ convert_move (rtx to, rtx from, int unsignedp) + + if (VECTOR_MODE_P (to_mode) || VECTOR_MODE_P (from_mode)) + { ++ if (GET_MODE_UNIT_PRECISION (to_mode) ++ > GET_MODE_UNIT_PRECISION (from_mode)) ++ { ++ optab op = unsignedp ? zext_optab : sext_optab; ++ insn_code icode = convert_optab_handler (op, to_mode, from_mode); ++ if (icode != CODE_FOR_nothing) ++ { ++ emit_unop_insn (icode, to, from, ++ unsignedp ? 
ZERO_EXTEND : SIGN_EXTEND); ++ return; ++ } ++ } ++ ++ if (GET_MODE_UNIT_PRECISION (to_mode) ++ < GET_MODE_UNIT_PRECISION (from_mode)) ++ { ++ insn_code icode = convert_optab_handler (trunc_optab, ++ to_mode, from_mode); ++ if (icode != CODE_FOR_nothing) ++ { ++ emit_unop_insn (icode, to, from, TRUNCATE); ++ return; ++ } ++ } ++ + gcc_assert (known_eq (GET_MODE_BITSIZE (from_mode), + GET_MODE_BITSIZE (to_mode))); + +diff --git a/gcc/fold-const.h b/gcc/fold-const.h +index 049fee91876..e2e66246315 100644 +--- a/gcc/fold-const.h ++++ b/gcc/fold-const.h +@@ -83,7 +83,7 @@ extern bool fold_deferring_overflow_warnings_p (void); + extern void fold_overflow_warning (const char*, enum warn_strict_overflow_code); + extern enum tree_code fold_div_compare (enum tree_code, tree, tree, + tree *, tree *, bool *); +-extern int operand_equal_p (const_tree, const_tree, unsigned int); ++extern int operand_equal_p (const_tree, const_tree, unsigned int flags = 0); + extern int multiple_of_p (tree, const_tree, const_tree); + #define omit_one_operand(T1,T2,T3)\ + omit_one_operand_loc (UNKNOWN_LOCATION, T1, T2, T3) +diff --git a/gcc/fwprop.c b/gcc/fwprop.c +index cf2c9de2d35..f2966fadae8 100644 +--- a/gcc/fwprop.c ++++ b/gcc/fwprop.c +@@ -448,6 +448,18 @@ enum { + PR_OPTIMIZE_FOR_SPEED = 4 + }; + ++/* Check that X has a single def. */ ++ ++static bool ++reg_single_def_p (rtx x) ++{ ++ if (!REG_P (x)) ++ return false; ++ ++ int regno = REGNO (x); ++ return (DF_REG_DEF_COUNT (regno) == 1 ++ && !bitmap_bit_p (DF_LR_OUT (ENTRY_BLOCK_PTR_FOR_FN (cfun)), regno)); ++} + + /* Replace all occurrences of OLD in *PX with NEW and try to simplify the + resulting expression. Replace *PX with a new RTL expression if an +@@ -547,6 +559,54 @@ propagate_rtx_1 (rtx *px, rtx old_rtx, rtx new_rtx, int flags) + tem = simplify_gen_subreg (mode, op0, GET_MODE (SUBREG_REG (x)), + SUBREG_BYTE (x)); + } ++ ++ else ++ { ++ rtvec vec; ++ rtvec newvec; ++ const char *fmt = GET_RTX_FORMAT (code); ++ rtx op; ++ ++ for (int i = 0; fmt[i]; i++) ++ switch (fmt[i]) ++ { ++ case 'E': ++ vec = XVEC (x, i); ++ newvec = vec; ++ for (int j = 0; j < GET_NUM_ELEM (vec); j++) ++ { ++ op = RTVEC_ELT (vec, j); ++ valid_ops &= propagate_rtx_1 (&op, old_rtx, new_rtx, flags); ++ if (op != RTVEC_ELT (vec, j)) ++ { ++ if (newvec == vec) ++ { ++ newvec = shallow_copy_rtvec (vec); ++ if (!tem) ++ tem = shallow_copy_rtx (x); ++ XVEC (tem, i) = newvec; ++ } ++ RTVEC_ELT (newvec, j) = op; ++ } ++ } ++ break; ++ ++ case 'e': ++ if (XEXP (x, i)) ++ { ++ op = XEXP (x, i); ++ valid_ops &= propagate_rtx_1 (&op, old_rtx, new_rtx, flags); ++ if (op != XEXP (x, i)) ++ { ++ if (!tem) ++ tem = shallow_copy_rtx (x); ++ XEXP (tem, i) = op; ++ } ++ } ++ break; ++ } ++ } ++ + break; + + case RTX_OBJ: +@@ -1370,10 +1430,11 @@ forward_propagate_and_simplify (df_ref use, rtx_insn *def_insn, rtx def_set) + + /* Given a use USE of an insn, if it has a single reaching + definition, try to forward propagate it into that insn. +- Return true if cfg cleanup will be needed. */ ++ Return true if cfg cleanup will be needed. ++ REG_PROP_ONLY is true if we should only propagate register copies. */ + + static bool +-forward_propagate_into (df_ref use) ++forward_propagate_into (df_ref use, bool reg_prop_only = false) + { + df_ref def; + rtx_insn *def_insn, *use_insn; +@@ -1394,10 +1455,6 @@ forward_propagate_into (df_ref use) + if (DF_REF_IS_ARTIFICIAL (def)) + return false; + +- /* Do not propagate loop invariant definitions inside the loop. 
*/ +- if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father) +- return false; +- + /* Check if the use is still present in the insn! */ + use_insn = DF_REF_INSN (use); + if (DF_REF_FLAGS (use) & DF_REF_IN_NOTE) +@@ -1415,6 +1472,19 @@ forward_propagate_into (df_ref use) + if (!def_set) + return false; + ++ if (reg_prop_only ++ && (!reg_single_def_p (SET_SRC (def_set)) ++ || !reg_single_def_p (SET_DEST (def_set)))) ++ return false; ++ ++ /* Allow propagations into a loop only for reg-to-reg copies, since ++ replacing one register by another shouldn't increase the cost. */ ++ ++ if (DF_REF_BB (def)->loop_father != DF_REF_BB (use)->loop_father ++ && (!reg_single_def_p (SET_SRC (def_set)) ++ || !reg_single_def_p (SET_DEST (def_set)))) ++ return false; ++ + /* Only try one kind of propagation. If two are possible, we'll + do it on the following iterations. */ + if (forward_propagate_and_simplify (use, def_insn, def_set) +@@ -1483,7 +1553,7 @@ gate_fwprop (void) + } + + static unsigned int +-fwprop (void) ++fwprop (bool fwprop_addr_p) + { + unsigned i; + +@@ -1502,11 +1572,16 @@ fwprop (void) + + df_ref use = DF_USES_GET (i); + if (use) +- if (DF_REF_TYPE (use) == DF_REF_REG_USE +- || DF_REF_BB (use)->loop_father == NULL +- /* The outer most loop is not really a loop. */ +- || loop_outer (DF_REF_BB (use)->loop_father) == NULL) +- forward_propagate_into (use); ++ { ++ if (DF_REF_TYPE (use) == DF_REF_REG_USE ++ || DF_REF_BB (use)->loop_father == NULL ++ /* The outer most loop is not really a loop. */ ++ || loop_outer (DF_REF_BB (use)->loop_father) == NULL) ++ forward_propagate_into (use, fwprop_addr_p); ++ ++ else if (fwprop_addr_p) ++ forward_propagate_into (use, false); ++ } + } + + fwprop_done (); +@@ -1537,7 +1612,7 @@ public: + + /* opt_pass methods: */ + virtual bool gate (function *) { return gate_fwprop (); } +- virtual unsigned int execute (function *) { return fwprop (); } ++ virtual unsigned int execute (function *) { return fwprop (false); } + + }; // class pass_rtl_fwprop + +@@ -1549,33 +1624,6 @@ make_pass_rtl_fwprop (gcc::context *ctxt) + return new pass_rtl_fwprop (ctxt); + } + +-static unsigned int +-fwprop_addr (void) +-{ +- unsigned i; +- +- fwprop_init (); +- +- /* Go through all the uses. df_uses_create will create new ones at the +- end, and we'll go through them as well. */ +- for (i = 0; i < DF_USES_TABLE_SIZE (); i++) +- { +- if (!propagations_left) +- break; +- +- df_ref use = DF_USES_GET (i); +- if (use) +- if (DF_REF_TYPE (use) != DF_REF_REG_USE +- && DF_REF_BB (use)->loop_father != NULL +- /* The outer most loop is not really a loop. 
*/ +- && loop_outer (DF_REF_BB (use)->loop_father) != NULL) +- forward_propagate_into (use); +- } +- +- fwprop_done (); +- return 0; +-} +- + namespace { + + const pass_data pass_data_rtl_fwprop_addr = +@@ -1600,7 +1648,7 @@ public: + + /* opt_pass methods: */ + virtual bool gate (function *) { return gate_fwprop (); } +- virtual unsigned int execute (function *) { return fwprop_addr (); } ++ virtual unsigned int execute (function *) { return fwprop (true); } + + }; // class pass_rtl_fwprop_addr + +diff --git a/gcc/gimple.c b/gcc/gimple.c +index 8fae60fb848..bf362dbe545 100644 +--- a/gcc/gimple.c ++++ b/gcc/gimple.c +@@ -1771,6 +1771,8 @@ gimple_get_lhs (const gimple *stmt) + return gimple_assign_lhs (stmt); + else if (code == GIMPLE_CALL) + return gimple_call_lhs (stmt); ++ else if (code == GIMPLE_PHI) ++ return gimple_phi_result (stmt); + else + return NULL_TREE; + } +diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c +index 4534d43721f..489d0b93b42 100644 +--- a/gcc/graphite-scop-detection.c ++++ b/gcc/graphite-scop-detection.c +@@ -1105,14 +1105,12 @@ assign_parameter_index_in_region (tree name, sese_info_p region) + gcc_assert (TREE_CODE (name) == SSA_NAME + && INTEGRAL_TYPE_P (TREE_TYPE (name)) + && ! defined_in_sese_p (name, region->region)); +- + int i; + tree p; + FOR_EACH_VEC_ELT (region->params, i, p) + if (p == name) + return; + +- i = region->params.length (); + region->params.safe_push (name); + } + +diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c +index 95788dfee7d..21ecd566766 100644 +--- a/gcc/internal-fn.c ++++ b/gcc/internal-fn.c +@@ -100,7 +100,7 @@ init_internal_fns () + /* Create static initializers for the information returned by + direct_internal_fn. */ + #define not_direct { -2, -2, false } +-#define mask_load_direct { -1, 2, false } ++#define mask_load_direct { -1, 2, true } + #define load_lanes_direct { -1, -1, false } + #define mask_load_lanes_direct { -1, -1, false } + #define gather_load_direct { -1, -1, false } +diff --git a/gcc/ira-color.c b/gcc/ira-color.c +index aa91b56c81f..8a90ae1b4e6 100644 +--- a/gcc/ira-color.c ++++ b/gcc/ira-color.c +@@ -1108,7 +1108,6 @@ setup_profitable_hard_regs (void) + || empty_profitable_hard_regs (a)) + continue; + data = ALLOCNO_COLOR_DATA (a); +- mode = ALLOCNO_MODE (a); + if ((costs = ALLOCNO_UPDATED_HARD_REG_COSTS (a)) != NULL + || (costs = ALLOCNO_HARD_REG_COSTS (a)) != NULL) + { +diff --git a/gcc/ira.c b/gcc/ira.c +index b330f2a287b..4262e5cf3b7 100644 +--- a/gcc/ira.c ++++ b/gcc/ira.c +@@ -4414,10 +4414,9 @@ rtx_moveable_p (rtx *loc, enum op_type type) + { + const char *fmt; + rtx x = *loc; +- enum rtx_code code = GET_CODE (x); + int i, j; + +- code = GET_CODE (x); ++ enum rtx_code code = GET_CODE (x); + switch (code) + { + case CONST: +diff --git a/gcc/lra-eliminations.c b/gcc/lra-eliminations.c +index ee9fd51f129..7a345a52ae1 100644 +--- a/gcc/lra-eliminations.c ++++ b/gcc/lra-eliminations.c +@@ -1146,7 +1146,6 @@ eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, bool first_p, + single_set without having put new body into the insn and the + re-recognition won't hurt in this rare case. */ + id = lra_update_insn_recog_data (insn); +- static_id = id->insn_static_data; + } + + /* Spill pseudos which are assigned to hard registers in SET. 
Add +diff --git a/gcc/lra.c b/gcc/lra.c +index 1d2578f8c12..10b85340fc5 100644 +--- a/gcc/lra.c ++++ b/gcc/lra.c +@@ -1029,12 +1029,8 @@ lra_set_insn_recog_data (rtx_insn *insn) + data->operand_loc, + constraints, operand_mode, NULL); + if (nop > 0) +- { +- const char *p = recog_data.constraints[0]; +- +- for (p = constraints[0]; *p; p++) +- nalt += *p == ','; +- } ++ for (const char *p =constraints[0]; *p; p++) ++ nalt += *p == ','; + data->insn_static_data = insn_static_data + = get_static_insn_data (-1, nop, 0, nalt); + for (i = 0; i < nop; i++) +diff --git a/gcc/machmode.h b/gcc/machmode.h +index 3a7cee88962..d564f9c6458 100644 +--- a/gcc/machmode.h ++++ b/gcc/machmode.h +@@ -257,6 +257,9 @@ public: + bool exists () const; + template bool exists (U *) const; + ++ bool operator== (const T &m) const { return m_mode == m; } ++ bool operator!= (const T &m) const { return m_mode != m; } ++ + private: + machine_mode m_mode; + }; +@@ -841,20 +844,9 @@ smallest_int_mode_for_size (poly_uint64 size) + extern opt_scalar_int_mode int_mode_for_mode (machine_mode); + extern opt_machine_mode bitwise_mode_for_mode (machine_mode); + extern opt_machine_mode mode_for_vector (scalar_mode, poly_uint64); +-extern opt_machine_mode mode_for_int_vector (unsigned int, poly_uint64); +- +-/* Return the integer vector equivalent of MODE, if one exists. In other +- words, return the mode for an integer vector that has the same number +- of bits as MODE and the same number of elements as MODE, with the +- latter being 1 if MODE is scalar. The returned mode can be either +- an integer mode or a vector mode. */ +- +-inline opt_machine_mode +-mode_for_int_vector (machine_mode mode) +-{ +- return mode_for_int_vector (GET_MODE_UNIT_BITSIZE (mode), +- GET_MODE_NUNITS (mode)); +-} ++extern opt_machine_mode related_vector_mode (machine_mode, scalar_mode, ++ poly_uint64 = 0); ++extern opt_machine_mode related_int_vector_mode (machine_mode); + + /* A class for iterating through possible bitfield modes. */ + class bit_field_mode_iterator +diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c +index 74159734fc8..0d7f104a2f2 100644 +--- a/gcc/omp-expand.c ++++ b/gcc/omp-expand.c +@@ -4974,6 +4974,13 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd) + && loop->safelen > 1) + { + loop->force_vectorize = true; ++ if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))) ++ { ++ unsigned HOST_WIDE_INT v ++ = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)); ++ if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen) ++ loop->simdlen = v; ++ } + cfun->has_force_vectorize_loops = true; + } + else if (dont_vectorize) +diff --git a/gcc/omp-general.c b/gcc/omp-general.c +index 356772ff458..4fb53af7587 100644 +--- a/gcc/omp-general.c ++++ b/gcc/omp-general.c +@@ -468,13 +468,16 @@ omp_max_vf (void) + && global_options_set.x_flag_tree_loop_vectorize)) + return 1; + +- auto_vector_sizes sizes; +- targetm.vectorize.autovectorize_vector_sizes (&sizes); +- if (!sizes.is_empty ()) ++ auto_vector_modes modes; ++ targetm.vectorize.autovectorize_vector_modes (&modes, true); ++ if (!modes.is_empty ()) + { + poly_uint64 vf = 0; +- for (unsigned int i = 0; i < sizes.length (); ++i) +- vf = ordered_max (vf, sizes[i]); ++ for (unsigned int i = 0; i < modes.length (); ++i) ++ /* The returned modes use the smallest element size (and thus ++ the largest nunits) for the vectorization approach that they ++ represent. 
*/ ++ vf = ordered_max (vf, GET_MODE_NUNITS (modes[i])); + return vf; + } + +diff --git a/gcc/omp-low.c b/gcc/omp-low.c +index 813cefd69b9..7866639f76c 100644 +--- a/gcc/omp-low.c ++++ b/gcc/omp-low.c +@@ -3650,11 +3650,8 @@ omp_clause_aligned_alignment (tree clause) + /* Otherwise return implementation defined alignment. */ + unsigned int al = 1; + opt_scalar_mode mode_iter; +- auto_vector_sizes sizes; +- targetm.vectorize.autovectorize_vector_sizes (&sizes); +- poly_uint64 vs = 0; +- for (unsigned int i = 0; i < sizes.length (); ++i) +- vs = ordered_max (vs, sizes[i]); ++ auto_vector_modes modes; ++ targetm.vectorize.autovectorize_vector_modes (&modes, true); + static enum mode_class classes[] + = { MODE_INT, MODE_VECTOR_INT, MODE_FLOAT, MODE_VECTOR_FLOAT }; + for (int i = 0; i < 4; i += 2) +@@ -3665,19 +3662,18 @@ omp_clause_aligned_alignment (tree clause) + machine_mode vmode = targetm.vectorize.preferred_simd_mode (mode); + if (GET_MODE_CLASS (vmode) != classes[i + 1]) + continue; +- while (maybe_ne (vs, 0U) +- && known_lt (GET_MODE_SIZE (vmode), vs) +- && GET_MODE_2XWIDER_MODE (vmode).exists ()) +- vmode = GET_MODE_2XWIDER_MODE (vmode).require (); ++ machine_mode alt_vmode; ++ for (unsigned int j = 0; j < modes.length (); ++j) ++ if (related_vector_mode (modes[j], mode).exists (&alt_vmode) ++ && known_ge (GET_MODE_SIZE (alt_vmode), GET_MODE_SIZE (vmode))) ++ vmode = alt_vmode; + + tree type = lang_hooks.types.type_for_mode (mode, 1); + if (type == NULL_TREE || TYPE_MODE (type) != mode) + continue; +- poly_uint64 nelts = exact_div (GET_MODE_SIZE (vmode), +- GET_MODE_SIZE (mode)); +- type = build_vector_type (type, nelts); +- if (TYPE_MODE (type) != vmode) +- continue; ++ type = build_vector_type_for_mode (type, vmode); ++ /* The functions above are not allowed to return invalid modes. 
*/ ++ gcc_assert (TYPE_MODE (type) == vmode); + if (TYPE_ALIGN_UNIT (type) > al) + al = TYPE_ALIGN_UNIT (type); + } +diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c +index 71c73fb43cc..61de7dc283b 100644 +--- a/gcc/optabs-query.c ++++ b/gcc/optabs-query.c +@@ -354,11 +354,8 @@ can_conditionally_move_p (machine_mode mode) + opt_machine_mode + qimode_for_vec_perm (machine_mode mode) + { +- machine_mode qimode; +- if (GET_MODE_INNER (mode) != QImode +- && mode_for_vector (QImode, GET_MODE_SIZE (mode)).exists (&qimode) +- && VECTOR_MODE_P (qimode)) +- return qimode; ++ if (GET_MODE_INNER (mode) != QImode) ++ return related_vector_mode (mode, QImode, GET_MODE_SIZE (mode)); + return opt_machine_mode (); + } + +@@ -587,22 +584,21 @@ can_vec_mask_load_store_p (machine_mode mode, + if (!VECTOR_MODE_P (vmode)) + return false; + +- if ((targetm.vectorize.get_mask_mode +- (GET_MODE_NUNITS (vmode), GET_MODE_SIZE (vmode)).exists (&mask_mode)) ++ if (targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode) + && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing) + return true; + +- auto_vector_sizes vector_sizes; +- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes); +- for (unsigned int i = 0; i < vector_sizes.length (); ++i) ++ auto_vector_modes vector_modes; ++ targetm.vectorize.autovectorize_vector_modes (&vector_modes, true); ++ for (unsigned int i = 0; i < vector_modes.length (); ++i) + { +- poly_uint64 cur = vector_sizes[i]; ++ poly_uint64 cur = GET_MODE_SIZE (vector_modes[i]); + poly_uint64 nunits; + if (!multiple_p (cur, GET_MODE_SIZE (smode), &nunits)) + continue; + if (mode_for_vector (smode, nunits).exists (&vmode) + && VECTOR_MODE_P (vmode) +- && targetm.vectorize.get_mask_mode (nunits, cur).exists (&mask_mode) ++ && targetm.vectorize.get_mask_mode (vmode).exists (&mask_mode) + && convert_optab_handler (op, vmode, mask_mode) != CODE_FOR_nothing) + return true; + } +diff --git a/gcc/optabs-tree.c b/gcc/optabs-tree.c +index 8157798cc71..341e02bd51c 100644 +--- a/gcc/optabs-tree.c ++++ b/gcc/optabs-tree.c +@@ -300,6 +300,20 @@ supportable_convert_operation (enum tree_code code, + return true; + } + ++ if (GET_MODE_UNIT_PRECISION (m1) > GET_MODE_UNIT_PRECISION (m2) ++ && can_extend_p (m1, m2, TYPE_UNSIGNED (vectype_in))) ++ { ++ *code1 = code; ++ return true; ++ } ++ ++ if (GET_MODE_UNIT_PRECISION (m1) < GET_MODE_UNIT_PRECISION (m2) ++ && convert_optab_handler (trunc_optab, m1, m2) != CODE_FOR_nothing) ++ { ++ *code1 = code; ++ return true; ++ } ++ + /* Now check for builtin. 
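The extend and truncate cases added above are what allow loops whose statements mix element widths to vectorize without a target builtin. A minimal, self-contained C example of the kind of conversion involved (an illustration, not code from the patch):

/* Each iteration zero-extends an 8-bit element to 16 bits; with the new
   supportable_convert_operation paths the vectorizer can map the loop onto
   a vector zero-extend pattern when the target provides one.  */
void
widen_u8_to_u16 (unsigned short *restrict dst,
                 const unsigned char *restrict src, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = src[i];
}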
*/ + if (targetm.vectorize.builtin_conversion + && targetm.vectorize.builtin_conversion (code, vectype_out, vectype_in)) +diff --git a/gcc/optabs.c b/gcc/optabs.c +index 7d7efe0a4a2..c2c1274ebdb 100644 +--- a/gcc/optabs.c ++++ b/gcc/optabs.c +@@ -2095,8 +2095,8 @@ expand_twoval_binop (optab binoptab, rtx op0, rtx op1, rtx targ0, rtx targ1, + xop1 = avoid_expensive_constant (mode1, binoptab, 1, xop1, unsignedp); + + create_fixed_operand (&ops[0], targ0); +- create_convert_operand_from (&ops[1], op0, mode, unsignedp); +- create_convert_operand_from (&ops[2], op1, mode, unsignedp); ++ create_convert_operand_from (&ops[1], xop0, mode, unsignedp); ++ create_convert_operand_from (&ops[2], xop1, mode, unsignedp); + create_fixed_operand (&ops[3], targ1); + if (maybe_expand_insn (icode, 4, ops)) + return 1; +@@ -5486,7 +5486,7 @@ expand_vec_perm_1 (enum insn_code icode, rtx target, + struct expand_operand ops[4]; + + gcc_assert (GET_MODE_CLASS (smode) == MODE_VECTOR_INT +- || mode_for_int_vector (tmode).require () == smode); ++ || related_int_vector_mode (tmode).require () == smode); + create_output_operand (&ops[0], target, tmode); + create_input_operand (&ops[3], sel, smode); + +@@ -5611,8 +5611,7 @@ expand_vec_perm_const (machine_mode mode, rtx v0, rtx v1, + /* The optabs are only defined for selectors with the same width + as the values being permuted. */ + machine_mode required_sel_mode; +- if (!mode_for_int_vector (mode).exists (&required_sel_mode) +- || !VECTOR_MODE_P (required_sel_mode)) ++ if (!related_int_vector_mode (mode).exists (&required_sel_mode)) + { + delete_insns_since (last); + return NULL_RTX; +diff --git a/gcc/params.def b/gcc/params.def +index 3f18642475a..b269045fb9c 100644 +--- a/gcc/params.def ++++ b/gcc/params.def +@@ -1403,7 +1403,7 @@ DEFPARAM (PARAM_MAX_VRP_SWITCH_ASSERTIONS, + DEFPARAM (PARAM_VECT_EPILOGUES_NOMASK, + "vect-epilogues-nomask", + "Enable loop epilogue vectorization using smaller vector size.", +- 0, 0, 1) ++ 1, 0, 1) + + DEFPARAM(PARAM_UNROLL_JAM_MIN_PERCENT, + "unroll-jam-min-percent", +diff --git a/gcc/poly-int.h b/gcc/poly-int.h +index d68a652b5fa..ba39ca471be 100644 +--- a/gcc/poly-int.h ++++ b/gcc/poly-int.h +@@ -1528,6 +1528,29 @@ constant_lower_bound (const poly_int_pod &a) + return a.coeffs[0]; + } + ++/* Return the constant lower bound of A, given that it is no less than B. */ ++ ++template ++inline POLY_CONST_COEFF (Ca, Cb) ++constant_lower_bound_with_limit (const poly_int_pod &a, const Cb &b) ++{ ++ if (known_ge (a, b)) ++ return a.coeffs[0]; ++ return b; ++} ++ ++/* Return the constant upper bound of A, given that it is no greater ++ than B. */ ++ ++template ++inline POLY_CONST_COEFF (Ca, Cb) ++constant_upper_bound_with_limit (const poly_int_pod &a, const Cb &b) ++{ ++ if (known_le (a, b)) ++ return a.coeffs[0]; ++ return b; ++} ++ + /* Return a value that is known to be no greater than A and B. This + will be the greatest lower bound for some indeterminate values but + not necessarily for all. 
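A short worked example of the two bound helpers defined above, assuming GCC's poly_int conventions with a = 4 + 4x (x a runtime indeterminate that is never negative): constant_lower_bound_with_limit (a, 2) is 4, because known_ge (a, 2) already holds, while constant_lower_bound_with_limit (a, 8) is 8, falling back to the caller-asserted limit. The standalone sketch below mirrors that single-coefficient behaviour with plain integers.

#include <assert.h>

/* Single-coefficient mirror of constant_lower_bound_with_limit: when the
   relation to the limit is already known, the constant term is the answer;
   otherwise the caller-asserted limit is used.  */
static long
lower_bound_with_limit_sketch (long coeff0, int known_ge_limit, long limit)
{
  return known_ge_limit ? coeff0 : limit;
}

int
main (void)
{
  /* a = 4 + 4x: the bound against 2 is known, the bound against 8 is not.  */
  assert (lower_bound_with_limit_sketch (4, 1, 2) == 4);
  assert (lower_bound_with_limit_sketch (4, 0, 8) == 8);
  return 0;
}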
*/ +diff --git a/gcc/read-rtl.c b/gcc/read-rtl.c +index ebd69bde531..1af51f686c7 100644 +--- a/gcc/read-rtl.c ++++ b/gcc/read-rtl.c +@@ -1282,7 +1282,7 @@ read_subst_mapping (htab_t subst_iters_table, htab_t subst_attrs_table, + m = add_mapping (&substs, subst_iters_table, attr_operands[1]); + end_ptr = &m->values; + end_ptr = add_map_value (end_ptr, 1, ""); +- end_ptr = add_map_value (end_ptr, 2, ""); ++ add_map_value (end_ptr, 2, ""); + + add_define_attr_for_define_subst (attr_operands[1], queue); + } +@@ -1290,7 +1290,7 @@ read_subst_mapping (htab_t subst_iters_table, htab_t subst_attrs_table, + m = add_mapping (&substs, subst_attrs_table, attr_operands[0]); + end_ptr = &m->values; + end_ptr = add_map_value (end_ptr, 1, attr_operands[2]); +- end_ptr = add_map_value (end_ptr, 2, attr_operands[3]); ++ add_map_value (end_ptr, 2, attr_operands[3]); + } + + /* Check newly-created code iterator ITERATOR to see whether every code has the +diff --git a/gcc/regrename.c b/gcc/regrename.c +index 637b3cbe6d7..5259d565e58 100644 +--- a/gcc/regrename.c ++++ b/gcc/regrename.c +@@ -1426,10 +1426,9 @@ scan_rtx (rtx_insn *insn, rtx *loc, enum reg_class cl, enum scan_actions action, + { + const char *fmt; + rtx x = *loc; +- enum rtx_code code = GET_CODE (x); + int i, j; + +- code = GET_CODE (x); ++ enum rtx_code code = GET_CODE (x); + switch (code) + { + case CONST: +diff --git a/gcc/reorg.c b/gcc/reorg.c +index 81349382b81..bdfcf8851cd 100644 +--- a/gcc/reorg.c ++++ b/gcc/reorg.c +@@ -2708,14 +2708,13 @@ fill_slots_from_thread (rtx_jump_insn *insn, rtx condition, + && GET_CODE (PATTERN (new_thread)) != ASM_INPUT + && asm_noperands (PATTERN (new_thread)) < 0) + { +- rtx pat = PATTERN (new_thread); + rtx dest; + rtx src; + + /* We know "new_thread" is an insn due to NONJUMP_INSN_P (new_thread) + above. */ + trial = as_a (new_thread); +- pat = PATTERN (trial); ++ rtx pat = PATTERN (trial); + + if (!NONJUMP_INSN_P (trial) + || GET_CODE (pat) != SET +diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c +index 50bbb79655b..bdbd1b98eba 100644 +--- a/gcc/simplify-rtx.c ++++ b/gcc/simplify-rtx.c +@@ -6709,6 +6709,17 @@ simplify_subreg (machine_mode outermode, rtx op, + } + } + ++ /* If OP is a vector comparison and the subreg is not changing the ++ number of elements or the size of the elements, change the result ++ of the comparison to the new mode. */ ++ if (COMPARISON_P (op) ++ && VECTOR_MODE_P (outermode) ++ && VECTOR_MODE_P (innermode) ++ && known_eq (GET_MODE_NUNITS (outermode), GET_MODE_NUNITS (innermode)) ++ && known_eq (GET_MODE_UNIT_SIZE (outermode), ++ GET_MODE_UNIT_SIZE (innermode))) ++ return simplify_gen_relational (GET_CODE (op), outermode, innermode, ++ XEXP (op, 0), XEXP (op, 1)); + return NULL_RTX; + } + +diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c +index 5d6f2e0166c..a054b7887e7 100644 +--- a/gcc/stor-layout.c ++++ b/gcc/stor-layout.c +@@ -514,18 +514,43 @@ mode_for_vector (scalar_mode innermode, poly_uint64 nunits) + return opt_machine_mode (); + } + +-/* Return the mode for a vector that has NUNITS integer elements of +- INT_BITS bits each, if such a mode exists. The mode can be either +- an integer mode or a vector mode. */ ++/* If a piece of code is using vector mode VECTOR_MODE and also wants ++ to operate on elements of mode ELEMENT_MODE, return the vector mode ++ it should use for those elements. 
If NUNITS is nonzero, ensure that ++ the mode has exactly NUNITS elements, otherwise pick whichever vector ++ size pairs the most naturally with VECTOR_MODE; this may mean choosing ++ a mode with a different size and/or number of elements, depending on ++ what the target prefers. Return an empty opt_machine_mode if there ++ is no supported vector mode with the required properties. ++ ++ Unlike mode_for_vector. any returned mode is guaranteed to satisfy ++ both VECTOR_MODE_P and targetm.vector_mode_supported_p. */ + + opt_machine_mode +-mode_for_int_vector (unsigned int int_bits, poly_uint64 nunits) ++related_vector_mode (machine_mode vector_mode, scalar_mode element_mode, ++ poly_uint64 nunits) + { ++ gcc_assert (VECTOR_MODE_P (vector_mode)); ++ return targetm.vectorize.related_mode (vector_mode, element_mode, nunits); ++} ++ ++/* If a piece of code is using vector mode VECTOR_MODE and also wants ++ to operate on integer vectors with the same element size and number ++ of elements, return the vector mode it should use. Return an empty ++ opt_machine_mode if there is no supported vector mode with the ++ required properties. ++ ++ Unlike mode_for_vector. any returned mode is guaranteed to satisfy ++ both VECTOR_MODE_P and targetm.vector_mode_supported_p. */ ++ ++opt_machine_mode ++related_int_vector_mode (machine_mode vector_mode) ++{ ++ gcc_assert (VECTOR_MODE_P (vector_mode)); + scalar_int_mode int_mode; +- machine_mode vec_mode; +- if (int_mode_for_size (int_bits, 0).exists (&int_mode) +- && mode_for_vector (int_mode, nunits).exists (&vec_mode)) +- return vec_mode; ++ if (int_mode_for_mode (GET_MODE_INNER (vector_mode)).exists (&int_mode)) ++ return related_vector_mode (vector_mode, int_mode, ++ GET_MODE_NUNITS (vector_mode)); + return opt_machine_mode (); + } + +diff --git a/gcc/target.def b/gcc/target.def +index 66cee075018..f998470fffd 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -1894,33 +1894,80 @@ reached. The default is @var{mode} which means no splitting.", + /* Returns a mask of vector sizes to iterate over when auto-vectorizing + after processing the preferred one derived from preferred_simd_mode. */ + DEFHOOK +-(autovectorize_vector_sizes, +- "If the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is not\n\ +-the only one that is worth considering, this hook should add all suitable\n\ +-vector sizes to @var{sizes}, in order of decreasing preference. The first\n\ +-one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.\n\ ++(autovectorize_vector_modes, ++ "If using the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}\n\ ++is not the only approach worth considering, this hook should add one mode to\n\ ++@var{modes} for each useful alternative approach. These modes are then\n\ ++passed to @code{TARGET_VECTORIZE_RELATED_MODE} to obtain the vector mode\n\ ++for a given element mode.\n\ ++\n\ ++The modes returned in @var{modes} should use the smallest element mode\n\ ++possible for the vectorization approach that they represent, preferring\n\ ++integer modes over floating-poing modes in the event of a tie. The first\n\ ++mode should be the @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} for its\n\ ++element mode.\n\ ++\n\ ++If @var{all} is true, add suitable vector modes even when they are generally\n\ ++not expected to be worthwhile.\n\ ++\n\ ++The hook returns a bitmask of flags that control how the modes in\n\ ++@var{modes} are used. 
The flags are:\n\ ++@table @code\n\ ++@item VECT_COMPARE_COSTS\n\ ++Tells the loop vectorizer to try all the provided modes and pick the one\n\ ++with the lowest cost. By default the vectorizer will choose the first\n\ ++mode that works.\n\ ++@end table\n\ + \n\ + The hook does not need to do anything if the vector returned by\n\ + @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is the only one relevant\n\ +-for autovectorization. The default implementation does nothing.", +- void, +- (vector_sizes *sizes), +- default_autovectorize_vector_sizes) ++for autovectorization. The default implementation adds no modes and\n\ ++returns 0.", ++ unsigned int, ++ (vector_modes *modes, bool all), ++ default_autovectorize_vector_modes) ++ ++DEFHOOK ++(related_mode, ++ "If a piece of code is using vector mode @var{vector_mode} and also wants\n\ ++to operate on elements of mode @var{element_mode}, return the vector mode\n\ ++it should use for those elements. If @var{nunits} is nonzero, ensure that\n\ ++the mode has exactly @var{nunits} elements, otherwise pick whichever vector\n\ ++size pairs the most naturally with @var{vector_mode}. Return an empty\n\ ++@code{opt_machine_mode} if there is no supported vector mode with the\n\ ++required properties.\n\ ++\n\ ++There is no prescribed way of handling the case in which @var{nunits}\n\ ++is zero. One common choice is to pick a vector mode with the same size\n\ ++as @var{vector_mode}; this is the natural choice if the target has a\n\ ++fixed vector size. Another option is to choose a vector mode with the\n\ ++same number of elements as @var{vector_mode}; this is the natural choice\n\ ++if the target has a fixed number of elements. Alternatively, the hook\n\ ++might choose a middle ground, such as trying to keep the number of\n\ ++elements as similar as possible while applying maximum and minimum\n\ ++vector sizes.\n\ ++\n\ ++The default implementation uses @code{mode_for_vector} to find the\n\ ++requested mode, returning a mode with the same size as @var{vector_mode}\n\ ++when @var{nunits} is zero. This is the correct behavior for most targets.", ++ opt_machine_mode, ++ (machine_mode vector_mode, scalar_mode element_mode, poly_uint64 nunits), ++ default_vectorize_related_mode) + + /* Function to get a target mode for a vector mask. */ + DEFHOOK + (get_mask_mode, +- "A vector mask is a value that holds one boolean result for every element\n\ +-in a vector. This hook returns the machine mode that should be used to\n\ +-represent such a mask when the vector in question is @var{length} bytes\n\ +-long and contains @var{nunits} elements. The hook returns an empty\n\ +-@code{opt_machine_mode} if no such mode exists.\n\ +-\n\ +-The default implementation returns the mode of an integer vector that\n\ +-is @var{length} bytes long and that contains @var{nunits} elements,\n\ +-if such a mode exists.", ++ "Return the mode to use for a vector mask that holds one boolean\n\ ++result for each element of vector mode @var{mode}. The returned mask mode\n\ ++can be a vector of integers (class @code{MODE_VECTOR_INT}), a vector of\n\ ++booleans (class @code{MODE_VECTOR_BOOL}) or a scalar integer (class\n\ ++@code{MODE_INT}). 
Return an empty @code{opt_machine_mode} if no such\n\ ++mask mode exists.\n\ ++\n\ ++The default implementation returns a @code{MODE_VECTOR_INT} with the\n\ ++same size and number of elements as @var{mode}, if such a mode exists.", + opt_machine_mode, +- (poly_uint64 nunits, poly_uint64 length), ++ (machine_mode mode), + default_get_mask_mode) + + /* Function to say whether a masked operation is expensive when the +diff --git a/gcc/target.h b/gcc/target.h +index 008932b5dbd..057e6ae8768 100644 +--- a/gcc/target.h ++++ b/gcc/target.h +@@ -199,11 +199,19 @@ enum vect_cost_model_location { + class vec_perm_indices; + + /* The type to use for lists of vector sizes. */ +-typedef vec vector_sizes; ++typedef vec vector_modes; + + /* Same, but can be used to construct local lists that are + automatically freed. */ +-typedef auto_vec auto_vector_sizes; ++typedef auto_vec auto_vector_modes; ++ ++/* Flags returned by TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES: ++ ++ VECT_COMPARE_COSTS ++ Tells the loop vectorizer to try all the provided modes and ++ pick the one with the lowest cost. By default the vectorizer ++ will choose the first mode that works. */ ++const unsigned int VECT_COMPARE_COSTS = 1U << 0; + + /* The target structure. This holds all the backend hooks. */ + #define DEFHOOKPOD(NAME, DOC, TYPE, INIT) TYPE NAME; +diff --git a/gcc/targhooks.c b/gcc/targhooks.c +index 02b9dc59611..6396f6f4bdf 100644 +--- a/gcc/targhooks.c ++++ b/gcc/targhooks.c +@@ -1312,32 +1312,39 @@ default_split_reduction (machine_mode mode) + return mode; + } + +-/* By default only the size derived from the preferred vector mode +- is tried. */ ++/* By default only the preferred vector mode is tried. */ + +-void +-default_autovectorize_vector_sizes (vector_sizes *) ++unsigned int ++default_autovectorize_vector_modes (vector_modes *, bool) + { ++ return 0; + } + +-/* By default a vector of integers is used as a mask. */ ++/* The default implementation of TARGET_VECTORIZE_RELATED_MODE. */ + + opt_machine_mode +-default_get_mask_mode (poly_uint64 nunits, poly_uint64 vector_size) +-{ +- unsigned int elem_size = vector_element_size (vector_size, nunits); +- scalar_int_mode elem_mode +- = smallest_int_mode_for_size (elem_size * BITS_PER_UNIT); +- machine_mode vector_mode; ++default_vectorize_related_mode (machine_mode vector_mode, ++ scalar_mode element_mode, ++ poly_uint64 nunits) ++{ ++ machine_mode result_mode; ++ if ((maybe_ne (nunits, 0U) ++ || multiple_p (GET_MODE_SIZE (vector_mode), ++ GET_MODE_SIZE (element_mode), &nunits)) ++ && mode_for_vector (element_mode, nunits).exists (&result_mode) ++ && VECTOR_MODE_P (result_mode) ++ && targetm.vector_mode_supported_p (result_mode)) ++ return result_mode; + +- gcc_assert (known_eq (elem_size * nunits, vector_size)); ++ return opt_machine_mode (); ++} + +- if (mode_for_vector (elem_mode, nunits).exists (&vector_mode) +- && VECTOR_MODE_P (vector_mode) +- && targetm.vector_mode_supported_p (vector_mode)) +- return vector_mode; ++/* By default a vector of integers is used as a mask. */ + +- return opt_machine_mode (); ++opt_machine_mode ++default_get_mask_mode (machine_mode mode) ++{ ++ return related_int_vector_mode (mode); + } + + /* By default consider masked stores to be expensive. 
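Spelling out the default just above: the mask mode is simply the integer-element sibling of the data mode, so (assuming the usual 128-bit modes exist) default_get_mask_mode maps V4SFmode to V4SImode and leaves V4SImode as is. A hypothetical caller in target-independent code, with invented names and assuming GCC's internal environment, might query it as in this sketch.

/* Return true if DATA_MODE has a usable mask mode under the new
   single-argument hook (hypothetical helper, for illustration).  */
static bool
example_have_mask_mode_p (machine_mode data_mode)
{
  machine_mode mask_mode;
  return targetm.vectorize.get_mask_mode (data_mode).exists (&mask_mode);
}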
*/ +diff --git a/gcc/targhooks.h b/gcc/targhooks.h +index 59436278dcf..2d599190891 100644 +--- a/gcc/targhooks.h ++++ b/gcc/targhooks.h +@@ -110,8 +110,11 @@ default_builtin_support_vector_misalignment (machine_mode mode, + int, bool); + extern machine_mode default_preferred_simd_mode (scalar_mode mode); + extern machine_mode default_split_reduction (machine_mode); +-extern void default_autovectorize_vector_sizes (vector_sizes *); +-extern opt_machine_mode default_get_mask_mode (poly_uint64, poly_uint64); ++extern unsigned int default_autovectorize_vector_modes (vector_modes *, bool); ++extern opt_machine_mode default_vectorize_related_mode (machine_mode, ++ scalar_mode, ++ poly_uint64); ++extern opt_machine_mode default_get_mask_mode (machine_mode); + extern bool default_empty_mask_is_expensive (unsigned); + extern void *default_init_cost (struct loop *); + extern unsigned default_add_stmt_cost (void *, int, enum vect_cost_for_stmt, +diff --git a/gcc/testsuite/g++.dg/opt/pr92317.C b/gcc/testsuite/g++.dg/opt/pr92317.C +new file mode 100644 +index 00000000000..2bb9729fc96 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/opt/pr92317.C +@@ -0,0 +1,51 @@ ++// Copied from pr87967.C ++// { dg-do compile { target c++11 } } ++// { dg-options "-O2 -ftree-vectorize -fno-tree-pre --param vect-epilogues-nomask=1" } ++ ++void h(); ++template struct k { using d = b; }; ++template class> using e = k; ++template class f> ++using g = typename e::d; ++struct l { ++ template using ab = typename i::j; ++}; ++struct n : l { ++ using j = g; ++}; ++class o { ++public: ++ long r(); ++}; ++char m; ++char s() { ++ if (m) ++ return '0'; ++ return 'A'; ++} ++class t { ++public: ++ typedef char *ad; ++ ad m_fn2(); ++}; ++void fn3() { ++ char *a; ++ t b; ++ bool p = false; ++ while (*a) { ++ h(); ++ o c; ++ if (*a) ++ a++; ++ if (c.r()) { ++ n::j q; ++ for (t::ad d = b.m_fn2(), e; d != e; d++) { ++ char f = *q; ++ *d = f + s(); ++ } ++ p = true; ++ } ++ } ++ if (p) ++ throw; ++} +diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr90883.C b/gcc/testsuite/g++.dg/tree-ssa/pr90883.C +new file mode 100644 +index 00000000000..0e622f263d2 +--- /dev/null ++++ b/gcc/testsuite/g++.dg/tree-ssa/pr90883.C +@@ -0,0 +1,20 @@ ++// { dg-options "-O2 -Os -fdump-tree-dse-details -std=c++11 --param max-inline-insns-size=1" } ++ ++ ++ class C ++ { ++ char a[7]{}; ++ int b{}; ++ }; ++ ++ C slow() ++ { ++ return {}; ++ } ++ ++ ++// We want to match enough here to capture that we deleted an empty ++// constructor store ++// aarch64 and mips will expand to loop to clear because CLEAR_RATIO. 
++// { dg-final { scan-tree-dump "Deleted redundant store: .*\.a = {}" "dse1" { xfail { aarch64-*-* mips*-*-* } } } } ++ +diff --git a/gcc/testsuite/gcc.dg/pr92162.c b/gcc/testsuite/gcc.dg/pr92162.c +new file mode 100644 +index 00000000000..ed82595a752 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/pr92162.c +@@ -0,0 +1,10 @@ ++/* { dg-do compile } */ ++/* { dg-options "-Ofast" } */ ++ ++short int s8; ++ ++void __attribute__ ((simd)) ++gn (void) ++{ ++ s8 = 0; ++} +diff --git a/gcc/testsuite/gcc.dg/torture/pr91896.c b/gcc/testsuite/gcc.dg/torture/pr91896.c +new file mode 100644 +index 00000000000..e728538bb9a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr91896.c +@@ -0,0 +1,18 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-ftree-vectorize" } */ ++ ++unsigned int ++zj (unsigned int et) ++{ ++ signed char jr = 0; ++ ++ do { ++ et *= 3; ++ jr += 2; ++ } while (jr >= 0); ++ ++ if (et == (unsigned int) jr) ++ et = 0; ++ ++ return et; ++} +diff --git a/gcc/testsuite/gcc.dg/torture/pr92069.c b/gcc/testsuite/gcc.dg/torture/pr92069.c +new file mode 100644 +index 00000000000..806ff5fba14 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr92069.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-ftree-vectorize" } */ ++ ++unsigned int a, c, d; ++double b; ++void e() ++{ ++ for (; d; d++) ++ { ++ double f; ++ a = 2; ++ for (; a; a++) ++ { ++ c = b; ++ b = f; ++ f = c; ++ } ++ } ++} +diff --git a/gcc/testsuite/gcc.dg/torture/pr92173.c b/gcc/testsuite/gcc.dg/torture/pr92173.c +new file mode 100644 +index 00000000000..fcb3548b716 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr92173.c +@@ -0,0 +1,11 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-ftree-vectorize" } */ ++ ++unsigned int ++yo (unsigned int o0, signed char s1) ++{ ++ for (s1 = 0; s1 < 1; s1 -= 2) ++ o0 += o0; ++ ++ return o0 + s1; ++} +diff --git a/gcc/testsuite/gcc.dg/torture/pr92241.c b/gcc/testsuite/gcc.dg/torture/pr92241.c +new file mode 100644 +index 00000000000..331d03b3d44 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr92241.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-ftree-vectorize" } */ ++ ++int a, b; ++char c[2]; ++void d() { ++ char e; ++ for (; b; b--) { ++ e = 0; ++ for (; e <= 1; e++) ++ a &= c[b + e] && 1; ++ } ++} +diff --git a/gcc/testsuite/gcc.dg/torture/pr92275.c b/gcc/testsuite/gcc.dg/torture/pr92275.c +new file mode 100644 +index 00000000000..b9f70889758 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr92275.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-ftree-vectorize" } */ ++ ++unsigned long a, c; ++int *b, *b2; ++long d; ++ ++void fn1() ++{ ++ for (; b < b2; b++) ++ d += *b * c; ++ d *= a; ++} +diff --git a/gcc/testsuite/gcc.dg/torture/pr92371.c b/gcc/testsuite/gcc.dg/torture/pr92371.c +new file mode 100644 +index 00000000000..0c78d32f471 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/torture/pr92371.c +@@ -0,0 +1,12 @@ ++/* { dg-do compile } */ ++/* { dg-additional-options "-ftree-vectorize" } */ ++ ++int a, b; ++void d() ++{ ++ int c = sizeof(int); ++ for (; a; a++) ++ c *= sizeof(int); ++ c *= sizeof(int); ++ b = c; ++} +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-36.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-36.c +new file mode 100644 +index 00000000000..23a53bb4ad2 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-36.c +@@ -0,0 +1,65 @@ ++/* { dg-options "-O2 -fdump-tree-dse-details -fno-tree-fre" } */ ++#include ++#include ++ ++struct X ++{ ++ char mem0[10]; ++ char 
mem1[10]; ++}; ++ ++ ++void blah (struct X); ++ ++ ++void ++foo1() ++{ ++ struct X x = { }; ++ memset (x.mem1, 0, sizeof x.mem1); ++ blah (x); ++} ++ ++void ++foo2() ++{ ++ struct X x = { }; ++ x.mem1[5] = 0; ++ blah (x); ++} ++ ++void ++bar1 () ++{ ++ struct X x; ++ memset (&x, 0, sizeof x); ++ memset (&x.mem1, 0, sizeof x.mem1); ++ blah (x); ++} ++void ++bar2 () ++{ ++ struct X x; ++ memset (&x, 0, sizeof x); ++ x.mem1[5] = 0; ++ blah (x); ++} ++ ++void ++baz1 () ++{ ++ struct X *x = calloc (sizeof (struct X), 1); ++ memset (&x->mem1, 0, sizeof x->mem1); ++ blah (*x); ++} ++ ++void ++baz2 () ++{ ++ struct X *x = calloc (sizeof (struct X), 1); ++ x->mem1[5] = 0; ++ blah (*x); ++} ++/* { dg-final { scan-tree-dump-times "Deleted redundant call" 3 "dse1" } } */ ++/* { dg-final { scan-tree-dump-times "Deleted redundant store" 3 "dse1" } } */ ++ +diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-div-2.c b/gcc/testsuite/gcc.dg/vect/bb-slp-div-2.c +new file mode 100644 +index 00000000000..715c22ac6c6 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-div-2.c +@@ -0,0 +1,14 @@ ++/* { dg-do compile } */ ++ ++int x[4], y[4], z[4]; ++ ++void ++f (void) ++{ ++ x[0] += y[0] / z[0] * 2; ++ x[1] += y[1] / z[1] * 2; ++ x[2] += y[2] / z[2] * 2; ++ x[3] += y[3] / z[3] * 2; ++} ++ ++/* { dg-final { scan-tree-dump "basic block vectorized" "slp2" { target vect_int } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c b/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c +index 85f9a02582f..813b1af089a 100644 +--- a/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c ++++ b/gcc/testsuite/gcc.dg/vect/bb-slp-pr69907.c +@@ -18,5 +18,6 @@ void foo(unsigned *p1, unsigned short *p2) + } + + /* Disable for SVE because for long or variable-length vectors we don't +- get an unrolled epilogue loop. */ +-/* { dg-final { scan-tree-dump "BB vectorization with gaps at the end of a load is not supported" "slp1" { target { ! aarch64_sve } } } } */ ++ get an unrolled epilogue loop. Also disable for AArch64 Advanced SIMD, ++ because there we can vectorize the epilogue using mixed vector sizes. */ ++/* { dg-final { scan-tree-dump "BB vectorization with gaps at the end of a load is not supported" "slp1" { target { ! aarch64*-*-* } } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c b/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c +index 228190ab05d..877de4eb5be 100644 +--- a/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c ++++ b/gcc/testsuite/gcc.dg/vect/fast-math-vect-call-1.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + #include "tree-vect.h" + + extern float copysignf (float, float); +diff --git a/gcc/testsuite/gcc.dg/vect/no-fast-math-vect16.c b/gcc/testsuite/gcc.dg/vect/no-fast-math-vect16.c +index 7a148e41d51..5f871289337 100644 +--- a/gcc/testsuite/gcc.dg/vect/no-fast-math-vect16.c ++++ b/gcc/testsuite/gcc.dg/vect/no-fast-math-vect16.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_float_strict } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-slp-reduc-7.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-slp-reduc-7.c +index 1d674504e2c..022d49f1175 100644 +--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-slp-reduc-7.c ++++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-noreassoc-slp-reduc-7.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c +index e4202b10d06..b5f8c3c88e4 100644 +--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c ++++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-outer-12.c +@@ -46,4 +46,4 @@ int main (void) + } + + /* Until we support multiple types in the inner loop */ +-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED." 1 "vect" { xfail { ! aarch64*-*-* } } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/no-scevccp-vect-iv-3.c b/gcc/testsuite/gcc.dg/vect/no-scevccp-vect-iv-3.c +index 50b4998bb6c..7049e4936b9 100644 +--- a/gcc/testsuite/gcc.dg/vect/no-scevccp-vect-iv-3.c ++++ b/gcc/testsuite/gcc.dg/vect/no-scevccp-vect-iv-3.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-require-effective-target vect_int } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-31.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-31.c +index c3b242157ce..d2ae7976781 100644 +--- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-31.c ++++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-31.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-add-options bind_pic_locally } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-64.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-64.c +index 470bbfb5537..243e01e6dad 100644 +--- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-64.c ++++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-64.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-add-options bind_pic_locally } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c +index 805024d8058..e339590bacb 100644 +--- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c ++++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-66.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-68.c b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-68.c +index 726c0de652f..c403a8302d8 100644 +--- a/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-68.c ++++ b/gcc/testsuite/gcc.dg/vect/no-section-anchors-vect-68.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-skip-if "AArch64 tiny code model does not support programs larger than 1MiB" {aarch64_tiny} } */ + /* { dg-add-options bind_pic_locally } */ +diff --git a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-dv-2.c b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-dv-2.c +index 4513c40b34f..dcb53701795 100644 +--- a/gcc/testsuite/gcc.dg/vect/no-vfa-vect-dv-2.c ++++ b/gcc/testsuite/gcc.dg/vect/no-vfa-vect-dv-2.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/pr33804.c b/gcc/testsuite/gcc.dg/vect/pr33804.c +index 86babbe60e7..0db13674b42 100644 +--- a/gcc/testsuite/gcc.dg/vect/pr33804.c ++++ b/gcc/testsuite/gcc.dg/vect/pr33804.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-require-effective-target vect_int } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/pr53773.c b/gcc/testsuite/gcc.dg/vect/pr53773.c +index 0bcc021767e..7f8229571ec 100644 +--- a/gcc/testsuite/gcc.dg/vect/pr53773.c ++++ b/gcc/testsuite/gcc.dg/vect/pr53773.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-additional-options "-fdump-tree-optimized" } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/pr65930-1.c b/gcc/testsuite/gcc.dg/vect/pr65930-1.c +new file mode 100644 +index 00000000000..895fbf8869d +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/pr65930-1.c +@@ -0,0 +1,26 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++ ++unsigned __attribute__((noipa)) ++bar (unsigned int *x) ++{ ++ int sum = 4; ++ x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__); ++ for (int i = 0; i < 16; ++i) ++ sum += x[i]; ++ return sum; ++} ++ ++int ++main() ++{ ++ static int a[16] __attribute__((aligned(__BIGGEST_ALIGNMENT__))) ++ = { 1, 3, 5, 8, 9, 10, 17, 18, 23, 29, 30, 55, 42, 2, 3, 1 }; ++ check_vect (); ++ if (bar (a) != 260) ++ abort (); ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/pr65930-2.c b/gcc/testsuite/gcc.dg/vect/pr65930-2.c +new file mode 100644 +index 00000000000..9cfb9b102d9 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/pr65930-2.c +@@ -0,0 +1,28 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "tree-vect.h" ++ ++int __attribute__((noipa)) ++bar (unsigned int *x, int n) ++{ ++ int sum = 4; ++ x = __builtin_assume_aligned (x, __BIGGEST_ALIGNMENT__); ++ for (int i = 0; i < n; ++i) ++ sum += x[i*4+0]+ x[i*4 + 1] + x[i*4 + 2] + x[i*4 + 3]; ++ return sum; ++} ++ ++int ++main () ++{ ++ static int a[16] __attribute__((aligned(__BIGGEST_ALIGNMENT__))) ++ = { 1, 3, 5, 8, 9, 10, 17, 18, 23, 29, 30, 55, 42, 2, 3, 1 }; ++ check_vect (); ++ if (bar (a, 4) != 260) ++ abort (); ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" } } */ ++/* { dg-final { scan-tree-dump "Loop contains only SLP stmts" "vect" } } */ ++/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-1.c b/gcc/testsuite/gcc.dg/vect/pr65947-1.c +index 879819d576a..9fc74a1ab28 100644 +--- a/gcc/testsuite/gcc.dg/vect/pr65947-1.c ++++ b/gcc/testsuite/gcc.dg/vect/pr65947-1.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_condition } */ + + #include "tree-vect.h" +@@ -41,5 +43,5 @@ main (void) + } + + /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { target { ! vect_fold_extract_last } } } } */ ++/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! 
vect_fold_extract_last } } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-10.c b/gcc/testsuite/gcc.dg/vect/pr65947-10.c +index f37aecab082..e4a1d9419c2 100644 +--- a/gcc/testsuite/gcc.dg/vect/pr65947-10.c ++++ b/gcc/testsuite/gcc.dg/vect/pr65947-10.c +@@ -42,6 +42,6 @@ main (void) + } + + /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ + /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-12.c b/gcc/testsuite/gcc.dg/vect/pr65947-12.c +index b84fd41bc63..a47f4146a29 100644 +--- a/gcc/testsuite/gcc.dg/vect/pr65947-12.c ++++ b/gcc/testsuite/gcc.dg/vect/pr65947-12.c +@@ -42,5 +42,5 @@ main (void) + } + + /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ + /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-13.c b/gcc/testsuite/gcc.dg/vect/pr65947-13.c +index e1d3ff52f5c..a703923151d 100644 +--- a/gcc/testsuite/gcc.dg/vect/pr65947-13.c ++++ b/gcc/testsuite/gcc.dg/vect/pr65947-13.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_condition } */ + + #include "tree-vect.h" +@@ -41,5 +43,5 @@ main (void) + } + + /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { xfail vect_fold_extract_last } } } */ +-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { xfail vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-14.c b/gcc/testsuite/gcc.dg/vect/pr65947-14.c +index 9f1e4e1eb6a..3b76fda2122 100644 +--- a/gcc/testsuite/gcc.dg/vect/pr65947-14.c ++++ b/gcc/testsuite/gcc.dg/vect/pr65947-14.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_condition } */ + + #include "tree-vect.h" +@@ -41,5 +43,5 @@ main (void) + } + + /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { target { ! 
vect_fold_extract_last } } } } */ ++/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-2.c b/gcc/testsuite/gcc.dg/vect/pr65947-2.c +index 18d33c436a5..58ba5f764d0 100644 +--- a/gcc/testsuite/gcc.dg/vect/pr65947-2.c ++++ b/gcc/testsuite/gcc.dg/vect/pr65947-2.c +@@ -42,5 +42,5 @@ main (void) + } + + /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ + /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-3.c b/gcc/testsuite/gcc.dg/vect/pr65947-3.c +index 427abdb4140..6b4077e1a62 100644 +--- a/gcc/testsuite/gcc.dg/vect/pr65947-3.c ++++ b/gcc/testsuite/gcc.dg/vect/pr65947-3.c +@@ -52,5 +52,5 @@ main (void) + } + + /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ + /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-4.c b/gcc/testsuite/gcc.dg/vect/pr65947-4.c +index 186e03a6346..471fbe2da21 100644 +--- a/gcc/testsuite/gcc.dg/vect/pr65947-4.c ++++ b/gcc/testsuite/gcc.dg/vect/pr65947-4.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_condition } */ + + #include "tree-vect.h" +@@ -41,6 +43,6 @@ main (void) + } + + /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ +-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 4 "vect" { target { ! vect_fold_extract_last } } } } */ ++/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-5.c b/gcc/testsuite/gcc.dg/vect/pr65947-5.c +index c91b648aa05..4e3f765cd0c 100644 +--- a/gcc/testsuite/gcc.dg/vect/pr65947-5.c ++++ b/gcc/testsuite/gcc.dg/vect/pr65947-5.c +@@ -53,5 +53,5 @@ main (void) + /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target { ! 
vect_fold_extract_last } } } } */ + /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target vect_fold_extract_last } } } */ + /* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { xfail vect_fold_extract_last } } } */ +-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ + /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-6.c b/gcc/testsuite/gcc.dg/vect/pr65947-6.c +index b072c8d33a2..dde96d7a553 100644 +--- a/gcc/testsuite/gcc.dg/vect/pr65947-6.c ++++ b/gcc/testsuite/gcc.dg/vect/pr65947-6.c +@@ -41,5 +41,5 @@ main (void) + } + + /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ +-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 4 "vect" { target vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ + /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/pr65947-9.c b/gcc/testsuite/gcc.dg/vect/pr65947-9.c +index e43e0e473be..1f295306016 100644 +--- a/gcc/testsuite/gcc.dg/vect/pr65947-9.c ++++ b/gcc/testsuite/gcc.dg/vect/pr65947-9.c +@@ -48,5 +48,5 @@ main () + /* { dg-final { scan-tree-dump-not "LOOP VECTORIZED" "vect" { target { ! vect_fold_extract_last } } } } */ + /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 1 "vect" { target vect_fold_extract_last } } } */ + /* { dg-final { scan-tree-dump "loop size is greater than data size" "vect" { target { ! vect_fold_extract_last } } } } */ +-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 1 "vect" { target vect_fold_extract_last } } } */ + /* { dg-final { scan-tree-dump-not "condition expression based on integer induction." "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-1.c b/gcc/testsuite/gcc.dg/vect/pr80631-1.c +index f2405198a10..cbb9a6ff69a 100644 +--- a/gcc/testsuite/gcc.dg/vect/pr80631-1.c ++++ b/gcc/testsuite/gcc.dg/vect/pr80631-1.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* PR tree-optimization/80631 */ + + #include "tree-vect.h" +@@ -72,5 +74,5 @@ main () + } + + /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */ +-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */ +-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 10 "vect" { target { { ! vect_fold_extract_last } && vect_condition } } } } */ ++/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 5 "vect" { target { { ! 
vect_fold_extract_last } && vect_condition } } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/pr80631-2.c b/gcc/testsuite/gcc.dg/vect/pr80631-2.c +index b334ca2345b..61e11316af2 100644 +--- a/gcc/testsuite/gcc.dg/vect/pr80631-2.c ++++ b/gcc/testsuite/gcc.dg/vect/pr80631-2.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* PR tree-optimization/80631 */ + + #include "tree-vect.h" +@@ -72,5 +74,5 @@ main () + } + + /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 5 "vect" { target vect_condition } } } */ +-/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 10 "vect" { target vect_condition xfail vect_fold_extract_last } } } */ +-/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 10 "vect" { target vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 5 "vect" { target vect_condition xfail vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 5 "vect" { target vect_fold_extract_last } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/pr92205.c b/gcc/testsuite/gcc.dg/vect/pr92205.c +new file mode 100644 +index 00000000000..a031c1fe297 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/pr92205.c +@@ -0,0 +1,13 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target vect_int } */ ++ ++int b(int n, unsigned char *a) ++{ ++ int d = 0; ++ a = __builtin_assume_aligned (a, __BIGGEST_ALIGNMENT__); ++ for (int c = 0; c < n; ++c) ++ d |= a[c]; ++ return d; ++} ++ ++/* { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { xfail *-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/slp-23.c b/gcc/testsuite/gcc.dg/vect/slp-23.c +index 7d330c787d1..d7c67fe2c6e 100644 +--- a/gcc/testsuite/gcc.dg/vect/slp-23.c ++++ b/gcc/testsuite/gcc.dg/vect/slp-23.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/slp-25.c b/gcc/testsuite/gcc.dg/vect/slp-25.c +index ff7eff202cb..1c33927c434 100644 +--- a/gcc/testsuite/gcc.dg/vect/slp-25.c ++++ b/gcc/testsuite/gcc.dg/vect/slp-25.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-add-options bind_pic_locally } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/slp-9.c b/gcc/testsuite/gcc.dg/vect/slp-9.c +index d0c94f1986b..d5212dca3dd 100644 +--- a/gcc/testsuite/gcc.dg/vect/slp-9.c ++++ b/gcc/testsuite/gcc.dg/vect/slp-9.c +@@ -44,5 +44,5 @@ int main (void) + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } }*/ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target vect_widen_mult_hi_to_si } } } */ ++/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target vect_widen_mult_hi_to_si } } } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-2.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-2.c +index 07c96c00eb0..15dd59922fc 100644 +--- a/gcc/testsuite/gcc.dg/vect/slp-reduc-2.c ++++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-2.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c +index fc689e46ba1..f457c11aa3c 100644 +--- a/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c ++++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-5.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c +index 88591c5bdcb..1fd15aa3c87 100644 +--- a/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c ++++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-6.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/slp-reduc-sad-2.c b/gcc/testsuite/gcc.dg/vect/slp-reduc-sad-2.c +new file mode 100644 +index 00000000000..7d9255e48f2 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/slp-reduc-sad-2.c +@@ -0,0 +1,31 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ ++/* { dg-do compile } */ ++/* { dg-require-effective-target vect_usad_char } */ ++/* With AVX256 or more we do not pull off the trick eliding the epilogue. 
*/ ++/* { dg-additional-options "-mprefer-avx128" { target { x86_64-*-* i?86-*-* } } } */ ++ ++typedef unsigned char uint8_t; ++int x264_pixel_sad_8x8( uint8_t *pix1, uint8_t *pix2, int i_stride_pix2 ) ++{ ++ int i_sum = 0; ++ for( int y = 0; y < 8; y++ ) ++ { ++ i_sum += __builtin_abs( pix1[0] - pix2[0] ); ++ i_sum += __builtin_abs( pix1[1] - pix2[1] ); ++ i_sum += __builtin_abs( pix1[2] - pix2[2] ); ++ i_sum += __builtin_abs( pix1[3] - pix2[3] ); ++ i_sum += __builtin_abs( pix1[4] - pix2[4] ); ++ i_sum += __builtin_abs( pix1[5] - pix2[5] ); ++ i_sum += __builtin_abs( pix1[6] - pix2[6] ); ++ i_sum += __builtin_abs( pix1[7] - pix2[7] ); ++ pix1 += 16; ++ pix2 += i_stride_pix2; ++ } ++ return i_sum; ++} ++ ++/* { dg-final { scan-tree-dump "vect_recog_sad_pattern: detected" "vect" } } */ ++/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" } } */ ++/* { dg-final { scan-tree-dump-not "access with gaps requires scalar epilogue loop" "vect" } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c +index f5fb63e19f1..e3bfee33348 100644 +--- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c ++++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-half.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include "tree-vect.h" +diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c +index 4460d59b5a1..abb10fde45b 100644 +--- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c ++++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c +@@ -38,5 +38,5 @@ int main (void) + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */ ++/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c +index 6e72c4878c2..0756119afb4 100644 +--- a/gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c ++++ b/gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c +@@ -38,5 +38,5 @@ int main (void) + } + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_qi_to_hi || vect_unpack } } } } */ +-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */ ++/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { vect_widen_mult_hi_to_si || vect_unpack } } } } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c +index 8a57eb69a91..f09c964fdc1 100644 +--- a/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c ++++ b/gcc/testsuite/gcc.dg/vect/trapv-vect-reduc-4.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-require-effective-target vect_int } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-103.c b/gcc/testsuite/gcc.dg/vect/vect-103.c +index 4a9e1574eb0..2a4510482d4 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-103.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-103.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-109.c b/gcc/testsuite/gcc.dg/vect/vect-109.c +index 9a507105899..ac5d0827899 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-109.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-109.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-skip-if "" { vect_no_align } } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-add-options bind_pic_locally } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-119.c b/gcc/testsuite/gcc.dg/vect/vect-119.c +index aa8c3002bff..29a9c51cd29 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-119.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-119.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-require-effective-target vect_int } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-24.c b/gcc/testsuite/gcc.dg/vect/vect-24.c +index cbff6c55fa4..fa4c0620d29 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-24.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-24.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-26.c b/gcc/testsuite/gcc.dg/vect/vect-26.c +index 4f0472b5d0f..8a141f38400 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-26.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-26.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-27.c b/gcc/testsuite/gcc.dg/vect/vect-27.c +index 590217feee7..ac86b21aceb 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-27.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-27.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-add-options bind_pic_locally } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-29.c b/gcc/testsuite/gcc.dg/vect/vect-29.c +index 86ec2cc1ddf..bbd446dfe63 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-29.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-29.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-add-options bind_pic_locally } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-42.c b/gcc/testsuite/gcc.dg/vect/vect-42.c +index a65b4a62276..086cbf20c0a 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-42.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-42.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_float } */ + /* { dg-add-options double_vectors } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-44.c b/gcc/testsuite/gcc.dg/vect/vect-44.c +index 03ef2c0f671..f7f1fd28665 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-44.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-44.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_float } */ + /* { dg-additional-options "--param vect-max-peeling-for-alignment=0" } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-48.c b/gcc/testsuite/gcc.dg/vect/vect-48.c +index bac6ef6b8dd..b29fe47635a 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-48.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-48.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_float } */ + /* { dg-add-options double_vectors } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-50.c b/gcc/testsuite/gcc.dg/vect/vect-50.c +index c9500ca91e5..f43676896af 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-50.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-50.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_float } */ + /* { dg-additional-options "--param vect-max-peeling-for-alignment=0" } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-52.c b/gcc/testsuite/gcc.dg/vect/vect-52.c +index 0343d9a24d1..c20a4be2ede 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-52.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-52.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_float } */ + /* { dg-add-options double_vectors } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-54.c b/gcc/testsuite/gcc.dg/vect/vect-54.c +index 58201abe069..2b236e48e19 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-54.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-54.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_float } */ + /* { dg-add-options double_vectors } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-56.c b/gcc/testsuite/gcc.dg/vect/vect-56.c +index 8060b05e781..c914126ece5 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-56.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-56.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_float } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-58.c b/gcc/testsuite/gcc.dg/vect/vect-58.c +index 441af51860e..da4f9740e33 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-58.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-58.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_float } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-60.c b/gcc/testsuite/gcc.dg/vect/vect-60.c +index 3b7477c96ab..121c503c63a 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-60.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-60.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_float } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-72.c b/gcc/testsuite/gcc.dg/vect/vect-72.c +index 472d8d57549..9e8e91b7ae6 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-72.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-72.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-add-options bind_pic_locally } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-75-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-75-big-array.c +index 42b2b8d91aa..a3fb5053037 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-75-big-array.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-75-big-array.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-75.c b/gcc/testsuite/gcc.dg/vect/vect-75.c +index 2cdd7032242..88da97f0bb7 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-75.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-75.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-77-alignchecks.c b/gcc/testsuite/gcc.dg/vect/vect-77-alignchecks.c +index 56ee797d10b..fb3e4992782 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-77-alignchecks.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-77-alignchecks.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-77-global.c b/gcc/testsuite/gcc.dg/vect/vect-77-global.c +index f0b73505d68..1580d6e075b 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-77-global.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-77-global.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-add-options bind_pic_locally } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-78-alignchecks.c b/gcc/testsuite/gcc.dg/vect/vect-78-alignchecks.c +index c3ef8a36591..57e8da0a909 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-78-alignchecks.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-78-alignchecks.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-78-global.c b/gcc/testsuite/gcc.dg/vect/vect-78-global.c +index 241e7fa94b5..ea039b389b2 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-78-global.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-78-global.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-add-options bind_pic_locally } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-89-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-89-big-array.c +index decfbee318a..59e1aae0017 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-89-big-array.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-89-big-array.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-89.c b/gcc/testsuite/gcc.dg/vect/vect-89.c +index 051698eada2..356ab96d330 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-89.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-89.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-91.c b/gcc/testsuite/gcc.dg/vect/vect-91.c +index 9430da3290a..91264d9841d 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-91.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-91.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-additional-options "--param vect-max-peeling-for-alignment=0" } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-92.c b/gcc/testsuite/gcc.dg/vect/vect-92.c +index b9a1ce23d02..9ceb0fbadcd 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-92.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-92.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_float } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-96.c b/gcc/testsuite/gcc.dg/vect/vect-96.c +index 0cb935b9f16..c0d6c37b21d 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-96.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-96.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-add-options double_vectors } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-1.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-1.c +index c2b1c773047..3887120b747 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-1.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-1.c +@@ -15,3 +15,5 @@ fn1 () + } + + /* { dg-final { scan-tree-dump "improved number of alias checks from \[0-9\]* to 1" "vect" } } */ ++/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */ ++/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c +index 0e6285e4a23..b6cc309dbe8 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-10.c +@@ -65,3 +65,6 @@ main (void) + FOR_EACH_TYPE (DO_TEST) + return 0; + } ++ ++/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */ ++/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c +index a0d5abc3aa4..09a4ebfa69e 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-11.c +@@ -95,3 +95,6 @@ main (void) + /* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 8[)]* is outside \(-24, 24\)} "vect" { target vect_double } } } */ + /* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* step[^ ]* \* 8[)]* is outside \(-32, 32\)} "vect" { target vect_double } } } */ + /* { dg-final { scan-tree-dump {run-time check [^\n]* abs \([^*]* \* 8[)]* >= 32} "vect" { target vect_double } } } */ ++ ++/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */ ++/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c +index 788cdfc3cdc..63a897f4bad 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-12.c +@@ -95,3 +95,6 @@ main (void) + /* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 8[)]* is outside \[0, 24\)} "vect" { target vect_double } } } */ + /* { dg-final { scan-tree-dump {no alias between [^\n]* when [^\n]* [_a-z][^ ]* \* 8[)]* is outside \[0, 32\)} "vect" { target vect_double } } } */ + /* { dg-final { scan-tree-dump {run-time check [^\n]* unsigned \([^*]* \* 8[)]* >= 32} "vect" { target vect_double } } } */ ++ ++/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */ ++/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-13.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-13.c +index 60bc4730724..812aa9027dd 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-13.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-13.c +@@ -18,4 +18,6 @@ f2 (int *x, long step2, int n) + + /* { dg-final { scan-tree-dump {need run-time check that [^\n]*step1[^\n]* is nonzero} "vect" } } */ + /* { dg-final { scan-tree-dump-not {need run-time check that [^\n]*step2[^\n]* is nonzero} "vect" } } */ ++/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */ ++/* { dg-final { 
scan-tree-dump-not "using an index-based" "vect" } } */ + /* { dg-final { scan-tree-dump-times {LOOP VECTORIZED} 2 "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c +new file mode 100644 +index 00000000000..1d148a04918 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-14.c +@@ -0,0 +1,64 @@ ++#define N 200 ++#define M 4 ++ ++typedef signed char sc; ++typedef unsigned char uc; ++typedef signed short ss; ++typedef unsigned short us; ++typedef int si; ++typedef unsigned int ui; ++typedef signed long long sll; ++typedef unsigned long long ull; ++ ++#define FOR_EACH_TYPE(M) \ ++ M (sc) M (uc) \ ++ M (ss) M (us) \ ++ M (si) M (ui) \ ++ M (sll) M (ull) \ ++ M (float) M (double) ++ ++#define TEST_VALUE(I) ((I) * 17 / 2) ++ ++#define ADD_TEST(TYPE) \ ++ void __attribute__((noinline, noclone)) \ ++ test_##TYPE (TYPE *a, TYPE *b) \ ++ { \ ++ for (int i = 0; i < N; i += 2) \ ++ { \ ++ TYPE b0 = b[i + 0]; \ ++ TYPE b1 = b[i + 1]; \ ++ a[i + 0] = b0 + 2; \ ++ a[i + 1] = b1 + 3; \ ++ } \ ++ } ++ ++#define DO_TEST(TYPE) \ ++ for (int j = 0; j < M; ++j) \ ++ { \ ++ TYPE a[N + M]; \ ++ for (int i = 0; i < N + M; ++i) \ ++ a[i] = TEST_VALUE (i); \ ++ test_##TYPE (a + j, a); \ ++ for (int i = 0; i < N; i += 2) \ ++ { \ ++ TYPE base1 = j == 0 ? TEST_VALUE (i) : a[i]; \ ++ TYPE base2 = j <= 1 ? TEST_VALUE (i + 1) : a[i + 1]; \ ++ if (a[i + j] != (TYPE) (base1 + 2) \ ++ || a[i + j + 1] != (TYPE) (base2 + 3)) \ ++ __builtin_abort (); \ ++ } \ ++ } ++ ++FOR_EACH_TYPE (ADD_TEST) ++ ++int ++main (void) ++{ ++ FOR_EACH_TYPE (DO_TEST) ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */ ++/* { dg-final { scan-tree-dump-not {flags: [^\n]*ARBITRARY\n} "vect" } } */ ++/* { dg-final { scan-tree-dump "using an address-based WAR/WAW test" "vect" } } */ ++/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c +new file mode 100644 +index 00000000000..fbe3f8431ff +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-15.c +@@ -0,0 +1,61 @@ ++#define N 200 ++#define DIST 32 ++ ++typedef signed char sc; ++typedef unsigned char uc; ++typedef signed short ss; ++typedef unsigned short us; ++typedef int si; ++typedef unsigned int ui; ++typedef signed long long sll; ++typedef unsigned long long ull; ++ ++#define FOR_EACH_TYPE(M) \ ++ M (sc) M (uc) \ ++ M (ss) M (us) \ ++ M (si) M (ui) \ ++ M (sll) M (ull) \ ++ M (float) M (double) ++ ++#define ADD_TEST(TYPE) \ ++ void __attribute__((noinline, noclone)) \ ++ test_##TYPE (TYPE *x, TYPE *y) \ ++ { \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ x[i] = i; \ ++ y[i] = 42 - i * 2; \ ++ } \ ++ } ++ ++#define DO_TEST(TYPE) \ ++ for (int i = 0; i < DIST * 2; ++i) \ ++ { \ ++ TYPE a[N + DIST * 2] = {}; \ ++ test_##TYPE (a + DIST, a + i); \ ++ for (int j = 0; j < N + DIST * 2; ++j) \ ++ { \ ++ TYPE expected = 0; \ ++ if (i > DIST && j >= i && j < i + N) \ ++ expected = 42 - (j - i) * 2; \ ++ if (j >= DIST && j < DIST + N) \ ++ expected = j - DIST; \ ++ if (i <= DIST && j >= i && j < i + N) \ ++ expected = 42 - (j - i) * 2; \ ++ if (expected != a[j]) \ ++ __builtin_abort (); \ ++ } \ ++ } ++ ++FOR_EACH_TYPE (ADD_TEST) ++ ++int ++main (void) ++{ ++ FOR_EACH_TYPE (DO_TEST) ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */ ++/* { dg-final { scan-tree-dump "using an 
address-based WAR/WAW test" "vect" } } */ ++/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c +new file mode 100644 +index 00000000000..81c252dfc23 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-16.c +@@ -0,0 +1,66 @@ ++#define N 200 ++#define DIST 32 ++ ++typedef signed char sc; ++typedef unsigned char uc; ++typedef signed short ss; ++typedef unsigned short us; ++typedef int si; ++typedef unsigned int ui; ++typedef signed long long sll; ++typedef unsigned long long ull; ++ ++#define FOR_EACH_TYPE(M) \ ++ M (sc) M (uc) \ ++ M (ss) M (us) \ ++ M (si) M (ui) \ ++ M (sll) M (ull) \ ++ M (float) M (double) ++ ++#define TEST_VALUE(I) ((I) * 13 / 2) ++ ++#define ADD_TEST(TYPE) \ ++ TYPE __attribute__((noinline, noclone)) \ ++ test_##TYPE (TYPE *x, TYPE *y) \ ++ { \ ++ TYPE res = 0; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ x[i] = i; \ ++ res += y[i]; \ ++ } \ ++ return res; \ ++ } ++ ++#define DO_TEST(TYPE) \ ++ for (int i = 0; i < DIST * 2; ++i) \ ++ { \ ++ TYPE a[N + DIST * 2]; \ ++ for (int j = 0; j < N + DIST * 2; ++j) \ ++ a[j] = TEST_VALUE (j); \ ++ TYPE res = test_##TYPE (a + DIST, a + i); \ ++ for (int j = 0; j < N; ++j) \ ++ if (a[j + DIST] != (TYPE) j) \ ++ __builtin_abort (); \ ++ TYPE expected_res = 0; \ ++ for (int j = i; j < i + N; ++j) \ ++ if (i <= DIST && j >= DIST && j < DIST + N) \ ++ expected_res += j - DIST; \ ++ else \ ++ expected_res += TEST_VALUE (j); \ ++ if (expected_res != res) \ ++ __builtin_abort (); \ ++ } ++ ++FOR_EACH_TYPE (ADD_TEST) ++ ++int ++main (void) ++{ ++ FOR_EACH_TYPE (DO_TEST) ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump {flags: *RAW\n} "vect" { target vect_int } } } */ ++/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */ ++/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-17.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-17.c +new file mode 100644 +index 00000000000..c49c497c2d0 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-17.c +@@ -0,0 +1,15 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target vect_load_lanes } */ ++ ++struct s { int x[100]; }; ++ ++void ++f (struct s *s1, int a, int b) ++{ ++ for (int i = 0; i < 32; ++i) ++ s1->x[a + i] = s1->x[b + i * 2] + s1->x[b + i * 3]; ++} ++ ++/* { dg-final { scan-tree-dump {flags: *[^\n]*MIXED_STEPS} "vect" } } */ ++/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */ ++/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c +new file mode 100644 +index 00000000000..9d0739151d9 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-18.c +@@ -0,0 +1,64 @@ ++#define N 200 ++#define DIST 32 ++ ++typedef signed char sc; ++typedef unsigned char uc; ++typedef signed short ss; ++typedef unsigned short us; ++typedef int si; ++typedef unsigned int ui; ++typedef signed long long sll; ++typedef unsigned long long ull; ++ ++#define FOR_EACH_TYPE(M) \ ++ M (sc) M (uc) \ ++ M (ss) M (us) \ ++ M (si) M (ui) \ ++ M (sll) M (ull) \ ++ M (float) M (double) ++ ++#define TEST_VALUE(I) ((I) * 11 / 2) ++ ++#define ADD_TEST(TYPE) \ ++ TYPE a_##TYPE[N * 2]; \ ++ void __attribute__((noinline, noclone)) \ ++ test_##TYPE (int x, int y) \ ++ { \ ++ for (int i = 0; i < N; 
++i) \ ++ a_##TYPE[x - i] += a_##TYPE[y - i]; \ ++ } ++ ++#define DO_TEST(TYPE) \ ++ for (int i = 0; i < DIST * 2; ++i) \ ++ { \ ++ for (int j = 0; j < N + DIST * 2; ++j) \ ++ a_##TYPE[j] = TEST_VALUE (j); \ ++ test_##TYPE (i + N - 1, DIST + N - 1); \ ++ for (int j = 0; j < N + DIST * 2; ++j) \ ++ { \ ++ TYPE expected; \ ++ if (j < i || j >= i + N) \ ++ expected = TEST_VALUE (j); \ ++ else if (i >= DIST) \ ++ expected = ((TYPE) TEST_VALUE (j) \ ++ + (TYPE) TEST_VALUE (j + DIST - i)); \ ++ else \ ++ expected = ((TYPE) TEST_VALUE (j) \ ++ + a_##TYPE[j + DIST - i]); \ ++ if (expected != a_##TYPE[j]) \ ++ __builtin_abort (); \ ++ } \ ++ } ++ ++FOR_EACH_TYPE (ADD_TEST) ++ ++int ++main (void) ++{ ++ FOR_EACH_TYPE (DO_TEST) ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */ ++/* { dg-final { scan-tree-dump "using an index-based WAR/WAW test" "vect" } } */ ++/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c +new file mode 100644 +index 00000000000..7c0ff36a8c4 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-19.c +@@ -0,0 +1,62 @@ ++#define N 200 ++#define DIST 32 ++ ++typedef signed char sc; ++typedef unsigned char uc; ++typedef signed short ss; ++typedef unsigned short us; ++typedef int si; ++typedef unsigned int ui; ++typedef signed long long sll; ++typedef unsigned long long ull; ++ ++#define FOR_EACH_TYPE(M) \ ++ M (sc) M (uc) \ ++ M (ss) M (us) \ ++ M (si) M (ui) \ ++ M (sll) M (ull) \ ++ M (float) M (double) ++ ++#define ADD_TEST(TYPE) \ ++ TYPE a_##TYPE[N * 2]; \ ++ void __attribute__((noinline, noclone)) \ ++ test_##TYPE (int x, int y) \ ++ { \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a_##TYPE[i + x] = i; \ ++ a_##TYPE[i + y] = 42 - i * 2; \ ++ } \ ++ } ++ ++#define DO_TEST(TYPE) \ ++ for (int i = 0; i < DIST * 2; ++i) \ ++ { \ ++ __builtin_memset (a_##TYPE, 0, sizeof (a_##TYPE)); \ ++ test_##TYPE (DIST, i); \ ++ for (int j = 0; j < N + DIST * 2; ++j) \ ++ { \ ++ TYPE expected = 0; \ ++ if (i > DIST && j >= i && j < i + N) \ ++ expected = 42 - (j - i) * 2; \ ++ if (j >= DIST && j < DIST + N) \ ++ expected = j - DIST; \ ++ if (i <= DIST && j >= i && j < i + N) \ ++ expected = 42 - (j - i) * 2; \ ++ if (expected != a_##TYPE[j]) \ ++ __builtin_abort (); \ ++ } \ ++ } ++ ++FOR_EACH_TYPE (ADD_TEST) ++ ++int ++main (void) ++{ ++ FOR_EACH_TYPE (DO_TEST) ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump {flags: *WAW\n} "vect" { target vect_int } } } */ ++/* { dg-final { scan-tree-dump "using an index-based WAR/WAW test" "vect" } } */ ++/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c +new file mode 100644 +index 00000000000..8a699ebfda8 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-20.c +@@ -0,0 +1,66 @@ ++#define N 200 ++#define DIST 32 ++ ++typedef signed char sc; ++typedef unsigned char uc; ++typedef signed short ss; ++typedef unsigned short us; ++typedef int si; ++typedef unsigned int ui; ++typedef signed long long sll; ++typedef unsigned long long ull; ++ ++#define FOR_EACH_TYPE(M) \ ++ M (sc) M (uc) \ ++ M (ss) M (us) \ ++ M (si) M (ui) \ ++ M (sll) M (ull) \ ++ M (float) M (double) ++ ++#define TEST_VALUE(I) ((I) * 11 / 2) ++ ++#define ADD_TEST(TYPE) \ ++ TYPE a_##TYPE[N * 2]; \ ++ TYPE __attribute__((noinline, noclone)) \ ++ 
test_##TYPE (int x, int y) \ ++ { \ ++ TYPE res = 0; \ ++ for (int i = 0; i < N; ++i) \ ++ { \ ++ a_##TYPE[i + x] = i; \ ++ res += a_##TYPE[i + y]; \ ++ } \ ++ return res; \ ++ } ++ ++#define DO_TEST(TYPE) \ ++ for (int i = 0; i < DIST * 2; ++i) \ ++ { \ ++ for (int j = 0; j < N + DIST * 2; ++j) \ ++ a_##TYPE[j] = TEST_VALUE (j); \ ++ TYPE res = test_##TYPE (DIST, i); \ ++ for (int j = 0; j < N; ++j) \ ++ if (a_##TYPE[j + DIST] != (TYPE) j) \ ++ __builtin_abort (); \ ++ TYPE expected_res = 0; \ ++ for (int j = i; j < i + N; ++j) \ ++ if (i <= DIST && j >= DIST && j < DIST + N) \ ++ expected_res += j - DIST; \ ++ else \ ++ expected_res += TEST_VALUE (j); \ ++ if (expected_res != res) \ ++ __builtin_abort (); \ ++ } ++ ++FOR_EACH_TYPE (ADD_TEST) ++ ++int ++main (void) ++{ ++ FOR_EACH_TYPE (DO_TEST) ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump {flags: *RAW\n} "vect" { target vect_int } } } */ ++/* { dg-final { scan-tree-dump "using an index-based overlap test" "vect" } } */ ++/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c +index 0569ca487b5..7e5df138999 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-8.c +@@ -58,3 +58,7 @@ main (void) + FOR_EACH_TYPE (DO_TEST) + return 0; + } ++ ++/* { dg-final { scan-tree-dump {flags: *WAR\n} "vect" { target vect_int } } } */ ++/* { dg-final { scan-tree-dump "using an index-based WAR/WAW test" "vect" } } */ ++/* { dg-final { scan-tree-dump-not "using an address-based" "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c b/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c +index 5685bfee576..a7fc1fcebbb 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-alias-check-9.c +@@ -17,7 +17,7 @@ typedef unsigned long long ull; + M (sll) M (ull) \ + M (float) M (double) + +-#define TEST_VALUE(I) ((I) * 5 / 2) ++#define TEST_VALUE(I) ((I) * 17 / 2) + + #define ADD_TEST(TYPE) \ + void __attribute__((noinline, noclone)) \ +@@ -51,3 +51,7 @@ main (void) + FOR_EACH_TYPE (DO_TEST) + return 0; + } ++ ++/* { dg-final { scan-tree-dump {flags: [^\n]*ARBITRARY\n} "vect" { target vect_int } } } */ ++/* { dg-final { scan-tree-dump "using an address-based overlap test" "vect" } } */ ++/* { dg-final { scan-tree-dump-not "using an index-based" "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap16.c b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c +index 3c98b07e425..d29b352b832 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-bswap16.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-bswap16.c +@@ -1,4 +1,4 @@ +-/* { dg-require-effective-target vect_bswap } */ ++/* { dg-additional-options "-msse4" { target sse4_runtime } } */ + + #include "tree-vect.h" + +@@ -39,4 +39,4 @@ main (void) + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_bswap || sse4_runtime } } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-bswap16a.c b/gcc/testsuite/gcc.dg/vect/vect-bswap16a.c +new file mode 100644 +index 00000000000..730dc4e8352 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-bswap16a.c +@@ -0,0 +1,5 @@ ++/* { dg-additional-options "-msse2 -mno-sse3" { target sse2_runtime } } */ ++ ++#include "vect-bswap16.c" ++ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_shift 
} } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c +new file mode 100644 +index 00000000000..bb99b95eca5 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-3.c +@@ -0,0 +1,47 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ ++/* { dg-require-effective-target vect_condition } */ ++/* { dg-require-effective-target vect_float } */ ++ ++#include "tree-vect.h" ++ ++extern void abort (void) __attribute__ ((noreturn)); ++ ++#define N 27 ++ ++/* Condition reduction with different types. */ ++ ++int ++condition_reduction (float *a, float min_v) ++{ ++ int last = 0; ++ ++ for (int i = 0; i < N; i++) ++ if (a[i] < min_v) ++ last = i; ++ ++ return last; ++} ++ ++int ++main (void) ++{ ++ float a[N] = { ++ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ++ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ++ 21, 22, 23, 24, 25, 26, 27 ++ }; ++ ++ check_vect (); ++ ++ int ret = condition_reduction (a, 10); ++ if (ret != 18) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */ ++ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c +new file mode 100644 +index 00000000000..8820075b1dc +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-cond-reduc-4.c +@@ -0,0 +1,47 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ ++/* { dg-require-effective-target vect_condition } */ ++/* { dg-require-effective-target vect_double } */ ++ ++#include "tree-vect.h" ++ ++extern void abort (void) __attribute__ ((noreturn)); ++ ++#define N 27 ++ ++/* Condition reduction with different types. */ ++ ++int ++condition_reduction (double *a, double min_v) ++{ ++ int last = 0; ++ ++ for (int i = 0; i < N; i++) ++ if (a[i] < min_v) ++ last = i; ++ ++ return last; ++} ++ ++int ++main (void) ++{ ++ double a[N] = { ++ 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, ++ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ++ 21, 22, 23, 24, 25, 26, 27 ++ }; ++ ++ check_vect (); ++ ++ int ret = condition_reduction (a, 10); ++ if (ret != 18) ++ abort (); ++ ++ return 0; ++} ++ ++/* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" } } */ ++/* { dg-final { scan-tree-dump-times "optimizing condition reduction with FOLD_EXTRACT_LAST" 2 "vect" { target vect_fold_extract_last } } } */ ++/* { dg-final { scan-tree-dump-times "condition expression based on integer induction." 2 "vect" { target { ! vect_fold_extract_last } } } } */ ++ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c +index 0ba33895592..079704cee81 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-double-reduc-5.c +@@ -52,5 +52,5 @@ int main () + + /* Vectorization of loops with multiple types and double reduction is not + supported yet. */ +-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! 
aarch64*-*-* } } } } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-epilogues.c b/gcc/testsuite/gcc.dg/vect/vect-epilogues.c +new file mode 100644 +index 00000000000..946666e918f +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-epilogues.c +@@ -0,0 +1,19 @@ ++/* { dg-do compile } */ ++ ++/* Copied from PR 88915. */ ++void pixel_avg( unsigned char *dst, int i_dst_stride, ++ unsigned char *src1, int i_src1_stride, ++ unsigned char *src2, int i_src2_stride, ++ int i_width, int i_height ) ++ { ++ for( int y = 0; y < i_height; y++ ) ++ { ++ for( int x = 0; x < i_width; x++ ) ++ dst[x] = ( src1[x] + src2[x] + 1 ) >> 1; ++ dst += i_dst_stride; ++ src1 += i_src1_stride; ++ src2 += i_src2_stride; ++ } ++ } ++ ++/* { dg-final { scan-tree-dump "LOOP EPILOGUE VECTORIZED" "vect" { xfail { arm*-*-* } } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-1.c b/gcc/testsuite/gcc.dg/vect/vect-live-1.c +index e170875d7ab..f628c5d3998 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-live-1.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-live-1.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-additional-options "-fno-tree-scev-cprop" } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-2.c b/gcc/testsuite/gcc.dg/vect/vect-live-2.c +index a6daa61829e..19d8c22859e 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-live-2.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-live-2.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_long } */ + /* { dg-require-effective-target vect_shift } */ + /* { dg-additional-options "-fno-tree-scev-cprop" } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-3.c b/gcc/testsuite/gcc.dg/vect/vect-live-3.c +index 3ffa5166f45..8f5ccb27365 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-live-3.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-live-3.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include "tree-vect.h" +diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-4.c b/gcc/testsuite/gcc.dg/vect/vect-live-4.c +index 21cc27320ac..553ffcd49f7 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-live-4.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-live-4.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include "tree-vect.h" +diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c b/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c +index aff37c100f0..965437c8f03 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-live-slp-1.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-additional-options "-fno-tree-scev-cprop" } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c b/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c +index 35689665b54..0d2f17f9003 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-live-slp-2.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-additional-options "-fno-tree-scev-cprop" } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-live-slp-3.c b/gcc/testsuite/gcc.dg/vect/vect-live-slp-3.c +index 854116fa36e..a3f60f6ce6d 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-live-slp-3.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-live-slp-3.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_long } */ + /* { dg-additional-options "-fno-tree-scev-cprop" } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c +index 18bf5e80917..1f82121df06 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-add-options double_vectors } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c +index 43887865bf4..b0f74083f2b 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-add-options bind_pic_locally } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c b/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c +index b47a93ab326..864b17ac640 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-add-options double_vectors } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4e.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4e.c +index 13238dbe2f9..e65a092f5bf 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-outer-4e.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4e.c +@@ -23,4 +23,4 @@ foo (){ + return; + } + +-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! 
aarch64*-*-* } } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4f.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4f.c +index d1fbe346a48..a88014a2fbf 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-outer-4f.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4f.c +@@ -65,4 +65,4 @@ int main (void) + return 0; + } + +-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4g.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4g.c +index d1fbe346a48..a88014a2fbf 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-outer-4g.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4g.c +@@ -65,4 +65,4 @@ int main (void) + return 0; + } + +-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4k.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4k.c +index d1fbe346a48..a88014a2fbf 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-outer-4k.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4k.c +@@ -65,4 +65,4 @@ int main (void) + return 0; + } + +-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-4l.c b/gcc/testsuite/gcc.dg/vect/vect-outer-4l.c +index d1fbe346a48..4f95c652ee3 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-outer-4l.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-outer-4l.c +@@ -65,4 +65,4 @@ int main (void) + return 0; + } + +-/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump-times "OUTER LOOP VECTORIZED" 1 "vect" { xfail { ! aarch64*-*-* } } } }*/ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-outer-call-1.c b/gcc/testsuite/gcc.dg/vect/vect-outer-call-1.c +new file mode 100644 +index 00000000000..f26d4220532 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-outer-call-1.c +@@ -0,0 +1,22 @@ ++/* { dg-do compile } */ ++/* { dg-require-effective-target vect_float } */ ++/* { dg-additional-options "-fno-math-errno" } */ ++ ++void ++foo (float * __restrict x, float *y, int n, int m) ++{ ++ if (m > 0) ++ for (int i = 0; i < n; ++i) ++ { ++ float tem = x[i], tem1; ++ for (int j = 0; j < m; ++j) ++ { ++ tem += y[j]; ++ tem1 = tem; ++ tem = __builtin_sqrtf (tem); ++ } ++ x[i] = tem - tem1; ++ } ++} ++ ++/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" { target { vect_call_sqrtf } } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-1-epilogue.c b/gcc/testsuite/gcc.dg/vect/vect-peel-1-epilogue.c +new file mode 100644 +index 00000000000..cc23c6b0866 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-peel-1-epilogue.c +@@ -0,0 +1,3 @@ ++/* { dg-require-effective-target vect_int } */ ++ ++#include "vect-peel-1-src.c" +diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-1-src.c b/gcc/testsuite/gcc.dg/vect/vect-peel-1-src.c +new file mode 100644 +index 00000000000..7980d4dd643 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-peel-1-src.c +@@ -0,0 +1,48 @@ ++#include ++#include "tree-vect.h" ++ ++#define N 128 ++ ++int ib[N+7]; ++ ++__attribute__ ((noinline)) ++int main1 () ++{ ++ int i; ++ int ia[N+1]; ++ ++ /* All the accesses are misaligned. 
With cost model disabled, we ++ count the number of aligned accesses for each peeling option, and ++ in this case we align the two loads if possible (i.e., if ++ misaligned stores are supported). */ ++ for (i = 1; i <= N; i++) ++ { ++ ia[i] = ib[i+2] + ib[i+6]; ++ } ++ ++ /* check results: */ ++ for (i = 1; i <= N; i++) ++ { ++ if (ia[i] != ib[i+2] + ib[i+6]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i <= N+6; i++) ++ { ++ asm volatile ("" : "+r" (i)); ++ ib[i] = i; ++ } ++ ++ return main1 (); ++} ++ ++ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-1.c b/gcc/testsuite/gcc.dg/vect/vect-peel-1.c +index fae99ab0b08..a7660a381c4 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-peel-1.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-peel-1.c +@@ -1,51 +1,8 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + +-#include +-#include "tree-vect.h" +- +-#define N 128 +- +-int ib[N+7]; +- +-__attribute__ ((noinline)) +-int main1 () +-{ +- int i; +- int ia[N+1]; +- +- /* All the accesses are misaligned. With cost model disabled, we +- count the number of aligned accesses for each peeling option, and +- in this case we align the two loads if possible (i.e., if +- misaligned stores are supported). */ +- for (i = 1; i <= N; i++) +- { +- ia[i] = ib[i+2] + ib[i+6]; +- } +- +- /* check results: */ +- for (i = 1; i <= N; i++) +- { +- if (ia[i] != ib[i+2] + ib[i+6]) +- abort (); +- } +- +- return 0; +-} +- +-int main (void) +-{ +- int i; +- +- check_vect (); +- +- for (i = 0; i <= N+6; i++) +- { +- asm volatile ("" : "+r" (i)); +- ib[i] = i; +- } +- +- return main1 (); +-} ++#include "vect-peel-1-src.c" + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { target { { vect_element_align } && { vect_aligned_arrays } } xfail { ! vect_unaligned_possible } } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-3-epilogue.c b/gcc/testsuite/gcc.dg/vect/vect-peel-3-epilogue.c +new file mode 100644 +index 00000000000..8af0fcdca0e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-peel-3-epilogue.c +@@ -0,0 +1,4 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-add-options bind_pic_locally } */ ++ ++#include "vect-peel-3-src.c" +diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-3-src.c b/gcc/testsuite/gcc.dg/vect/vect-peel-3-src.c +new file mode 100644 +index 00000000000..a21ce8c3d6a +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-peel-3-src.c +@@ -0,0 +1,58 @@ ++#include ++#include "tree-vect.h" ++ ++#if VECTOR_BITS > 128 ++#define NINTS (VECTOR_BITS / 32) ++#define EXTRA (NINTS * 2) ++#else ++#define NINTS 4 ++#define EXTRA 10 ++#endif ++ ++#define N 128 ++ ++#define RES_A (N * N / 4) ++#define RES_B (N * (N + 1) / 2 + (NINTS + 3) * (N + 1)) ++#define RES_C (N * (N + 1) / 2 + (N + 1)) ++#define RES (RES_A + RES_B + RES_C) ++ ++int ib[N + EXTRA]; ++int ia[N + EXTRA]; ++int ic[N + EXTRA]; ++ ++__attribute__ ((noinline)) ++int main1 () ++{ ++ int i, suma = 0, sumb = 0, sumc = 0; ++ ++ /* ib and ic have same misalignment, we peel to align them. 
*/ ++ for (i = 0; i <= N; i++) ++ { ++ suma += ia[i]; ++ sumb += ib[i + NINTS + 1]; ++ sumc += ic[i + 1]; ++ } ++ ++ /* check results: */ ++ if (suma + sumb + sumc != RES) ++ abort (); ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i < N + EXTRA; i++) ++ { ++ asm volatile ("" : "+r" (i)); ++ ib[i] = i; ++ ic[i] = i+2; ++ ia[i] = i/2; ++ } ++ ++ return main1 (); ++} +diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-3.c b/gcc/testsuite/gcc.dg/vect/vect-peel-3.c +index d5c0cf10ce1..2cd99573fd1 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-peel-3.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-peel-3.c +@@ -1,64 +1,9 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-add-options bind_pic_locally } */ + +-#include +-#include "tree-vect.h" +- +-#if VECTOR_BITS > 128 +-#define NINTS (VECTOR_BITS / 32) +-#define EXTRA (NINTS * 2) +-#else +-#define NINTS 4 +-#define EXTRA 10 +-#endif +- +-#define N 128 +- +-#define RES_A (N * N / 4) +-#define RES_B (N * (N + 1) / 2 + (NINTS + 3) * (N + 1)) +-#define RES_C (N * (N + 1) / 2 + (N + 1)) +-#define RES (RES_A + RES_B + RES_C) +- +-int ib[N + EXTRA]; +-int ia[N + EXTRA]; +-int ic[N + EXTRA]; +- +-__attribute__ ((noinline)) +-int main1 () +-{ +- int i, suma = 0, sumb = 0, sumc = 0; +- +- /* ib and ic have same misalignment, we peel to align them. */ +- for (i = 0; i <= N; i++) +- { +- suma += ia[i]; +- sumb += ib[i + NINTS + 1]; +- sumc += ic[i + 1]; +- } +- +- /* check results: */ +- if (suma + sumb + sumc != RES) +- abort (); +- +- return 0; +-} +- +-int main (void) +-{ +- int i; +- +- check_vect (); +- +- for (i = 0; i < N + EXTRA; i++) +- { +- asm volatile ("" : "+r" (i)); +- ib[i] = i; +- ic[i] = i+2; +- ia[i] = i/2; +- } +- +- return main1 (); +-} ++#include "vect-peel-3-src.c" + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ + /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { { ! vect_unaligned_possible } || vect_sizes_32B_16B } } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-4-epilogue.c b/gcc/testsuite/gcc.dg/vect/vect-peel-4-epilogue.c +new file mode 100644 +index 00000000000..783982f04f6 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-peel-4-epilogue.c +@@ -0,0 +1,4 @@ ++/* { dg-require-effective-target vect_int } */ ++/* { dg-add-options bind_pic_locally } */ ++ ++#include "vect-peel-4-src.c" +diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-4-src.c b/gcc/testsuite/gcc.dg/vect/vect-peel-4-src.c +new file mode 100644 +index 00000000000..33088fb0902 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-peel-4-src.c +@@ -0,0 +1,45 @@ ++#include ++#include "tree-vect.h" ++ ++#define N 128 ++ ++int ib[N+7]; ++int ia[N+1]; ++ ++__attribute__ ((noinline)) ++int main1 () ++{ ++ int i; ++ ++ /* Don't peel keeping one load and the store aligned. 
*/ ++ for (i = 0; i <= N; i++) ++ { ++ ia[i] = ib[i] + ib[i+5]; ++ } ++ ++ /* check results: */ ++ for (i = 1; i <= N; i++) ++ { ++ if (ia[i] != ib[i] + ib[i+5]) ++ abort (); ++ } ++ ++ return 0; ++} ++ ++int main (void) ++{ ++ int i; ++ ++ check_vect (); ++ ++ for (i = 0; i <= N+6; i++) ++ { ++ asm volatile ("" : "+r" (i)); ++ ib[i] = i; ++ } ++ ++ return main1 (); ++} ++ ++ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-peel-4.c b/gcc/testsuite/gcc.dg/vect/vect-peel-4.c +index 88f9f0ddcba..3b5272f284f 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-peel-4.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-peel-4.c +@@ -1,49 +1,9 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-add-options bind_pic_locally } */ + +-#include +-#include "tree-vect.h" +- +-#define N 128 +- +-int ib[N+7]; +-int ia[N+1]; +- +-__attribute__ ((noinline)) +-int main1 () +-{ +- int i; +- +- /* Don't peel keeping one load and the store aligned. */ +- for (i = 0; i <= N; i++) +- { +- ia[i] = ib[i] + ib[i+5]; +- } +- +- /* check results: */ +- for (i = 1; i <= N; i++) +- { +- if (ia[i] != ib[i] + ib[i+5]) +- abort (); +- } +- +- return 0; +-} +- +-int main (void) +-{ +- int i; +- +- check_vect (); +- +- for (i = 0; i <= N+6; i++) +- { +- asm volatile ("" : "+r" (i)); +- ib[i] = i; +- } +- +- return main1 (); +-} ++#include "vect-peel-4-src.c" + + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail { vect_no_align && { ! vect_hw_misalign } } } } } */ + /* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 1 "vect" { xfail { ! vect_unaligned_possible } } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-2char-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-2char-big-array.c +index e246ae7f3c6..c40f8625b84 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-2char-big-array.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-2char-big-array.c +@@ -62,4 +62,4 @@ int main (void) + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c +index 5f0551ee372..dd3045502f1 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-2char.c +@@ -46,4 +46,4 @@ int main (void) + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c +index 02c2bee8612..1a2d8d04f4e 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-2short.c +@@ -45,4 +45,4 @@ int main (void) + return 0; + } + +-/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c +index ad148046a8e..cc0d9694a4f 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-6.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_float_strict } */ + /* { dg-additional-options "-fno-fast-math" } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16a.c +index 171451872e5..ffbc9706901 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16a.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s16a.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c +index ac674749b6f..05e343ad782 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ + /* { dg-additional-options "-march=armv8.2-a+dotprod" { target { aarch64*-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c +index b036ad5b0b4..e0f47d8a4f2 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8b.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +@@ -12,12 +14,6 @@ signed char Y[N] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__))); + + /* char->short->short dot product. + The dot-product pattern should be detected. +- The reduction is currently not vectorized becaus of the signed->unsigned->signed +- casts, since this patch: +- +- 2005-12-26 Kazu Hirata +- +- PR tree-optimization/25125 + + When the dot-product is detected, the loop should be vectorized on vect_sdot_qi + targets (targets that support dot-product of signed char). +@@ -60,5 +56,5 @@ int main (void) + /* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */ + /* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */ + +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u16b.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u16b.c +index 57e18040cf2..0fc112012cf 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u16b.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u16b.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c +index d020f643bb8..e23ebd9b072 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-require-effective-target arm_v8_2a_dotprod_neon_hw { target { aarch64*-*-* || arm*-*-* } } } */ + /* { dg-additional-options "-march=armv8.2-a+dotprod" { target { aarch64*-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8b.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8b.c +index 3155d97b3cd..288be13440d 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8b.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8b.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-epilogue-gaps.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-epilogue-gaps.c +new file mode 100644 +index 00000000000..dc5704f5607 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-epilogue-gaps.c +@@ -0,0 +1,45 @@ ++/* { dg-options "-O3 -fno-vect-cost-model" } */ ++struct { ++ float real; ++ float img; ++} g[11]; ++ ++float __attribute__ ((noclone)) ++foo_11 (void) ++{ ++ float sum = 0.0; ++ for (int i = 0; i < 11; ++i) ++ sum += g[i].real; ++ return sum; ++} ++ ++float __attribute__ ((noclone)) ++foo_10 (void) ++{ ++ float sum = 0.0; ++ for (int i = 0; i < 10; ++i) ++ sum += g[i].real; ++ return sum; ++} ++ ++int main (void) ++{ ++ float check_10 = 0.0; ++ float check_11 = 0.0; ++ for (int i = 0; i < 11; ++i) ++ { ++ asm volatile ("" : : : "memory"); ++ g[i].real = (float) i; ++ g[i].img = (float) -i; ++ if (i < 10) ++ check_10 += (float) i; ++ check_11 += (float) i; ++ } ++ ++ if (foo_10 () != check_10) ++ __builtin_abort (); ++ if (foo_11 () != check_11) ++ __builtin_abort (); ++ ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1a.c +index b06b234072b..1ddbe96ebc3 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1a.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1a.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b-big-array.c +index be03c7d011d..7ae2c838344 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b-big-array.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1b-big-array.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1c-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1c-big-array.c +index c30c85ce911..91ce0ef934e 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1c-big-array.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-1c-big-array.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2a.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2a.c +index a98edd3045a..2190eaa6242 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2a.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2a.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2b-big-array.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2b-big-array.c +index 570e56a8c9b..6ad645b3bdd 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2b-big-array.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2b-big-array.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2c.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2c.c +index 8190622d5d7..71df5741e16 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2c.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-pattern-2c.c +@@ -21,6 +21,8 @@ foo () + 2005-12-26 Kazu Hirata + + PR tree-optimization/25125 ++ ++ but we still handle the reduction. + */ + + for (i = 0; i < N; i++) +@@ -43,5 +45,4 @@ main (void) + } + + /* { dg-final { scan-tree-dump-times "vect_recog_widen_sum_pattern: detected" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */ +-/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 0 "vect" { target { ! vect_widen_sum_qi_to_hi } } } } */ ++/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c b/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c +index a033a7d27d1..2f0bb692564 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-reduc-sad.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_usad_char } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c +index b912a3431f7..e5bbeaede09 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-tail-nomask-1.c +@@ -106,4 +106,4 @@ main (int argc, const char **argv) + } + + /* { dg-final { scan-tree-dump-times "LOOP VECTORIZED" 2 "vect" { target avx2_runtime } } } */ +-/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(VS=16\\)" 2 "vect" { target avx2_runtime } } } */ ++/* { dg-final { scan-tree-dump-times "LOOP EPILOGUE VECTORIZED \\(MODE=V16QI\\)" 2 "vect" { target avx2_runtime } } } */ +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c +index 89f983cad06..4c95dd20179 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-s16.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-additional-options "-fno-ipa-icf" } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c +index e319699cd92..4075f815cea 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-const-u16.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-additional-options "-fno-ipa-icf" } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c +index ee0538c0635..c4ac88e186d 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half-u8.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + /* { dg-additional-options "-fno-ipa-icf" } */ + +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c +index 6d74c693316..ebbf4f5e841 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-half.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include "tree-vect.h" +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c +index 942f63d6f31..2e28baae0b8 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c +index 98f78d3b37a..d277f0b2b94 100644 +--- a/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c ++++ b/gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8-s16-s32.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c b/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c +index 176f183f3ce..6fc7a282351 100644 +--- a/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c ++++ b/gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-require-effective-target vect_int } */ + + #include +diff --git a/gcc/testsuite/gcc.dg/vshift-5.c b/gcc/testsuite/gcc.dg/vshift-5.c +index daa5f1c5cd8..62e6328cb28 100644 +--- a/gcc/testsuite/gcc.dg/vshift-5.c ++++ b/gcc/testsuite/gcc.dg/vshift-5.c +@@ -40,6 +40,42 @@ f2 (void) + a[3] = a3; + } + ++__attribute__((noinline, noclone)) void ++f2a (int x) ++{ ++ long long a0, a1, a2, a3; ++ a0 = a[0]; ++ a1 = a[1]; ++ a2 = a[2]; ++ a3 = a[3]; ++ a0 = a0 << x; ++ a1 = a1 << 2; ++ a2 = a2 << 2; ++ a3 = a3 << 2; ++ a[0] = a0; ++ a[1] = a1; ++ a[2] = a2; ++ a[3] = a3; ++} ++ ++__attribute__((noinline, noclone)) void ++f2b (int x) ++{ ++ long long a0, a1, a2, a3; ++ a0 = a[0]; ++ a1 = a[1]; ++ a2 = a[2]; ++ a3 = a[3]; ++ a0 = a0 << 2; ++ a1 = a1 << 2; ++ a2 = a2 << x; ++ a3 = a3 << 2; ++ a[0] = a0; ++ a[1] = a1; ++ a[2] = a2; ++ a[3] = a3; ++} ++ + __attribute__((noinline, noclone)) void + f3 (int x) + { +@@ -77,5 +113,13 @@ main () + if (a[0] != (4LL << 7) || a[1] != (3LL << 8) + || a[2] != (2LL << 9) || a[3] != (1LL << 10)) + abort (); ++ f2a (3); ++ if (a[0] != (4LL << 10) || a[1] != (3LL << 10) ++ || a[2] != (2LL << 11) || a[3] != (1LL << 12)) ++ abort (); ++ f2b (3); ++ if (a[0] != (4LL << 12) || a[1] != (3LL << 12) ++ || a[2] != (2LL << 14) || a[3] != (1LL << 14)) ++ abort (); + return 0; + } +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/dot_1.c b/gcc/testsuite/gcc.target/aarch64/sve/dot_1.c +new file mode 100644 +index 00000000000..8ff66714e9b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/dot_1.c +@@ -0,0 +1,39 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define DEF_DOT(TYPE1, TYPE2) \ ++TYPE1 __attribute__ ((noinline, noclone)) \ ++dot_##TYPE1##_##TYPE2 (TYPE2 *restrict x, TYPE2 *restrict y, int n) \ ++{ \ ++ TYPE1 sum = 0; \ ++ for (int i = 0; i < n; i++) \ ++ { \ ++ sum += x[i] * y[i]; \ ++ } \ ++ return sum; \ ++} ++ ++DEF_DOT(uint32_t, uint8_t) ++DEF_DOT(int32_t, int8_t) ++DEF_DOT(int64_t, int16_t) ++ ++/* The uint16_t->uint64_t dot product requires a casting to satisfy the C ++ language rules. 
*/ ++uint64_t __attribute__ ((noinline, noclone)) ++dot_uint64_t_uint16_t (uint16_t *restrict x, uint16_t *restrict y, int n) ++{ ++ uint64_t sum = 0; ++ for (int i = 0; i < n; i++) ++ { ++ sum += (unsigned int)x[i] * y[i]; ++ } ++ return sum; ++} ++ ++/* { dg-final { scan-assembler-times {\tudot\tz[0-9]+\.s, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsdot\tz[0-9]+\.s, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tudot\tz[0-9]+\.d, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsdot\tz[0-9]+\.d, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\t} 8 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/fmla_2.c b/gcc/testsuite/gcc.target/aarch64/sve/fmla_2.c +index 5c04bcdb3f5..51925fa8f50 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/fmla_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/fmla_2.c +@@ -17,3 +17,4 @@ f (double *restrict a, double *restrict b, double *restrict c, + + /* { dg-final { scan-assembler-times {\tfmla\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + /* { dg-final { scan-assembler-not {\tfmad\t} } } */ ++/* { dg-final { scan-assembler-times {\tst1d} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/mask_load_slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/mask_load_slp_1.c +new file mode 100644 +index 00000000000..78c70b2be32 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/mask_load_slp_1.c +@@ -0,0 +1,90 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#include ++ ++#define MASK_SLP_2(TYPE_COND, ALT_VAL) \ ++void __attribute__ ((noinline, noclone)) \ ++mask_slp_##TYPE_COND##_2_##ALT_VAL (int *restrict x, int *restrict y, \ ++ TYPE_COND *restrict z, int n) \ ++{ \ ++ for (int i = 0; i < n; i += 2) \ ++ { \ ++ x[i] = y[i] ? z[i] : 1; \ ++ x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL; \ ++ } \ ++} ++ ++#define MASK_SLP_4(TYPE_COND, ALT_VAL) \ ++void __attribute__ ((noinline, noclone)) \ ++mask_slp_##TYPE_COND##_4_##ALT_VAL (int *restrict x, int *restrict y, \ ++ TYPE_COND *restrict z, int n) \ ++{ \ ++ for (int i = 0; i < n; i += 4) \ ++ { \ ++ x[i] = y[i] ? z[i] : 1; \ ++ x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL; \ ++ x[i + 2] = y[i + 2] ? z[i + 2] : 1; \ ++ x[i + 3] = y[i + 3] ? z[i + 3] : ALT_VAL; \ ++ } \ ++} ++ ++#define MASK_SLP_8(TYPE_COND, ALT_VAL) \ ++void __attribute__ ((noinline, noclone)) \ ++mask_slp_##TYPE_COND##_8_##ALT_VAL (int *restrict x, int *restrict y, \ ++ TYPE_COND *restrict z, int n) \ ++{ \ ++ for (int i = 0; i < n; i += 8) \ ++ { \ ++ x[i] = y[i] ? z[i] : 1; \ ++ x[i + 1] = y[i + 1] ? z[i + 1] : ALT_VAL; \ ++ x[i + 2] = y[i + 2] ? z[i + 2] : 1; \ ++ x[i + 3] = y[i + 3] ? z[i + 3] : ALT_VAL; \ ++ x[i + 4] = y[i + 4] ? z[i + 4] : 1; \ ++ x[i + 5] = y[i + 5] ? z[i + 5] : ALT_VAL; \ ++ x[i + 6] = y[i + 6] ? z[i + 6] : 1; \ ++ x[i + 7] = y[i + 7] ? z[i + 7] : ALT_VAL; \ ++ } \ ++} ++ ++#define MASK_SLP_FAIL(TYPE_COND) \ ++void __attribute__ ((noinline, noclone)) \ ++mask_slp_##TYPE_COND##_FAIL (int *restrict x, int *restrict y, \ ++ TYPE_COND *restrict z, int n) \ ++{ \ ++ for (int i = 0; i < n; i += 2) \ ++ { \ ++ x[i] = y[i] ? z[i] : 1; \ ++ x[i + 1] = y[i + 1] ? 
z[i + 1] : x[z[i + 1]]; \ ++ } \ ++} ++ ++MASK_SLP_2(int8_t, 1) ++MASK_SLP_2(int8_t, 2) ++MASK_SLP_2(int, 1) ++MASK_SLP_2(int, 2) ++MASK_SLP_2(int64_t, 1) ++MASK_SLP_2(int64_t, 2) ++ ++MASK_SLP_4(int8_t, 1) ++MASK_SLP_4(int8_t, 2) ++MASK_SLP_4(int, 1) ++MASK_SLP_4(int, 2) ++MASK_SLP_4(int64_t, 1) ++MASK_SLP_4(int64_t, 2) ++ ++MASK_SLP_8(int8_t, 1) ++MASK_SLP_8(int8_t, 2) ++MASK_SLP_8(int, 1) ++MASK_SLP_8(int, 2) ++MASK_SLP_8(int64_t, 1) ++MASK_SLP_8(int64_t, 2) ++ ++MASK_SLP_FAIL(int8_t) ++MASK_SLP_FAIL(int) ++MASK_SLP_FAIL(int64_t) ++ ++/* { dg-final { scan-assembler-not {\tld2w\t} } } */ ++/* { dg-final { scan-assembler-not {\tst2w\t} } } */ ++/* { dg-final { scan-assembler-times {\tld1w\t} 48 } } */ ++/* { dg-final { scan-assembler-times {\tst1w\t} 40 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c +index a258344b0a9..f152d04b473 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_1.c +@@ -105,8 +105,8 @@ reduc_##NAME##_##TYPE (TYPE *a, int n) \ + + TEST_BITWISE (DEF_REDUC_BITWISE) + +-/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 1 } } */ +-/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, p[0-7]/m, z[0-9]+\.b, z[0-9]+\.b\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.h, p[0-7]/m, z[0-9]+\.h, z[0-9]+\.h\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +@@ -157,8 +157,8 @@ TEST_BITWISE (DEF_REDUC_BITWISE) + /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.s, p[0-7]/m, z[0-9]+\.s, z[0-9]+\.s\n} 2 } } */ + /* { dg-final { scan-assembler-times {\teor\tz[0-9]+\.d, p[0-7]/m, z[0-9]+\.d, z[0-9]+\.d\n} 2 } } */ + +-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c +index 376a453fc73..0640cba8e0f 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_2.c +@@ -116,8 +116,8 @@ reduc_##NAME##TYPE (TYPE (*restrict a)[NUM_ELEMS(TYPE)], \ + + TEST_BITWISE (DEF_REDUC_BITWISE) + +-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 1 } } */ +-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ + 
/* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h\n} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_5.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_5.c +index ff535942331..cced4ad488e 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_5.c +@@ -23,16 +23,12 @@ REDUC (uint64_t) + REDUC (float) + REDUC (double) + +-/* XFAILed until we support sub-int reductions for signed types. */ +-/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m} 2 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m} 2 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m} 1 } } */ +-/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m} 1 } } */ ++/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.b, p[0-7]/m} 2 } } */ ++/* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.h, p[0-7]/m} 2 } } */ + /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.s, p[0-7]/m} 2 } } */ + /* { dg-final { scan-assembler-times {\tsub\tz[0-9]+\.d, p[0-7]/m} 2 } } */ + /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.s, p[0-7]/m} 1 } } */ + /* { dg-final { scan-assembler-times {\tfsub\tz[0-9]+\.d, p[0-7]/m} 1 } } */ + +-/* XFAILed until we support sub-int reductions for signed types. */ +-/* { dg-final { scan-assembler-times {\tsub\t} 8 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tsub\t} 8 } } */ + /* { dg-final { scan-assembler-times {\tfsub\t} 2 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_8.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_8.c +index 3913b8848c0..dec4c87e54d 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_8.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_8.c +@@ -15,6 +15,5 @@ reduc (int *restrict a, int *restrict b, int *restrict c) + } + + /* { dg-final { scan-assembler-times {\tcmpne\tp[0-9]+\.s, } 1 } } */ +-/* We ought to use the CMPNE result for the SEL too. 
*/ +-/* { dg-final { scan-assembler-not {\tcmpeq\tp[0-9]+\.s, } { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-not {\tcmpeq\tp[0-9]+\.s, } } } */ + /* { dg-final { scan-assembler-times {\tsel\tz[0-9]+\.s, } 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_3.c b/gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_3.c +index a718e9d2ebf..83ebec50bc6 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/reduc_strict_3.c +@@ -1,10 +1,7 @@ + /* { dg-do compile } */ +-/* { dg-options "-O2 -ftree-vectorize -fno-inline -msve-vector-bits=256 -fdump-tree-vect-details" } */ ++/* { dg-options "-O2 -ftree-vectorize" } */ + +-double mat[100][4]; +-double mat2[100][8]; +-double mat3[100][12]; +-double mat4[100][3]; ++double mat[100][2]; + + double + slp_reduc_plus (int n) +@@ -14,115 +11,8 @@ slp_reduc_plus (int n) + { + tmp = tmp + mat[i][0]; + tmp = tmp + mat[i][1]; +- tmp = tmp + mat[i][2]; +- tmp = tmp + mat[i][3]; + } + return tmp; + } + +-double +-slp_reduc_plus2 (int n) +-{ +- double tmp = 0.0; +- for (int i = 0; i < n; i++) +- { +- tmp = tmp + mat2[i][0]; +- tmp = tmp + mat2[i][1]; +- tmp = tmp + mat2[i][2]; +- tmp = tmp + mat2[i][3]; +- tmp = tmp + mat2[i][4]; +- tmp = tmp + mat2[i][5]; +- tmp = tmp + mat2[i][6]; +- tmp = tmp + mat2[i][7]; +- } +- return tmp; +-} +- +-double +-slp_reduc_plus3 (int n) +-{ +- double tmp = 0.0; +- for (int i = 0; i < n; i++) +- { +- tmp = tmp + mat3[i][0]; +- tmp = tmp + mat3[i][1]; +- tmp = tmp + mat3[i][2]; +- tmp = tmp + mat3[i][3]; +- tmp = tmp + mat3[i][4]; +- tmp = tmp + mat3[i][5]; +- tmp = tmp + mat3[i][6]; +- tmp = tmp + mat3[i][7]; +- tmp = tmp + mat3[i][8]; +- tmp = tmp + mat3[i][9]; +- tmp = tmp + mat3[i][10]; +- tmp = tmp + mat3[i][11]; +- } +- return tmp; +-} +- +-void +-slp_non_chained_reduc (int n, double * restrict out) +-{ +- for (int i = 0; i < 3; i++) +- out[i] = 0; +- +- for (int i = 0; i < n; i++) +- { +- out[0] = out[0] + mat4[i][0]; +- out[1] = out[1] + mat4[i][1]; +- out[2] = out[2] + mat4[i][2]; +- } +-} +- +-/* Strict FP reductions shouldn't be used for the outer loops, only the +- inner loops. */ +- +-float +-double_reduc1 (float (*restrict i)[16]) +-{ +- float l = 0; +- +- for (int a = 0; a < 8; a++) +- for (int b = 0; b < 8; b++) +- l += i[b][a]; +- return l; +-} +- +-float +-double_reduc2 (float *restrict i) +-{ +- float l = 0; +- +- for (int a = 0; a < 8; a++) +- for (int b = 0; b < 16; b++) +- { +- l += i[b * 4]; +- l += i[b * 4 + 1]; +- l += i[b * 4 + 2]; +- l += i[b * 4 + 3]; +- } +- return l; +-} +- +-float +-double_reduc3 (float *restrict i, float *restrict j) +-{ +- float k = 0, l = 0; +- +- for (int a = 0; a < 8; a++) +- for (int b = 0; b < 8; b++) +- { +- k += i[b]; +- l += j[b]; +- } +- return l * k; +-} +- +-/* { dg-final { scan-assembler-times {\tfadda\ts[0-9]+, p[0-7], s[0-9]+, z[0-9]+\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d} 9 } } */ +-/* 1 reduction each for double_reduc{1,2} and 2 for double_reduc3. Each one +- is reported three times, once for SVE, once for 128-bit AdvSIMD and once +- for 64-bit AdvSIMD. */ +-/* { dg-final { scan-tree-dump-times "Detected double reduction" 12 "vect" } } */ +-/* double_reduc2 has 2 reductions and slp_non_chained_reduc has 3. 
*/ +-/* { dg-final { scan-tree-dump-times "Detected reduction" 10 "vect" } } */ ++/* { dg-final { scan-assembler-times {\tfadda\td[0-9]+, p[0-7], d[0-9]+, z[0-9]+\.d\n} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_13.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_13.c +index 0b2a7ad57e3..37b5f1148a3 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_13.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_13.c +@@ -32,7 +32,6 @@ vec_slp_##TYPE (TYPE *restrict a, int n) \ + + TEST_ALL (VEC_PERM) + +-/* ??? We don't treat the int8_t and int16_t loops as reductions. */ + /* ??? We don't treat the uint loops as SLP. */ + /* The loop should be fully-masked. */ + /* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */ +@@ -41,15 +40,15 @@ TEST_ALL (VEC_PERM) + /* { dg-final { scan-assembler-times {\tld1w\t} 2 } } */ + /* { dg-final { scan-assembler-times {\tld1d\t} 3 { xfail *-*-* } } } */ + /* { dg-final { scan-assembler-times {\tld1d\t} 2 } } */ +-/* { dg-final { scan-assembler-not {\tldr} { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-not {\tldr} } } */ + +-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */ + /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ + /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */ + +-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 2 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b\n} 2 } } */ ++/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d\n} 2 } } */ + /* { dg-final { scan-assembler-times {\tfadda\th[0-9]+, p[0-7], h[0-9]+, z[0-9]+\.h\n} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_5.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_5.c +index b75edc69e2d..6a199d00659 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_5.c +@@ -33,34 +33,24 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ + + TEST_ALL (VEC_PERM) + +-/* ??? We don't think it's worth using SLP for the 64-bit loops and fall +- back to the less efficient non-SLP implementation instead. */ +-/* ??? At present we don't treat the int8_t and int16_t loops as +- reductions. 
*/ +-/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tld1h\t} 3 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ +-/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */ ++/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */ ++/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */ + /* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */ + /* { dg-final { scan-assembler-times {\tld1d\t} 3 } } */ + /* { dg-final { scan-assembler-not {\tld2b\t} } } */ + /* { dg-final { scan-assembler-not {\tld2h\t} } } */ + /* { dg-final { scan-assembler-not {\tld2w\t} } } */ + /* { dg-final { scan-assembler-not {\tld2d\t} } } */ +-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 4 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 4 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 2 } } */ +-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */ ++/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 4 } } */ ++/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */ + /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */ + /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 4 } } */ + /* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h} 2 } } */ + /* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 2 } } */ + /* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 2 } } */ + +-/* Should be 4 and 6 respectively, if we used reductions for int8_t and +- int16_t. */ +-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */ +-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */ + /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ + /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */ + +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_7.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_7.c +index 9e6aa8ccbf8..19207207999 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_7.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_7.c +@@ -31,45 +31,27 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \ + T (uint16_t) \ + T (int32_t) \ + T (uint32_t) \ +- T (int64_t) \ +- T (uint64_t) \ + T (_Float16) \ +- T (float) \ +- T (double) ++ T (float) + + TEST_ALL (VEC_PERM) + +-/* We can't use SLP for the 64-bit loops, since the number of reduction +- results might be greater than the number of elements in the vector. +- Otherwise we have two loads per loop, one for the initial vector +- and one for the loop body. */ +-/* ??? At present we don't treat the int8_t and int16_t loops as +- reductions. */ +-/* { dg-final { scan-assembler-times {\tld1b\t} 2 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tld1h\t} 3 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tld1b\t} 1 } } */ +-/* { dg-final { scan-assembler-times {\tld1h\t} 2 } } */ ++/* We have two loads per loop, one for the initial vector and one for ++ the loop body. 
*/ ++/* { dg-final { scan-assembler-times {\tld1b\t} 2 } } */ ++/* { dg-final { scan-assembler-times {\tld1h\t} 3 } } */ + /* { dg-final { scan-assembler-times {\tld1w\t} 3 } } */ +-/* { dg-final { scan-assembler-times {\tld4d\t} 3 } } */ + /* { dg-final { scan-assembler-not {\tld4b\t} } } */ + /* { dg-final { scan-assembler-not {\tld4h\t} } } */ + /* { dg-final { scan-assembler-not {\tld4w\t} } } */ +-/* { dg-final { scan-assembler-not {\tld1d\t} } } */ +-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 8 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 8 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 4 } } */ +-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */ ++/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.b} 8 } } */ ++/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.h} 8 } } */ + /* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.s} 8 } } */ +-/* { dg-final { scan-assembler-times {\tuaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 8 } } */ + /* { dg-final { scan-assembler-times {\tfaddv\th[0-9]+, p[0-7], z[0-9]+\.h} 4 } } */ + /* { dg-final { scan-assembler-times {\tfaddv\ts[0-9]+, p[0-7], z[0-9]+\.s} 4 } } */ +-/* { dg-final { scan-assembler-times {\tfaddv\td[0-9]+, p[0-7], z[0-9]+\.d} 4 } } */ + +-/* Should be 4 and 6 respectively, if we used reductions for int8_t and +- int16_t. */ +-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 2 } } */ +-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 4 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.b} 4 } } */ ++/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.h} 6 } } */ + /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */ +-/* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */ + + /* { dg-final { scan-assembler-not {\tuqdec} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c +index 68baba9e965..40ff2d561a8 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_1.c +@@ -15,12 +15,9 @@ f (TYPE *x, TYPE *y, unsigned short n, l + /* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */ + /* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */ + /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */ +-/* Should multiply by (VF-1)*4 rather than (257-1)*4. */ +-/* { dg-final { scan-assembler-not {, 1024} } } */ +-/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */ +-/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */ +-/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */ +-/* { dg-final { scan-assembler-not {\tcsel\tx[0-9]+} } } */ +-/* Two range checks and a check for n being zero. */ +-/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */ +-/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ ++/* Should use a WAR check that multiplies by (VF-2)*4 rather than ++ an overlap check that multiplies by (257-1)*4. */ ++/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #8\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */ ++/* One range check and a check for n being zero. 
*/ ++/* { dg-final { scan-assembler-times {\t(?:cmp|tst)\t} 1 } } */ ++/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c +index 30f6d2691b8..b8afea70207 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_2.c +@@ -15,7 +15,7 @@ f (TYPE *x, TYPE *y, unsigned short n, unsigned short m) + /* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */ + /* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */ + /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */ +-/* Should multiply by (257-1)*4 rather than (VF-1)*4. */ ++/* Should multiply by (257-1)*4 rather than (VF-1)*4 or (VF-2)*4. */ + /* { dg-final { scan-assembler-times {\tubfiz\tx[0-9]+, x2, 10, 16\n} 1 } } */ + /* { dg-final { scan-assembler-times {\tubfiz\tx[0-9]+, x3, 10, 16\n} 1 } } */ + /* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c +index 70792ff9f33..5ab6859ad4e 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_3.c +@@ -15,13 +15,10 @@ f (TYPE *x, TYPE *y, int n, long m __attribute__((unused))) + /* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */ + /* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */ + /* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */ +-/* Should multiply by (VF-1)*4 rather than (257-1)*4. */ +-/* { dg-final { scan-assembler-not {, 1024} } } */ +-/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */ +-/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]10} } } */ +-/* { dg-final { scan-assembler-not {\tcmp\tx[0-9]+, 0} } } */ +-/* { dg-final { scan-assembler {\tcmp\tw2, 0} } } */ +-/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */ +-/* Two range checks and a check for n being zero. */ +-/* { dg-final { scan-assembler {\tcmp\t} } } */ +-/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ ++/* Should use a WAR check that multiplies by (VF-2)*4 rather than ++ an overlap check that multiplies by (257-1)*4. */ ++/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #8\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */ ++/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+[^\n]*xzr} 1 } } */ ++/* One range check and a check for n being zero. */ ++/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */ ++/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c +index 688f3be61d7..93c114193e9 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_5.c +@@ -15,13 +15,10 @@ f (TYPE *x, TYPE *y, long n, long m __attribute__((unused))) + /* { dg-final { scan-assembler {\tst1d\tz[0-9]+} } } */ + /* { dg-final { scan-assembler {\tldr\td[0-9]+} } } */ + /* { dg-final { scan-assembler {\tstr\td[0-9]+} } } */ +-/* Should multiply by (VF-1)*8 rather than (257-1)*8. 
*/ +-/* { dg-final { scan-assembler-not {, 2048} } } */ +-/* { dg-final { scan-assembler-not {\t.bfiz\t} } } */ +-/* { dg-final { scan-assembler-not {lsl[^\n]*[, ]11} } } */ +-/* { dg-final { scan-assembler {\tcmp\tx[0-9]+, 0} } } */ +-/* { dg-final { scan-assembler-not {\tcmp\tw[0-9]+, 0} } } */ +-/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+} 2 } } */ +-/* Two range checks and a check for n being zero. */ +-/* { dg-final { scan-assembler {\tcmp\t} } } */ +-/* { dg-final { scan-assembler-times {\tccmp\t} 2 } } */ ++/* Should use a WAR check that multiplies by (VF-2)*8 rather than ++ an overlap check that multiplies by (257-1)*4. */ ++/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #16\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */ ++/* { dg-final { scan-assembler-times {\tcsel\tx[0-9]+[^\n]*xzr} 1 } } */ ++/* One range check and a check for n being zero. */ ++/* { dg-final { scan-assembler-times {\tcmp\t} 1 } } */ ++/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c +index 00d84760a19..b38f23e87ba 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_4.c +@@ -98,24 +98,24 @@ TEST_CMP (nugt) + /* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */ + + /* 5 for lt, 5 for ult and 5 for nult. */ +-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */ ++/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */ + + /* 5 for le, 5 for ule and 5 for nule. */ +-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */ ++/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */ + + /* 5 for gt, 5 for ugt and 5 for nugt. */ +-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */ ++/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */ + + /* 5 for ge, 5 for uge and 5 for nuge. 
*/ +-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} 15 } } */ ++/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 30 } } */ + + /* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0\n} } } */ + /* 3 loops * 5 invocations for all 12 unordered comparisons. */ +-/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 180 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s\n} 180 } } */ + + /* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 7 { xfail *-*-* } } } */ + /* { dg-final { scan-assembler-times {\tfcmeq\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 14 { xfail *-*-* } } } */ +@@ -123,19 +123,19 @@ TEST_CMP (nugt) + /* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */ + /* { dg-final { scan-assembler-times {\tfcmne\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */ + +-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */ ++/* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */ + +-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */ ++/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */ + +-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */ ++/* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */ + +-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} 21 } } */ ++/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 42 } } */ + + /* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0\n} } } */ + /* 3 loops * 5 invocations, with 2 invocations having ncopies == 2, + for all 12 unordered comparisons. 
*/ +-/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 252 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d\n} 252 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c b/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c +index 23bfb7b2649..2f16fbff522 100644 +--- a/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c ++++ b/gcc/testsuite/gcc.target/aarch64/sve/vcond_5.c +@@ -19,16 +19,16 @@ + /* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */ + + /* 5 for le, 5 for ule and 5 for nule. */ +-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */ ++/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */ + + /* 5 for gt, 5 for ugt, 5 for nueq and 5 for nugt. */ + /* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 20 { xfail *-*-* } } } */ + /* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 40 { xfail *-*-* } } } */ + + /* 5 for ge, 5 for uge and 5 for nuge. */ +-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} 15 } } */ ++/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, z[0-9]+\.s} 30 } } */ + + /* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.s, p[0-7]/z, z[0-9]+\.s, #0\.0} } } */ + /* 3 loops * 5 invocations for ordered, unordered amd ueq. 
*/ +@@ -43,14 +43,14 @@ + /* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */ + /* { dg-final { scan-assembler-times {\tfcmlt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */ + +-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */ ++/* { dg-final { scan-assembler-times {\tfcmle\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */ + + /* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 28 { xfail *-*-* } } } */ + /* { dg-final { scan-assembler-times {\tfcmgt\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 56 { xfail *-*-* } } } */ + +-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 { xfail *-*-* } } } */ +-/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 { xfail *-*-* } } } */ ++/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} 21 } } */ ++/* { dg-final { scan-assembler-times {\tfcmge\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, z[0-9]+\.d} 42 } } */ + + /* { dg-final { scan-assembler-not {\tfcmuo\tp[0-9]+\.d, p[0-7]/z, z[0-9]+\.d, #0\.0} } } */ + /* 3 loops * 5 invocations, with 2 invocations having ncopies == 2, +diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_1.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_1.c +new file mode 100644 +index 00000000000..fe490cfbf3f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_1.c +@@ -0,0 +1,18 @@ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#pragma GCC target "+nosve" ++ ++#include ++ ++void ++f (int64_t *x, int64_t *y, int32_t *z, int n) ++{ ++ for (int i = 0; i < n; ++i) ++ { ++ x[i] += y[i]; ++ z[i] += z[i - 2]; ++ } ++} ++ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2s,} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c +new file mode 100644 +index 00000000000..81e77a8bb04 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_10.c +@@ -0,0 +1,18 @@ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#pragma GCC target "+nosve" ++ ++#include ++ ++void ++f (int16_t *x, int16_t *y, uint8_t *z, int n) ++{ ++ for (int i = 0; i < n; ++i) ++ { ++ x[i] = z[i]; ++ y[i] += y[i - 8]; ++ } ++} ++ ++/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.8h, v[0-9]+\.8b\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c +new file mode 100644 +index 00000000000..d9da6c1f12a +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_11.c +@@ -0,0 +1,18 @@ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#pragma GCC target "+nosve" ++ ++#include ++ ++void ++f (int32_t *x, int64_t *y, int64_t *z, int n) ++{ ++ for (int i = 0; i < n; ++i) ++ { ++ x[i] = z[i]; ++ y[i] += y[i - 2]; ++ } ++} ++ ++/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.2s, v[0-9]+\.2d\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */ +diff 
--git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c +new file mode 100644 +index 00000000000..80dab8bf55f +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_12.c +@@ -0,0 +1,18 @@ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#pragma GCC target "+nosve" ++ ++#include ++ ++void ++f (int16_t *x, int32_t *y, int32_t *z, int n) ++{ ++ for (int i = 0; i < n; ++i) ++ { ++ x[i] = z[i]; ++ y[i] += y[i - 4]; ++ } ++} ++ ++/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.4h, v[0-9]+\.4s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c +new file mode 100644 +index 00000000000..655fa7d4bf1 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_13.c +@@ -0,0 +1,18 @@ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#pragma GCC target "+nosve" ++ ++#include ++ ++void ++f (int8_t *x, int16_t *y, int16_t *z, int n) ++{ ++ for (int i = 0; i < n; ++i) ++ { ++ x[i] = z[i]; ++ y[i] += y[i - 8]; ++ } ++} ++ ++/* { dg-final { scan-assembler-times {\txtn\tv[0-9]+\.8b, v[0-9]+\.8h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_2.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_2.c +new file mode 100644 +index 00000000000..1fe69cad259 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_2.c +@@ -0,0 +1,19 @@ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#pragma GCC target "+nosve" ++ ++#include ++ ++void ++f (int32_t *x, int32_t *y, int16_t *z, int n) ++{ ++ for (int i = 0; i < n; ++i) ++ { ++ x[i] += y[i]; ++ z[i] += z[i - 4]; ++ } ++} ++ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4h,} 1 } } */ ++/* { dg-final { scan-assembler-not {\tadd\tv[0-9]+\.2s,} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_3.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_3.c +new file mode 100644 +index 00000000000..1290772216e +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_3.c +@@ -0,0 +1,19 @@ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#pragma GCC target "+nosve" ++ ++#include ++ ++void ++f (int16_t *x, int16_t *y, int8_t *z, int n) ++{ ++ for (int i = 0; i < n; ++i) ++ { ++ x[i] += y[i]; ++ z[i] += z[i - 8]; ++ } ++} ++ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8b,} 1 } } */ ++/* { dg-final { scan-assembler-not {\tadd\tv[0-9]+\.4h,} } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_4.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_4.c +new file mode 100644 +index 00000000000..768ea8c7164 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_4.c +@@ -0,0 +1,18 @@ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#pragma GCC target "+nosve" ++ ++#include ++ ++void ++f (int64_t *x, int64_t *y, int8_t *z, int n) ++{ ++ for (int i = 0; i < n; ++i) ++ { ++ x[i] += y[i]; ++ z[i] += z[i - 8]; ++ } ++} ++ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 4 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8b,} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c +new file 
mode 100644 +index 00000000000..ca8a65a16e7 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_5.c +@@ -0,0 +1,18 @@ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#pragma GCC target "+nosve" ++ ++#include ++ ++void ++f (int64_t *x, int64_t *y, int32_t *z, int n) ++{ ++ for (int i = 0; i < n; ++i) ++ { ++ x[i] = z[i]; ++ y[i] += y[i - 2]; ++ } ++} ++ ++/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.2d, v[0-9]+\.2s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c +new file mode 100644 +index 00000000000..6c09b5b146b +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_6.c +@@ -0,0 +1,18 @@ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#pragma GCC target "+nosve" ++ ++#include ++ ++void ++f (int32_t *x, int32_t *y, int16_t *z, int n) ++{ ++ for (int i = 0; i < n; ++i) ++ { ++ x[i] = z[i]; ++ y[i] += y[i - 4]; ++ } ++} ++ ++/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.4s, v[0-9]+\.4h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c +new file mode 100644 +index 00000000000..94a66c545ef +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_7.c +@@ -0,0 +1,18 @@ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#pragma GCC target "+nosve" ++ ++#include ++ ++void ++f (int16_t *x, int16_t *y, int8_t *z, int n) ++{ ++ for (int i = 0; i < n; ++i) ++ { ++ x[i] = z[i]; ++ y[i] += y[i - 8]; ++ } ++} ++ ++/* { dg-final { scan-assembler-times {\tsxtl\tv[0-9]+\.8h, v[0-9]+\.8b\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.8h,} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c +new file mode 100644 +index 00000000000..9531966c294 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_8.c +@@ -0,0 +1,18 @@ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#pragma GCC target "+nosve" ++ ++#include ++ ++void ++f (int64_t *x, int64_t *y, uint32_t *z, int n) ++{ ++ for (int i = 0; i < n; ++i) ++ { ++ x[i] = z[i]; ++ y[i] += y[i - 2]; ++ } ++} ++ ++/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.2d, v[0-9]+\.2s\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.2d,} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c +new file mode 100644 +index 00000000000..de8f6988685 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/vect_mixed_sizes_9.c +@@ -0,0 +1,18 @@ ++/* { dg-options "-O2 -ftree-vectorize" } */ ++ ++#pragma GCC target "+nosve" ++ ++#include ++ ++void ++f (int32_t *x, int32_t *y, uint16_t *z, int n) ++{ ++ for (int i = 0; i < n; ++i) ++ { ++ x[i] = z[i]; ++ y[i] += y[i - 4]; ++ } ++} ++ ++/* { dg-final { scan-assembler-times {\tuxtl\tv[0-9]+\.4s, v[0-9]+\.4h\n} 1 } } */ ++/* { dg-final { scan-assembler-times {\tadd\tv[0-9]+\.4s,} 1 } } */ +diff --git a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c +index ae2f8611ea6..9d926ca5dfe 100644 +--- a/gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c ++++ b/gcc/testsuite/gcc.target/i386/avx-vzeroupper-19.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with 
scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-options "-O3 -mavx -mtune=generic -dp" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx2-vect-mask-store-move1.c b/gcc/testsuite/gcc.target/i386/avx2-vect-mask-store-move1.c +index 2a105601c71..51765900fcf 100644 +--- a/gcc/testsuite/gcc.target/i386/avx2-vect-mask-store-move1.c ++++ b/gcc/testsuite/gcc.target/i386/avx2-vect-mask-store-move1.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-options "-O3 -mavx2 -fdump-tree-vect-details" } */ + /* { dg-require-effective-target avx2 } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c b/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c +index a26aa6529e8..4de04511934 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c ++++ b/gcc/testsuite/gcc.target/i386/avx512f-gather-2.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ /* PR59617 */ + /* { dg-options "-O3 -mavx512f -fdump-tree-vect-details -mtune=knl" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c b/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c +index 2bb9c5c090b..946117d9d30 100644 +--- a/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c ++++ b/gcc/testsuite/gcc.target/i386/avx512f-gather-5.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-options "-O3 -mavx512f -mtune=knl" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c b/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c +new file mode 100644 +index 00000000000..235fb917e17 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/avx512f-simd-1.c +@@ -0,0 +1,35 @@ ++/* { dg-do compile } */ ++/* { dg-options "-fopenmp-simd -O2 -mavx512f -masm=att" } */ ++/* { dg-final { scan-assembler "vpadd\[^\n\r]*%xmm" } } */ ++/* { dg-final { scan-assembler "vpadd\[^\n\r]*%ymm" } } */ ++/* { dg-final { scan-assembler "vpadd\[^\n\r]*%zmm" } } */ ++ ++#define N 1024 ++int a[N]; ++ ++void ++f1 (void) ++{ ++ int i; ++ #pragma omp simd simdlen (4) ++ for (i = 0; i < N; ++i) ++ a[i] = a[i] + 1; ++} ++ ++void ++f2 (void) ++{ ++ int i; ++ #pragma omp simd simdlen (8) ++ for (i = 0; i < N; ++i) ++ a[i] = a[i] + 2; ++} ++ ++void ++f3 (void) ++{ ++ int i; ++ #pragma omp simd simdlen (16) ++ for (i = 0; i < N; ++i) ++ a[i] = a[i] + 3; ++} +diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_1.c b/gcc/testsuite/gcc.target/i386/l_fma_double_1.c +index e5bcdabcf79..2472fb016ee 100644 +--- a/gcc/testsuite/gcc.target/i386/l_fma_double_1.c ++++ b/gcc/testsuite/gcc.target/i386/l_fma_double_1.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_2.c b/gcc/testsuite/gcc.target/i386/l_fma_double_2.c +index dbd078abc81..3d569733b1e 100644 +--- a/gcc/testsuite/gcc.target/i386/l_fma_double_2.c ++++ b/gcc/testsuite/gcc.target/i386/l_fma_double_2.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_3.c b/gcc/testsuite/gcc.target/i386/l_fma_double_3.c +index d0844f208e5..8e5ec4150cc 100644 +--- a/gcc/testsuite/gcc.target/i386/l_fma_double_3.c ++++ b/gcc/testsuite/gcc.target/i386/l_fma_double_3.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_4.c b/gcc/testsuite/gcc.target/i386/l_fma_double_4.c +index b9498a0ff13..0d2a0408d0b 100644 +--- a/gcc/testsuite/gcc.target/i386/l_fma_double_4.c ++++ b/gcc/testsuite/gcc.target/i386/l_fma_double_4.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_5.c b/gcc/testsuite/gcc.target/i386/l_fma_double_5.c +index 0292ba040a3..fcf1a6ceac1 100644 +--- a/gcc/testsuite/gcc.target/i386/l_fma_double_5.c ++++ b/gcc/testsuite/gcc.target/i386/l_fma_double_5.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/l_fma_double_6.c b/gcc/testsuite/gcc.target/i386/l_fma_double_6.c +index a716006eda8..650e608117f 100644 +--- a/gcc/testsuite/gcc.target/i386/l_fma_double_6.c ++++ b/gcc/testsuite/gcc.target/i386/l_fma_double_6.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_1.c b/gcc/testsuite/gcc.target/i386/l_fma_float_1.c +index b386b83e39a..c29198ba666 100644 +--- a/gcc/testsuite/gcc.target/i386/l_fma_float_1.c ++++ b/gcc/testsuite/gcc.target/i386/l_fma_float_1.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_2.c b/gcc/testsuite/gcc.target/i386/l_fma_float_2.c +index 81193b2d8b1..cb38b77344f 100644 +--- a/gcc/testsuite/gcc.target/i386/l_fma_float_2.c ++++ b/gcc/testsuite/gcc.target/i386/l_fma_float_2.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_3.c b/gcc/testsuite/gcc.target/i386/l_fma_float_3.c +index d86cb904357..10a350e9e10 100644 +--- a/gcc/testsuite/gcc.target/i386/l_fma_float_3.c ++++ b/gcc/testsuite/gcc.target/i386/l_fma_float_3.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_4.c b/gcc/testsuite/gcc.target/i386/l_fma_float_4.c +index 68ca8388d70..020e5d86f35 100644 +--- a/gcc/testsuite/gcc.target/i386/l_fma_float_4.c ++++ b/gcc/testsuite/gcc.target/i386/l_fma_float_4.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_5.c b/gcc/testsuite/gcc.target/i386/l_fma_float_5.c +index 4db4749c024..3ff23c17aab 100644 +--- a/gcc/testsuite/gcc.target/i386/l_fma_float_5.c ++++ b/gcc/testsuite/gcc.target/i386/l_fma_float_5.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/l_fma_float_6.c b/gcc/testsuite/gcc.target/i386/l_fma_float_6.c +index 0b86e6256bd..34671baa28a 100644 +--- a/gcc/testsuite/gcc.target/i386/l_fma_float_6.c ++++ b/gcc/testsuite/gcc.target/i386/l_fma_float_6.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-options "-O3 -Wno-attributes -mfpmath=sse -mfma -mtune=generic -mno-fma4" } */ + +diff --git a/gcc/testsuite/gcc.target/i386/mask-pack.c b/gcc/testsuite/gcc.target/i386/mask-pack.c +index 0b564ef4284..a607dfb460c 100644 +--- a/gcc/testsuite/gcc.target/i386/mask-pack.c ++++ b/gcc/testsuite/gcc.target/i386/mask-pack.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. 
*/ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-options "-mavx512bw -O3 -fopenmp-simd -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 10 "vect" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/mask-unpack.c b/gcc/testsuite/gcc.target/i386/mask-unpack.c +index 4291480cfff..ca71ea2e29d 100644 +--- a/gcc/testsuite/gcc.target/i386/mask-unpack.c ++++ b/gcc/testsuite/gcc.target/i386/mask-unpack.c +@@ -1,3 +1,5 @@ ++/* Disabling epilogues until we find a better way to deal with scans. */ ++/* { dg-additional-options "--param vect-epilogues-nomask=0" } */ + /* { dg-do compile } */ + /* { dg-options "-mavx512bw -mavx512dq -mno-stackrealign -O3 -fopenmp-simd -fdump-tree-vect-details" } */ + /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 10 "vect" } } */ +diff --git a/gcc/testsuite/gcc.target/i386/pr90358.c b/gcc/testsuite/gcc.target/i386/pr90358.c +new file mode 100644 +index 00000000000..4894fdbd079 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr90358.c +@@ -0,0 +1,35 @@ ++/* PR target/90358 */ ++/* { dg-do run { target { sse4_runtime } } } */ ++/* { dg-options "-O3 -msse4" } */ ++ ++struct s { unsigned int a, b, c; }; ++ ++void __attribute__ ((noipa)) ++foo (struct s *restrict s1, struct s *restrict s2, int n) ++{ ++ for (int i = 0; i < n; ++i) ++ { ++ s1[i].b = s2[i].b; ++ s1[i].c = s2[i].c; ++ s2[i].c = 0; ++ } ++} ++ ++#define N 12 ++ ++int ++main () ++{ ++ struct s s1[N], s2[N]; ++ for (unsigned int j = 0; j < N; ++j) ++ { ++ s2[j].a = j * 5; ++ s2[j].b = j * 5 + 2; ++ s2[j].c = j * 5 + 4; ++ } ++ foo (s1, s2, N); ++ for (unsigned int j = 0; j < N; ++j) ++ if (s1[j].b != j * 5 + 2) ++ __builtin_abort (); ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.target/i386/pr91033.c b/gcc/testsuite/gcc.target/i386/pr91033.c +new file mode 100644 +index 00000000000..43d99d5a7dc +--- /dev/null ++++ b/gcc/testsuite/gcc.target/i386/pr91033.c +@@ -0,0 +1,15 @@ ++/* PR tree-optimization/91033 */ ++/* { dg-do compile { target pthread } } */ ++/* { dg-options "-march=knl -O2 -fopenmp-simd -ftree-parallelize-loops=2" } */ ++ ++#define N 1024 ++int a[N]; ++ ++void ++foo (void) ++{ ++ int i; ++ #pragma omp simd simdlen (4) ++ for (i = 0; i < N; ++i) ++ a[i] = a[i] + 1; ++} +diff --git a/gcc/testsuite/gfortran.dg/vect/vect-4.f90 b/gcc/testsuite/gfortran.dg/vect/vect-4.f90 +index b567cbd8644..c2eeafd3900 100644 +--- a/gcc/testsuite/gfortran.dg/vect/vect-4.f90 ++++ b/gcc/testsuite/gfortran.dg/vect/vect-4.f90 +@@ -1,3 +1,5 @@ ++! Disabling epilogues until we find a better way to deal with scans. ++! { dg-additional-options "--param vect-epilogues-nomask=0" } + ! { dg-do compile } + ! { dg-require-effective-target vect_float } + ! { dg-additional-options "--param vect-max-peeling-for-alignment=0" } +diff --git a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 b/gcc/testsuite/gfortran.dg/vect/vect-8.f90 +index 0ac5f1c390b..1c243308476 100644 +--- a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 ++++ b/gcc/testsuite/gfortran.dg/vect/vect-8.f90 +@@ -704,5 +704,6 @@ CALL track('KERNEL ') + RETURN + END SUBROUTINE kernel + +-! { dg-final { scan-tree-dump-times "vectorized 22 loops" 1 "vect" { target vect_intdouble_cvt } } } +-! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target { ! vect_intdouble_cvt } } } } ++! { dg-final { scan-tree-dump-times "vectorized 23 loops" 1 "vect" { target aarch64*-*-* } } } ++! 
{ dg-final { scan-tree-dump-times "vectorized 22 loops" 1 "vect" { target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } } ++! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target { { ! vect_intdouble_cvt } && { ! aarch64*-*-* } } } } } +diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c +index f8aeec9bae8..621c8ea3dad 100644 +--- a/gcc/tree-cfg.c ++++ b/gcc/tree-cfg.c +@@ -3557,6 +3557,24 @@ verify_gimple_assign_unary (gassign *stmt) + { + CASE_CONVERT: + { ++ /* Allow conversions between vectors with the same number of elements, ++ provided that the conversion is OK for the element types too. */ ++ if (VECTOR_TYPE_P (lhs_type) ++ && VECTOR_TYPE_P (rhs1_type) ++ && known_eq (TYPE_VECTOR_SUBPARTS (lhs_type), ++ TYPE_VECTOR_SUBPARTS (rhs1_type))) ++ { ++ lhs_type = TREE_TYPE (lhs_type); ++ rhs1_type = TREE_TYPE (rhs1_type); ++ } ++ else if (VECTOR_TYPE_P (lhs_type) || VECTOR_TYPE_P (rhs1_type)) ++ { ++ error ("invalid vector types in nop conversion"); ++ debug_generic_expr (lhs_type); ++ debug_generic_expr (rhs1_type); ++ return true; ++ } ++ + /* Allow conversions from pointer type to integral type only if + there is no sign or zero extension involved. + For targets were the precision of ptrofftype doesn't match that +diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c +index d00c1bd31e6..c95dd204870 100644 +--- a/gcc/tree-data-ref.c ++++ b/gcc/tree-data-ref.c +@@ -1287,7 +1287,7 @@ create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt, + return dr; + } + +-/* A helper function computes order between two tree epxressions T1 and T2. ++/* A helper function computes order between two tree expressions T1 and T2. + This is used in comparator functions sorting objects based on the order + of tree expressions. The function returns -1, 0, or 1. */ + +@@ -1454,6 +1454,54 @@ comp_dr_with_seg_len_pair (const void *pa_, const void *pb_) + return 0; + } + ++/* Dump information about ALIAS_PAIR, indenting each line by INDENT. */ ++ ++static void ++dump_alias_pair (dr_with_seg_len_pair_t *alias_pair, const char *indent) ++{ ++ dump_printf (MSG_NOTE, "%sreference: %T vs. %T\n", indent, ++ DR_REF (alias_pair->first.dr), ++ DR_REF (alias_pair->second.dr)); ++ ++ dump_printf (MSG_NOTE, "%ssegment length: %T", indent, ++ alias_pair->first.seg_len); ++ if (!operand_equal_p (alias_pair->first.seg_len, ++ alias_pair->second.seg_len, 0)) ++ dump_printf (MSG_NOTE, " vs. %T", alias_pair->second.seg_len); ++ ++ dump_printf (MSG_NOTE, "\n%saccess size: ", indent); ++ dump_dec (MSG_NOTE, alias_pair->first.access_size); ++ if (maybe_ne (alias_pair->first.access_size, alias_pair->second.access_size)) ++ { ++ dump_printf (MSG_NOTE, " vs. "); ++ dump_dec (MSG_NOTE, alias_pair->second.access_size); ++ } ++ ++ dump_printf (MSG_NOTE, "\n%salignment: %d", indent, ++ alias_pair->first.align); ++ if (alias_pair->first.align != alias_pair->second.align) ++ dump_printf (MSG_NOTE, " vs. 
%d", alias_pair->second.align); ++ ++ dump_printf (MSG_NOTE, "\n%sflags: ", indent); ++ if (alias_pair->flags & DR_ALIAS_RAW) ++ dump_printf (MSG_NOTE, " RAW"); ++ if (alias_pair->flags & DR_ALIAS_WAR) ++ dump_printf (MSG_NOTE, " WAR"); ++ if (alias_pair->flags & DR_ALIAS_WAW) ++ dump_printf (MSG_NOTE, " WAW"); ++ if (alias_pair->flags & DR_ALIAS_ARBITRARY) ++ dump_printf (MSG_NOTE, " ARBITRARY"); ++ if (alias_pair->flags & DR_ALIAS_SWAPPED) ++ dump_printf (MSG_NOTE, " SWAPPED"); ++ if (alias_pair->flags & DR_ALIAS_UNSWAPPED) ++ dump_printf (MSG_NOTE, " UNSWAPPED"); ++ if (alias_pair->flags & DR_ALIAS_MIXED_STEPS) ++ dump_printf (MSG_NOTE, " MIXED_STEPS"); ++ if (alias_pair->flags == 0) ++ dump_printf (MSG_NOTE, " "); ++ dump_printf (MSG_NOTE, "\n"); ++} ++ + /* Merge alias checks recorded in ALIAS_PAIRS and remove redundant ones. + FACTOR is number of iterations that each data reference is accessed. + +@@ -1488,19 +1536,50 @@ void + prune_runtime_alias_test_list (vec *alias_pairs, + poly_uint64) + { ++ if (alias_pairs->is_empty ()) ++ return; ++ ++ /* Canonicalize each pair so that the base components are ordered wrt ++ data_ref_compare_tree. This allows the loop below to merge more ++ cases. */ ++ unsigned int i; ++ dr_with_seg_len_pair_t *alias_pair; ++ FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair) ++ { ++ data_reference_p dr_a = alias_pair->first.dr; ++ data_reference_p dr_b = alias_pair->second.dr; ++ int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a), ++ DR_BASE_ADDRESS (dr_b)); ++ if (comp_res == 0) ++ comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b)); ++ if (comp_res == 0) ++ comp_res = data_ref_compare_tree (DR_INIT (dr_a), DR_INIT (dr_b)); ++ if (comp_res > 0) ++ { ++ std::swap (alias_pair->first, alias_pair->second); ++ alias_pair->flags |= DR_ALIAS_SWAPPED; ++ } ++ else ++ alias_pair->flags |= DR_ALIAS_UNSWAPPED; ++ } ++ + /* Sort the collected data ref pairs so that we can scan them once to + combine all possible aliasing checks. */ + alias_pairs->qsort (comp_dr_with_seg_len_pair); + + /* Scan the sorted dr pairs and check if we can combine alias checks + of two neighboring dr pairs. */ +- for (size_t i = 1; i < alias_pairs->length (); ++i) ++ unsigned int last = 0; ++ for (i = 1; i < alias_pairs->length (); ++i) + { + /* Deal with two ddrs (dr_a1, dr_b1) and (dr_a2, dr_b2). */ +- dr_with_seg_len *dr_a1 = &(*alias_pairs)[i-1].first, +- *dr_b1 = &(*alias_pairs)[i-1].second, +- *dr_a2 = &(*alias_pairs)[i].first, +- *dr_b2 = &(*alias_pairs)[i].second; ++ dr_with_seg_len_pair_t *alias_pair1 = &(*alias_pairs)[last]; ++ dr_with_seg_len_pair_t *alias_pair2 = &(*alias_pairs)[i]; ++ ++ dr_with_seg_len *dr_a1 = &alias_pair1->first; ++ dr_with_seg_len *dr_b1 = &alias_pair1->second; ++ dr_with_seg_len *dr_a2 = &alias_pair2->first; ++ dr_with_seg_len *dr_b2 = &alias_pair2->second; + + /* Remove duplicate data ref pairs. */ + if (*dr_a1 == *dr_a2 && *dr_b1 == *dr_b2) +@@ -1509,10 +1588,16 @@ prune_runtime_alias_test_list (vec *alias_pairs, + dump_printf (MSG_NOTE, "found equal ranges %T, %T and %T, %T\n", + DR_REF (dr_a1->dr), DR_REF (dr_b1->dr), + DR_REF (dr_a2->dr), DR_REF (dr_b2->dr)); +- alias_pairs->ordered_remove (i--); ++ alias_pair1->flags |= alias_pair2->flags; + continue; + } + ++ /* Assume that we won't be able to merge the pairs, then correct ++ if we do. 
*/ ++ last += 1; ++ if (last != i) ++ (*alias_pairs)[last] = (*alias_pairs)[i]; ++ + if (*dr_a1 == *dr_a2 || *dr_b1 == *dr_b2) + { + /* We consider the case that DR_B1 and DR_B2 are same memrefs, +@@ -1538,13 +1623,6 @@ prune_runtime_alias_test_list (vec *alias_pairs, + if (!ordered_p (init_a1, init_a2)) + continue; + +- /* Make sure dr_a1 starts left of dr_a2. */ +- if (maybe_gt (init_a1, init_a2)) +- { +- std::swap (*dr_a1, *dr_a2); +- std::swap (init_a1, init_a2); +- } +- + /* Work out what the segment length would be if we did combine + DR_A1 and DR_A2: + +@@ -1561,7 +1639,10 @@ prune_runtime_alias_test_list (vec *alias_pairs, + + The lengths both have sizetype, so the sign is taken from + the step instead. */ +- if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0)) ++ poly_uint64 new_seg_len = 0; ++ bool new_seg_len_p = !operand_equal_p (dr_a1->seg_len, ++ dr_a2->seg_len, 0); ++ if (new_seg_len_p) + { + poly_uint64 seg_len_a1, seg_len_a2; + if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1) +@@ -1579,14 +1660,29 @@ prune_runtime_alias_test_list (vec *alias_pairs, + int sign_a = tree_int_cst_sgn (indicator_a); + int sign_b = tree_int_cst_sgn (indicator_b); + +- poly_uint64 new_seg_len; + if (sign_a <= 0 && sign_b <= 0) + new_seg_len = lower_bound (seg_len_a1, seg_len_a2); + else if (sign_a >= 0 && sign_b >= 0) + new_seg_len = upper_bound (seg_len_a1, seg_len_a2); + else + continue; ++ } ++ /* At this point we're committed to merging the refs. */ + ++ /* Make sure dr_a1 starts left of dr_a2. */ ++ if (maybe_gt (init_a1, init_a2)) ++ { ++ std::swap (*dr_a1, *dr_a2); ++ std::swap (init_a1, init_a2); ++ } ++ ++ /* The DR_Bs are equal, so only the DR_As can introduce ++ mixed steps. */ ++ if (!operand_equal_p (DR_STEP (dr_a1->dr), DR_STEP (dr_a2->dr), 0)) ++ alias_pair1->flags |= DR_ALIAS_MIXED_STEPS; ++ ++ if (new_seg_len_p) ++ { + dr_a1->seg_len = build_int_cst (TREE_TYPE (dr_a1->seg_len), + new_seg_len); + dr_a1->align = MIN (dr_a1->align, known_alignment (new_seg_len)); +@@ -1608,17 +1704,40 @@ prune_runtime_alias_test_list (vec *alias_pairs, + dump_printf (MSG_NOTE, "merging ranges for %T, %T and %T, %T\n", + DR_REF (dr_a1->dr), DR_REF (dr_b1->dr), + DR_REF (dr_a2->dr), DR_REF (dr_b2->dr)); +- alias_pairs->ordered_remove (i); +- i--; ++ alias_pair1->flags |= alias_pair2->flags; ++ last -= 1; + } + } ++ alias_pairs->truncate (last + 1); ++ ++ /* Try to restore the original dr_with_seg_len order within each ++ dr_with_seg_len_pair_t. If we ended up combining swapped and ++ unswapped pairs into the same check, we have to invalidate any ++ RAW, WAR and WAW information for it. */ ++ if (dump_enabled_p ()) ++ dump_printf (MSG_NOTE, "merged alias checks:\n"); ++ FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair) ++ { ++ unsigned int swap_mask = (DR_ALIAS_SWAPPED | DR_ALIAS_UNSWAPPED); ++ unsigned int swapped = (alias_pair->flags & swap_mask); ++ if (swapped == DR_ALIAS_SWAPPED) ++ std::swap (alias_pair->first, alias_pair->second); ++ else if (swapped != DR_ALIAS_UNSWAPPED) ++ alias_pair->flags |= DR_ALIAS_ARBITRARY; ++ alias_pair->flags &= ~swap_mask; ++ if (dump_enabled_p ()) ++ dump_alias_pair (alias_pair, " "); ++ } + } + +-/* Given LOOP's two data references and segment lengths described by DR_A +- and DR_B, create expression checking if the two addresses ranges intersect +- with each other based on index of the two addresses. This can only be +- done if DR_A and DR_B referring to the same (array) object and the index +- is the only difference. 
For example: ++/* Try to generate a runtime condition that is true if ALIAS_PAIR is ++ free of aliases, using a condition based on index values instead ++ of a condition based on addresses. Return true on success, ++ storing the condition in *COND_EXPR. ++ ++ This can only be done if the two data references in ALIAS_PAIR access ++ the same array object and the index is the only difference. For example, ++ if the two data references are DR_A and DR_B: + + DR_A DR_B + data-ref arr[i] arr[j] +@@ -1635,16 +1754,20 @@ prune_runtime_alias_test_list (vec *alias_pairs, + + We can create expression based on index rather than address: + +- (i_0 + 4 < j_0 || j_0 + 4 < i_0) ++ (unsigned) (i_0 - j_0 + 3) <= 6 ++ ++ i.e. the indices are less than 4 apart. + + Note evolution step of index needs to be considered in comparison. */ + + static bool + create_intersect_range_checks_index (struct loop *loop, tree *cond_expr, +- const dr_with_seg_len& dr_a, +- const dr_with_seg_len& dr_b) ++ const dr_with_seg_len_pair_t &alias_pair) + { +- if (integer_zerop (DR_STEP (dr_a.dr)) ++ const dr_with_seg_len &dr_a = alias_pair.first; ++ const dr_with_seg_len &dr_b = alias_pair.second; ++ if ((alias_pair.flags & DR_ALIAS_MIXED_STEPS) ++ || integer_zerop (DR_STEP (dr_a.dr)) + || integer_zerop (DR_STEP (dr_b.dr)) + || DR_NUM_DIMENSIONS (dr_a.dr) != DR_NUM_DIMENSIONS (dr_b.dr)) + return false; +@@ -1670,15 +1793,8 @@ create_intersect_range_checks_index (struct loop *loop, tree *cond_expr, + if (neg_step) + { + abs_step = -abs_step; +- seg_len1 = -seg_len1; +- seg_len2 = -seg_len2; +- } +- else +- { +- /* Include the access size in the length, so that we only have one +- tree addition below. */ +- seg_len1 += dr_a.access_size; +- seg_len2 += dr_b.access_size; ++ seg_len1 = (-wi::to_poly_wide (dr_a.seg_len)).force_uhwi (); ++ seg_len2 = (-wi::to_poly_wide (dr_b.seg_len)).force_uhwi (); + } + + /* Infer the number of iterations with which the memory segment is accessed +@@ -1692,16 +1808,15 @@ create_intersect_range_checks_index (struct loop *loop, tree *cond_expr, + || !can_div_trunc_p (seg_len2 + abs_step - 1, abs_step, &niter_len2)) + return false; + +- poly_uint64 niter_access1 = 0, niter_access2 = 0; +- if (neg_step) +- { +- /* Divide each access size by the byte step, rounding up. */ +- if (!can_div_trunc_p (dr_a.access_size - abs_step - 1, +- abs_step, &niter_access1) +- || !can_div_trunc_p (dr_b.access_size + abs_step - 1, +- abs_step, &niter_access2)) +- return false; +- } ++ /* Divide each access size by the byte step, rounding up. */ ++ poly_uint64 niter_access1, niter_access2; ++ if (!can_div_trunc_p (dr_a.access_size + abs_step - 1, ++ abs_step, &niter_access1) ++ || !can_div_trunc_p (dr_b.access_size + abs_step - 1, ++ abs_step, &niter_access2)) ++ return false; ++ ++ bool waw_or_war_p = (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) == 0; + + unsigned int i; + for (i = 0; i < DR_NUM_DIMENSIONS (dr_a.dr); i++) +@@ -1741,44 +1856,298 @@ create_intersect_range_checks_index (struct loop *loop, tree *cond_expr, + index of data reference. Like segment length, index length is + linear function of the number of iterations with index_step as + the coefficient, i.e, niter_len * idx_step. 
*/ +- tree idx_len1 = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step, +- build_int_cst (TREE_TYPE (min1), +- niter_len1)); +- tree idx_len2 = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step, +- build_int_cst (TREE_TYPE (min2), +- niter_len2)); +- tree max1 = fold_build2 (PLUS_EXPR, TREE_TYPE (min1), min1, idx_len1); +- tree max2 = fold_build2 (PLUS_EXPR, TREE_TYPE (min2), min2, idx_len2); +- /* Adjust ranges for negative step. */ ++ offset_int abs_idx_step = offset_int::from (wi::to_wide (idx_step), ++ SIGNED); + if (neg_step) +- { +- /* IDX_LEN1 and IDX_LEN2 are negative in this case. */ +- std::swap (min1, max1); +- std::swap (min2, max2); +- +- /* As with the lengths just calculated, we've measured the access +- sizes in iterations, so multiply them by the index step. */ +- tree idx_access1 +- = fold_build2 (MULT_EXPR, TREE_TYPE (min1), idx_step, +- build_int_cst (TREE_TYPE (min1), niter_access1)); +- tree idx_access2 +- = fold_build2 (MULT_EXPR, TREE_TYPE (min2), idx_step, +- build_int_cst (TREE_TYPE (min2), niter_access2)); +- +- /* MINUS_EXPR because the above values are negative. */ +- max1 = fold_build2 (MINUS_EXPR, TREE_TYPE (max1), max1, idx_access1); +- max2 = fold_build2 (MINUS_EXPR, TREE_TYPE (max2), max2, idx_access2); +- } +- tree part_cond_expr +- = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, +- fold_build2 (LE_EXPR, boolean_type_node, max1, min2), +- fold_build2 (LE_EXPR, boolean_type_node, max2, min1)); ++ abs_idx_step = -abs_idx_step; ++ poly_offset_int idx_len1 = abs_idx_step * niter_len1; ++ poly_offset_int idx_len2 = abs_idx_step * niter_len2; ++ poly_offset_int idx_access1 = abs_idx_step * niter_access1; ++ poly_offset_int idx_access2 = abs_idx_step * niter_access2; ++ ++ gcc_assert (known_ge (idx_len1, 0) ++ && known_ge (idx_len2, 0) ++ && known_ge (idx_access1, 0) ++ && known_ge (idx_access2, 0)); ++ ++ /* Each access has the following pattern, with lengths measured ++ in units of INDEX: ++ ++ <-- idx_len --> ++ <--- A: -ve step ---> ++ +-----+-------+-----+-------+-----+ ++ | n-1 | ..... | 0 | ..... | n-1 | ++ +-----+-------+-----+-------+-----+ ++ <--- B: +ve step ---> ++ <-- idx_len --> ++ | ++ min ++ ++ where "n" is the number of scalar iterations covered by the segment ++ and where each access spans idx_access units. ++ ++ A is the range of bytes accessed when the step is negative, ++ B is the range when the step is positive. ++ ++ When checking for general overlap, we need to test whether ++ the range: ++ ++ [min1 + low_offset1, min2 + high_offset1 + idx_access1 - 1] ++ ++ overlaps: ++ ++ [min2 + low_offset2, min2 + high_offset2 + idx_access2 - 1] ++ ++ where: ++ ++ low_offsetN = +ve step ? 0 : -idx_lenN; ++ high_offsetN = +ve step ? idx_lenN : 0; ++ ++ This is equivalent to testing whether: ++ ++ min1 + low_offset1 <= min2 + high_offset2 + idx_access2 - 1 ++ && min2 + low_offset2 <= min1 + high_offset1 + idx_access1 - 1 ++ ++ Converting this into a single test, there is an overlap if: ++ ++ 0 <= min2 - min1 + bias <= limit ++ ++ where bias = high_offset2 + idx_access2 - 1 - low_offset1 ++ limit = (high_offset1 - low_offset1 + idx_access1 - 1) ++ + (high_offset2 - low_offset2 + idx_access2 - 1) ++ i.e. limit = idx_len1 + idx_access1 - 1 + idx_len2 + idx_access2 - 1 ++ ++ Combining the tests requires limit to be computable in an unsigned ++ form of the index type; if it isn't, we fall back to the usual ++ pointer-based checks. 
++ ++ We can do better if DR_B is a write and if DR_A and DR_B are ++ well-ordered in both the original and the new code (see the ++ comment above the DR_ALIAS_* flags for details). In this case ++ we know that for each i in [0, n-1], the write performed by ++ access i of DR_B occurs after access numbers j<=i of DR_A in ++ both the original and the new code. Any write or anti ++ dependencies wrt those DR_A accesses are therefore maintained. ++ ++ We just need to make sure that each individual write in DR_B does not ++ overlap any higher-indexed access in DR_A; such DR_A accesses happen ++ after the DR_B access in the original code but happen before it in ++ the new code. ++ ++ We know the steps for both accesses are equal, so by induction, we ++ just need to test whether the first write of DR_B overlaps a later ++ access of DR_A. In other words, we need to move min1 along by ++ one iteration: ++ ++ min1' = min1 + idx_step ++ ++ and use the ranges: ++ ++ [min1' + low_offset1', min1' + high_offset1' + idx_access1 - 1] ++ ++ and: ++ ++ [min2, min2 + idx_access2 - 1] ++ ++ where: ++ ++ low_offset1' = +ve step ? 0 : -(idx_len1 - |idx_step|) ++ high_offset1' = +ve_step ? idx_len1 - |idx_step| : 0. */ ++ if (waw_or_war_p) ++ idx_len1 -= abs_idx_step; ++ ++ poly_offset_int limit = idx_len1 + idx_access1 - 1 + idx_access2 - 1; ++ if (!waw_or_war_p) ++ limit += idx_len2; ++ ++ tree utype = unsigned_type_for (TREE_TYPE (min1)); ++ if (!wi::fits_to_tree_p (limit, utype)) ++ return false; ++ ++ poly_offset_int low_offset1 = neg_step ? -idx_len1 : 0; ++ poly_offset_int high_offset2 = neg_step || waw_or_war_p ? 0 : idx_len2; ++ poly_offset_int bias = high_offset2 + idx_access2 - 1 - low_offset1; ++ /* Equivalent to adding IDX_STEP to MIN1. */ ++ if (waw_or_war_p) ++ bias -= wi::to_offset (idx_step); ++ ++ tree subject = fold_build2 (MINUS_EXPR, utype, ++ fold_convert (utype, min2), ++ fold_convert (utype, min1)); ++ subject = fold_build2 (PLUS_EXPR, utype, subject, ++ wide_int_to_tree (utype, bias)); ++ tree part_cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, ++ wide_int_to_tree (utype, limit)); + if (*cond_expr) + *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, + *cond_expr, part_cond_expr); + else + *cond_expr = part_cond_expr; + } ++ if (dump_enabled_p ()) ++ { ++ if (waw_or_war_p) ++ dump_printf (MSG_NOTE, "using an index-based WAR/WAW test\n"); ++ else ++ dump_printf (MSG_NOTE, "using an index-based overlap test\n"); ++ } ++ return true; ++} ++ ++/* A subroutine of create_intersect_range_checks, with a subset of the ++ same arguments. Try to optimize cases in which the second access ++ is a write and in which some overlap is valid. */ ++ ++static bool ++create_waw_or_war_checks (tree *cond_expr, ++ const dr_with_seg_len_pair_t &alias_pair) ++{ ++ const dr_with_seg_len& dr_a = alias_pair.first; ++ const dr_with_seg_len& dr_b = alias_pair.second; ++ ++ /* Check for cases in which: ++ ++ (a) DR_B is always a write; ++ (b) the accesses are well-ordered in both the original and new code ++ (see the comment above the DR_ALIAS_* flags for details); and ++ (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */ ++ if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) ++ return false; ++ ++ /* Check for equal (but possibly variable) steps. */ ++ tree step = DR_STEP (dr_a.dr); ++ if (!operand_equal_p (step, DR_STEP (dr_b.dr))) ++ return false; ++ ++ /* Make sure that we can operate on sizetype without loss of precision. 
*/ ++ tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr)); ++ if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype)) ++ return false; ++ ++ /* All addresses involved are known to have a common alignment ALIGN. ++ We can therefore subtract ALIGN from an exclusive endpoint to get ++ an inclusive endpoint. In the best (and common) case, ALIGN is the ++ same as the access sizes of both DRs, and so subtracting ALIGN ++ cancels out the addition of an access size. */ ++ unsigned int align = MIN (dr_a.align, dr_b.align); ++ poly_uint64 last_chunk_a = dr_a.access_size - align; ++ poly_uint64 last_chunk_b = dr_b.access_size - align; ++ ++ /* Get a boolean expression that is true when the step is negative. */ ++ tree indicator = dr_direction_indicator (dr_a.dr); ++ tree neg_step = fold_build2 (LT_EXPR, boolean_type_node, ++ fold_convert (ssizetype, indicator), ++ ssize_int (0)); ++ ++ /* Get lengths in sizetype. */ ++ tree seg_len_a ++ = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len)); ++ step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step)); ++ ++ /* Each access has the following pattern: ++ ++ <- |seg_len| -> ++ <--- A: -ve step ---> ++ +-----+-------+-----+-------+-----+ ++ | n-1 | ..... | 0 | ..... | n-1 | ++ +-----+-------+-----+-------+-----+ ++ <--- B: +ve step ---> ++ <- |seg_len| -> ++ | ++ base address ++ ++ where "n" is the number of scalar iterations covered by the segment. ++ ++ A is the range of bytes accessed when the step is negative, ++ B is the range when the step is positive. ++ ++ We know that DR_B is a write. We also know (from checking that ++ DR_A and DR_B are well-ordered) that for each i in [0, n-1], ++ the write performed by access i of DR_B occurs after access numbers ++ j<=i of DR_A in both the original and the new code. Any write or ++ anti dependencies wrt those DR_A accesses are therefore maintained. ++ ++ We just need to make sure that each individual write in DR_B does not ++ overlap any higher-indexed access in DR_A; such DR_A accesses happen ++ after the DR_B access in the original code but happen before it in ++ the new code. ++ ++ We know the steps for both accesses are equal, so by induction, we ++ just need to test whether the first write of DR_B overlaps a later ++ access of DR_A. In other words, we need to move addr_a along by ++ one iteration: ++ ++ addr_a' = addr_a + step ++ ++ and check whether: ++ ++ [addr_b, addr_b + last_chunk_b] ++ ++ overlaps: ++ ++ [addr_a' + low_offset_a, addr_a' + high_offset_a + last_chunk_a] ++ ++ where [low_offset_a, high_offset_a] spans accesses [1, n-1]. I.e.: ++ ++ low_offset_a = +ve step ? 0 : seg_len_a - step ++ high_offset_a = +ve step ? seg_len_a - step : 0 ++ ++ This is equivalent to testing whether: ++ ++ addr_a' + low_offset_a <= addr_b + last_chunk_b ++ && addr_b <= addr_a' + high_offset_a + last_chunk_a ++ ++ Converting this into a single test, there is an overlap if: ++ ++ 0 <= addr_b + last_chunk_b - addr_a' - low_offset_a <= limit ++ ++ where limit = high_offset_a - low_offset_a + last_chunk_a + last_chunk_b ++ ++ If DR_A is performed, limit + |step| - last_chunk_b is known to be ++ less than the size of the object underlying DR_A. We also know ++ that last_chunk_b <= |step|; this is checked elsewhere if it isn't ++ guaranteed at compile time. There can therefore be no overflow if ++ "limit" is calculated in an unsigned type with pointer precision. 
*/ ++ tree addr_a = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_a.dr), ++ DR_OFFSET (dr_a.dr)); ++ addr_a = fold_build_pointer_plus (addr_a, DR_INIT (dr_a.dr)); ++ ++ tree addr_b = fold_build_pointer_plus (DR_BASE_ADDRESS (dr_b.dr), ++ DR_OFFSET (dr_b.dr)); ++ addr_b = fold_build_pointer_plus (addr_b, DR_INIT (dr_b.dr)); ++ ++ /* Advance ADDR_A by one iteration and adjust the length to compensate. */ ++ addr_a = fold_build_pointer_plus (addr_a, step); ++ tree seg_len_a_minus_step = fold_build2 (MINUS_EXPR, sizetype, ++ seg_len_a, step); ++ if (!CONSTANT_CLASS_P (seg_len_a_minus_step)) ++ seg_len_a_minus_step = build1 (SAVE_EXPR, sizetype, seg_len_a_minus_step); ++ ++ tree low_offset_a = fold_build3 (COND_EXPR, sizetype, neg_step, ++ seg_len_a_minus_step, size_zero_node); ++ if (!CONSTANT_CLASS_P (low_offset_a)) ++ low_offset_a = build1 (SAVE_EXPR, sizetype, low_offset_a); ++ ++ /* We could use COND_EXPR , ++ but it's usually more efficient to reuse the LOW_OFFSET_A result. */ ++ tree high_offset_a = fold_build2 (MINUS_EXPR, sizetype, seg_len_a_minus_step, ++ low_offset_a); ++ ++ /* The amount added to addr_b - addr_a'. */ ++ tree bias = fold_build2 (MINUS_EXPR, sizetype, ++ size_int (last_chunk_b), low_offset_a); ++ ++ tree limit = fold_build2 (MINUS_EXPR, sizetype, high_offset_a, low_offset_a); ++ limit = fold_build2 (PLUS_EXPR, sizetype, limit, ++ size_int (last_chunk_a + last_chunk_b)); ++ ++ tree subject = fold_build2 (POINTER_DIFF_EXPR, ssizetype, addr_b, addr_a); ++ subject = fold_build2 (PLUS_EXPR, sizetype, ++ fold_convert (sizetype, subject), bias); ++ ++ *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit); ++ if (dump_enabled_p ()) ++ dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n"); + return true; + } + +@@ -1866,24 +2235,29 @@ get_segment_min_max (const dr_with_seg_len &d, tree *seg_min_out, + *seg_max_out = fold_build_pointer_plus (addr_base, max_reach); + } + +-/* Given two data references and segment lengths described by DR_A and DR_B, +- create expression checking if the two addresses ranges intersect with +- each other: ++/* Generate a runtime condition that is true if ALIAS_PAIR is free of aliases, ++ storing the condition in *COND_EXPR. The fallback is to generate a ++ a test that the two accesses do not overlap: + +- ((DR_A_addr_0 + DR_A_segment_length_0) <= DR_B_addr_0) +- || (DR_B_addr_0 + DER_B_segment_length_0) <= DR_A_addr_0)) */ ++ end_a <= start_b || end_b <= start_a. */ + + static void + create_intersect_range_checks (struct loop *loop, tree *cond_expr, +- const dr_with_seg_len& dr_a, +- const dr_with_seg_len& dr_b) ++ const dr_with_seg_len_pair_t &alias_pair) + { ++ const dr_with_seg_len& dr_a = alias_pair.first; ++ const dr_with_seg_len& dr_b = alias_pair.second; + *cond_expr = NULL_TREE; +- if (create_intersect_range_checks_index (loop, cond_expr, dr_a, dr_b)) ++ if (create_intersect_range_checks_index (loop, cond_expr, alias_pair)) ++ return; ++ ++ if (create_waw_or_war_checks (cond_expr, alias_pair)) + return; + + unsigned HOST_WIDE_INT min_align; + tree_code cmp_code; ++ /* We don't have to check DR_ALIAS_MIXED_STEPS here, since both versions ++ are equivalent. This is just an optimization heuristic. 
*/ + if (TREE_CODE (DR_STEP (dr_a.dr)) == INTEGER_CST + && TREE_CODE (DR_STEP (dr_b.dr)) == INTEGER_CST) + { +@@ -1924,6 +2298,8 @@ create_intersect_range_checks (struct loop *loop, tree *cond_expr, + = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, + fold_build2 (cmp_code, boolean_type_node, seg_a_max, seg_b_min), + fold_build2 (cmp_code, boolean_type_node, seg_b_max, seg_a_min)); ++ if (dump_enabled_p ()) ++ dump_printf (MSG_NOTE, "using an address-based overlap test\n"); + } + + /* Create a conditional expression that represents the run-time checks for +@@ -1940,18 +2316,19 @@ create_runtime_alias_checks (struct loop *loop, + tree part_cond_expr; + + fold_defer_overflow_warnings (); +- for (size_t i = 0, s = alias_pairs->length (); i < s; ++i) ++ dr_with_seg_len_pair_t *alias_pair; ++ unsigned int i; ++ FOR_EACH_VEC_ELT (*alias_pairs, i, alias_pair) + { +- const dr_with_seg_len& dr_a = (*alias_pairs)[i].first; +- const dr_with_seg_len& dr_b = (*alias_pairs)[i].second; +- ++ gcc_assert (alias_pair->flags); + if (dump_enabled_p ()) + dump_printf (MSG_NOTE, + "create runtime check for data references %T and %T\n", +- DR_REF (dr_a.dr), DR_REF (dr_b.dr)); ++ DR_REF (alias_pair->first.dr), ++ DR_REF (alias_pair->second.dr)); + + /* Create condition expression for each pair data references. */ +- create_intersect_range_checks (loop, &part_cond_expr, dr_a, dr_b); ++ create_intersect_range_checks (loop, &part_cond_expr, *alias_pair); + if (*cond_expr) + *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, + *cond_expr, part_cond_expr); +diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h +index 70cbb03b49c..9cb48a2ea3e 100644 +--- a/gcc/tree-data-ref.h ++++ b/gcc/tree-data-ref.h +@@ -221,19 +221,113 @@ struct dr_with_seg_len + unsigned int align; + }; + ++/* Flags that describe a potential alias between two dr_with_seg_lens. ++ In general, each pair of dr_with_seg_lens represents a composite of ++ multiple access pairs P, so testing flags like DR_IS_READ on the DRs ++ does not give meaningful information. ++ ++ DR_ALIAS_RAW: ++ There is a pair in P for which the second reference is a read ++ and the first is a write. ++ ++ DR_ALIAS_WAR: ++ There is a pair in P for which the second reference is a write ++ and the first is a read. ++ ++ DR_ALIAS_WAW: ++ There is a pair in P for which both references are writes. ++ ++ DR_ALIAS_ARBITRARY: ++ Either ++ (a) it isn't possible to classify one pair in P as RAW, WAW or WAR; or ++ (b) there is a pair in P that breaks the ordering assumption below. ++ ++ This flag overrides the RAW, WAR and WAW flags above. ++ ++ DR_ALIAS_UNSWAPPED: ++ DR_ALIAS_SWAPPED: ++ Temporary flags that indicate whether there is a pair P whose ++ DRs have or haven't been swapped around. ++ ++ DR_ALIAS_MIXED_STEPS: ++ The DR_STEP for one of the data references in the pair does not ++ accurately describe that reference for all members of P. (Note ++ that the flag does not say anything about whether the DR_STEPs ++ of the two references in the pair are the same.) 
++ ++ The ordering assumption mentioned above is that for every pair ++ (DR_A, DR_B) in P: ++ ++ (1) The original code accesses n elements for DR_A and n elements for DR_B, ++ interleaved as follows: ++ ++ one access of size DR_A.access_size at DR_A.dr ++ one access of size DR_B.access_size at DR_B.dr ++ one access of size DR_A.access_size at DR_A.dr + STEP_A ++ one access of size DR_B.access_size at DR_B.dr + STEP_B ++ one access of size DR_A.access_size at DR_A.dr + STEP_A * 2 ++ one access of size DR_B.access_size at DR_B.dr + STEP_B * 2 ++ ... ++ ++ (2) The new code accesses the same data in exactly two chunks: ++ ++ one group of accesses spanning |DR_A.seg_len| + DR_A.access_size ++ one group of accesses spanning |DR_B.seg_len| + DR_B.access_size ++ ++ A pair might break this assumption if the DR_A and DR_B accesses ++ in the original or the new code are mingled in some way. For example, ++ if DR_A.access_size represents the effect of two individual writes ++ to nearby locations, the pair breaks the assumption if those writes ++ occur either side of the access for DR_B. ++ ++ Note that DR_ALIAS_ARBITRARY describes whether the ordering assumption ++ fails to hold for any individual pair in P. If the assumption *does* ++ hold for every pair in P, it doesn't matter whether it holds for the ++ composite pair or not. In other words, P should represent the complete ++ set of pairs that the composite pair is testing, so only the ordering ++ of two accesses in the same member of P matters. */ ++const unsigned int DR_ALIAS_RAW = 1U << 0; ++const unsigned int DR_ALIAS_WAR = 1U << 1; ++const unsigned int DR_ALIAS_WAW = 1U << 2; ++const unsigned int DR_ALIAS_ARBITRARY = 1U << 3; ++const unsigned int DR_ALIAS_SWAPPED = 1U << 4; ++const unsigned int DR_ALIAS_UNSWAPPED = 1U << 5; ++const unsigned int DR_ALIAS_MIXED_STEPS = 1U << 6; ++ + /* This struct contains two dr_with_seg_len objects with aliasing data + refs. Two comparisons are generated from them. */ + + struct dr_with_seg_len_pair_t + { +- dr_with_seg_len_pair_t (const dr_with_seg_len& d1, +- const dr_with_seg_len& d2) +- : first (d1), second (d2) {} ++ /* WELL_ORDERED indicates that the ordering assumption described above ++ DR_ALIAS_ARBITRARY holds. REORDERED indicates that it doesn't. */ ++ enum sequencing { WELL_ORDERED, REORDERED }; ++ ++ dr_with_seg_len_pair_t (const dr_with_seg_len &, ++ const dr_with_seg_len &, sequencing); + + dr_with_seg_len first; + dr_with_seg_len second; ++ unsigned int flags; + }; + ++inline dr_with_seg_len_pair_t:: ++dr_with_seg_len_pair_t (const dr_with_seg_len &d1, const dr_with_seg_len &d2, ++ sequencing seq) ++ : first (d1), second (d2), flags (0) ++{ ++ if (DR_IS_READ (d1.dr) && DR_IS_WRITE (d2.dr)) ++ flags |= DR_ALIAS_WAR; ++ else if (DR_IS_WRITE (d1.dr) && DR_IS_READ (d2.dr)) ++ flags |= DR_ALIAS_RAW; ++ else if (DR_IS_WRITE (d1.dr) && DR_IS_WRITE (d2.dr)) ++ flags |= DR_ALIAS_WAW; ++ else ++ gcc_unreachable (); ++ if (seq == REORDERED) ++ flags |= DR_ALIAS_ARBITRARY; ++} ++ + enum data_dependence_direction { + dir_positive, + dir_negative, +diff --git a/gcc/tree-if-conv.c b/gcc/tree-if-conv.c +index 2780a4b243f..bd946e14eb6 100644 +--- a/gcc/tree-if-conv.c ++++ b/gcc/tree-if-conv.c +@@ -120,6 +120,7 @@ along with GCC; see the file COPYING3. If not see + #include "fold-const.h" + #include "tree-ssa-sccvn.h" + #include "tree-cfgcleanup.h" ++#include "tree-ssa-dse.h" + + /* Only handle PHIs with no more arguments unless we are asked to by + simd pragma. 
*/ +@@ -2884,7 +2885,7 @@ ifcvt_split_critical_edges (struct loop *loop, bool aggressive_if_conv) + loop vectorization. */ + + static void +-ifcvt_local_dce (basic_block bb) ++ifcvt_local_dce (class loop *loop) + { + gimple *stmt; + gimple *stmt1; +@@ -2901,6 +2902,10 @@ ifcvt_local_dce (basic_block bb) + replace_uses_by (name_pair->first, name_pair->second); + redundant_ssa_names.release (); + ++ /* The loop has a single BB only. */ ++ basic_block bb = loop->header; ++ tree latch_vdef = NULL_TREE; ++ + worklist.create (64); + /* Consider all phi as live statements. */ + for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) +@@ -2908,6 +2913,8 @@ ifcvt_local_dce (basic_block bb) + phi = gsi_stmt (gsi); + gimple_set_plf (phi, GF_PLF_2, true); + worklist.safe_push (phi); ++ if (virtual_operand_p (gimple_phi_result (phi))) ++ latch_vdef = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); + } + /* Consider load/store statements, CALL and COND as live. */ + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) +@@ -2971,6 +2978,19 @@ ifcvt_local_dce (basic_block bb) + while (!gsi_end_p (gsi)) + { + stmt = gsi_stmt (gsi); ++ if (gimple_store_p (stmt)) ++ { ++ tree lhs = gimple_get_lhs (stmt); ++ ao_ref write; ++ ao_ref_init (&write, lhs); ++ ++ if (dse_classify_store (&write, stmt, false, NULL, NULL, latch_vdef) ++ == DSE_STORE_DEAD) ++ delete_dead_or_redundant_assignment (&gsi, "dead"); ++ gsi_next (&gsi); ++ continue; ++ } ++ + if (gimple_plf (stmt, GF_PLF_2)) + { + gsi_next (&gsi); +@@ -3071,9 +3091,6 @@ tree_if_conversion (struct loop *loop, vec *preds) + on-the-fly. */ + combine_blocks (loop); + +- /* Delete dead predicate computations. */ +- ifcvt_local_dce (loop->header); +- + /* Perform local CSE, this esp. helps the vectorizer analysis if loads + and stores are involved. CSE only the loop body, not the entry + PHIs, those are to be kept in sync with the non-if-converted copy. +@@ -3082,6 +3099,9 @@ tree_if_conversion (struct loop *loop, vec *preds) + bitmap_set_bit (exit_bbs, single_exit (loop)->dest->index); + bitmap_set_bit (exit_bbs, loop->latch->index); + todo |= do_rpo_vn (cfun, loop_preheader_edge (loop), exit_bbs); ++ ++ /* Delete dead predicate computations. */ ++ ifcvt_local_dce (loop); + BITMAP_FREE (exit_bbs); + + todo |= TODO_cleanup_cfg; +diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c +index d115fcb1a5b..2fbcd6e3e46 100644 +--- a/gcc/tree-inline.c ++++ b/gcc/tree-inline.c +@@ -6201,11 +6201,11 @@ tree_function_versioning (tree old_decl, tree new_decl, + in the debug info that var (whole DECL_ORIGIN is the parm + PARM_DECL) is optimized away, but could be looked up at the + call site as value of D#X there. 
*/ +- tree var = vars, vexpr; ++ tree vexpr; + gimple_stmt_iterator cgsi + = gsi_after_labels (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun))); + gimple *def_temp; +- var = vars; ++ tree var = vars; + i = vec_safe_length (*debug_args); + do + { +diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c +index 8959f52a67b..a002bcd57b2 100644 +--- a/gcc/tree-loop-distribution.c ++++ b/gcc/tree-loop-distribution.c +@@ -2445,12 +2445,6 @@ compute_alias_check_pairs (struct loop *loop, vec *alias_ddrs, + struct data_reference *dr_a = DDR_A (ddr); + struct data_reference *dr_b = DDR_B (ddr); + tree seg_length_a, seg_length_b; +- int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a), +- DR_BASE_ADDRESS (dr_b)); +- +- if (comp_res == 0) +- comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b)); +- gcc_assert (comp_res != 0); + + if (latch_dominated_by_data_ref (loop, dr_a)) + seg_length_a = data_ref_segment_size (dr_a, niters_plus_one); +@@ -2471,11 +2465,9 @@ compute_alias_check_pairs (struct loop *loop, vec *alias_ddrs, + + dr_with_seg_len_pair_t dr_with_seg_len_pair + (dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a), +- dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b)); +- +- /* Canonicalize pairs by sorting the two DR members. */ +- if (comp_res > 0) +- std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second); ++ dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b), ++ /* ??? Would WELL_ORDERED be safe? */ ++ dr_with_seg_len_pair_t::REORDERED); + + comp_alias_pairs->safe_push (dr_with_seg_len_pair); + } +diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c +index dad6e2884db..e841da66db5 100644 +--- a/gcc/tree-parloops.c ++++ b/gcc/tree-parloops.c +@@ -88,7 +88,8 @@ along with GCC; see the file COPYING3. If not see + More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */ + /* + Reduction handling: +- currently we use vect_force_simple_reduction() to detect reduction patterns. ++ currently we use code inspired by vect_force_simple_reduction to detect ++ reduction patterns. + The code transformation will be introduced by an example. + + +@@ -182,6 +183,717 @@ parloop + + */ + ++/* Error reporting helper for parloops_is_simple_reduction below. GIMPLE ++ statement STMT is printed with a message MSG. */ ++ ++static void ++report_ploop_op (dump_flags_t msg_type, gimple *stmt, const char *msg) ++{ ++ dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt); ++} ++ ++/* DEF_STMT_INFO occurs in a loop that contains a potential reduction ++ operation. Return true if the results of DEF_STMT_INFO are something ++ that can be accumulated by such a reduction. */ ++ ++static bool ++parloops_valid_reduction_input_p (stmt_vec_info def_stmt_info) ++{ ++ return (is_gimple_assign (def_stmt_info->stmt) ++ || is_gimple_call (def_stmt_info->stmt) ++ || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def ++ || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI ++ && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def ++ && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt)))); ++} ++ ++/* Detect SLP reduction of the form: ++ ++ #a1 = phi ++ a2 = operation (a1) ++ a3 = operation (a2) ++ a4 = operation (a3) ++ a5 = operation (a4) ++ ++ #a = phi ++ ++ PHI is the reduction phi node (#a1 = phi above) ++ FIRST_STMT is the first reduction stmt in the chain ++ (a2 = operation (a1)). ++ ++ Return TRUE if a reduction chain was detected. 
*/ ++ ++static bool ++parloops_is_slp_reduction (loop_vec_info loop_info, gimple *phi, ++ gimple *first_stmt) ++{ ++ class loop *loop = (gimple_bb (phi))->loop_father; ++ class loop *vect_loop = LOOP_VINFO_LOOP (loop_info); ++ enum tree_code code; ++ gimple *loop_use_stmt = NULL; ++ stmt_vec_info use_stmt_info; ++ tree lhs; ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ int nloop_uses, size = 0, n_out_of_loop_uses; ++ bool found = false; ++ ++ if (loop != vect_loop) ++ return false; ++ ++ auto_vec reduc_chain; ++ lhs = PHI_RESULT (phi); ++ code = gimple_assign_rhs_code (first_stmt); ++ while (1) ++ { ++ nloop_uses = 0; ++ n_out_of_loop_uses = 0; ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) ++ { ++ gimple *use_stmt = USE_STMT (use_p); ++ if (is_gimple_debug (use_stmt)) ++ continue; ++ ++ /* Check if we got back to the reduction phi. */ ++ if (use_stmt == phi) ++ { ++ loop_use_stmt = use_stmt; ++ found = true; ++ break; ++ } ++ ++ if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) ++ { ++ loop_use_stmt = use_stmt; ++ nloop_uses++; ++ } ++ else ++ n_out_of_loop_uses++; ++ ++ /* There are can be either a single use in the loop or two uses in ++ phi nodes. */ ++ if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses)) ++ return false; ++ } ++ ++ if (found) ++ break; ++ ++ /* We reached a statement with no loop uses. */ ++ if (nloop_uses == 0) ++ return false; ++ ++ /* This is a loop exit phi, and we haven't reached the reduction phi. */ ++ if (gimple_code (loop_use_stmt) == GIMPLE_PHI) ++ return false; ++ ++ if (!is_gimple_assign (loop_use_stmt) ++ || code != gimple_assign_rhs_code (loop_use_stmt) ++ || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt))) ++ return false; ++ ++ /* Insert USE_STMT into reduction chain. */ ++ use_stmt_info = loop_info->lookup_stmt (loop_use_stmt); ++ reduc_chain.safe_push (use_stmt_info); ++ ++ lhs = gimple_assign_lhs (loop_use_stmt); ++ size++; ++ } ++ ++ if (!found || loop_use_stmt != phi || size < 2) ++ return false; ++ ++ /* Swap the operands, if needed, to make the reduction operand be the second ++ operand. */ ++ lhs = PHI_RESULT (phi); ++ for (unsigned i = 0; i < reduc_chain.length (); ++i) ++ { ++ gassign *next_stmt = as_a (reduc_chain[i]->stmt); ++ if (gimple_assign_rhs2 (next_stmt) == lhs) ++ { ++ tree op = gimple_assign_rhs1 (next_stmt); ++ stmt_vec_info def_stmt_info = loop_info->lookup_def (op); ++ ++ /* Check that the other def is either defined in the loop ++ ("vect_internal_def"), or it's an induction (defined by a ++ loop-header phi-node). */ ++ if (def_stmt_info ++ && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)) ++ && parloops_valid_reduction_input_p (def_stmt_info)) ++ { ++ lhs = gimple_assign_lhs (next_stmt); ++ continue; ++ } ++ ++ return false; ++ } ++ else ++ { ++ tree op = gimple_assign_rhs2 (next_stmt); ++ stmt_vec_info def_stmt_info = loop_info->lookup_def (op); ++ ++ /* Check that the other def is either defined in the loop ++ ("vect_internal_def"), or it's an induction (defined by a ++ loop-header phi-node). 
*/ ++ if (def_stmt_info ++ && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)) ++ && parloops_valid_reduction_input_p (def_stmt_info)) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: %G", ++ next_stmt); ++ ++ swap_ssa_operands (next_stmt, ++ gimple_assign_rhs1_ptr (next_stmt), ++ gimple_assign_rhs2_ptr (next_stmt)); ++ update_stmt (next_stmt); ++ ++ if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt))) ++ LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true; ++ } ++ else ++ return false; ++ } ++ ++ lhs = gimple_assign_lhs (next_stmt); ++ } ++ ++ /* Build up the actual chain. */ ++ for (unsigned i = 0; i < reduc_chain.length () - 1; ++i) ++ { ++ REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0]; ++ REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1]; ++ } ++ REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0]; ++ REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL; ++ ++ /* Save the chain for further analysis in SLP detection. */ ++ LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]); ++ REDUC_GROUP_SIZE (reduc_chain[0]) = size; ++ ++ return true; ++} ++ ++/* Return true if we need an in-order reduction for operation CODE ++ on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer ++ overflow must wrap. */ ++ ++static bool ++parloops_needs_fold_left_reduction_p (tree type, tree_code code, ++ bool need_wrapping_integral_overflow) ++{ ++ /* CHECKME: check for !flag_finite_math_only too? */ ++ if (SCALAR_FLOAT_TYPE_P (type)) ++ switch (code) ++ { ++ case MIN_EXPR: ++ case MAX_EXPR: ++ return false; ++ ++ default: ++ return !flag_associative_math; ++ } ++ ++ if (INTEGRAL_TYPE_P (type)) ++ { ++ if (!operation_no_trapping_overflow (type, code)) ++ return true; ++ if (need_wrapping_integral_overflow ++ && !TYPE_OVERFLOW_WRAPS (type) ++ && operation_can_overflow (code)) ++ return true; ++ return false; ++ } ++ ++ if (SAT_FIXED_POINT_TYPE_P (type)) ++ return true; ++ ++ return false; ++} ++ ++ ++/* Function parloops_is_simple_reduction ++ ++ (1) Detect a cross-iteration def-use cycle that represents a simple ++ reduction computation. We look for the following pattern: ++ ++ loop_header: ++ a1 = phi < a0, a2 > ++ a3 = ... ++ a2 = operation (a3, a1) ++ ++ or ++ ++ a3 = ... ++ loop_header: ++ a1 = phi < a0, a2 > ++ a2 = operation (a3, a1) ++ ++ such that: ++ 1. operation is commutative and associative and it is safe to ++ change the order of the computation ++ 2. no uses for a2 in the loop (a2 is used out of the loop) ++ 3. no uses of a1 in the loop besides the reduction operation ++ 4. no uses of a1 outside the loop. ++ ++ Conditions 1,4 are tested here. ++ Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized. ++ ++ (2) Detect a cross-iteration def-use cycle in nested loops, i.e., ++ nested cycles. 
++ ++ (3) Detect cycles of phi nodes in outer-loop vectorization, i.e., double ++ reductions: ++ ++ a1 = phi < a0, a2 > ++ inner loop (def of a3) ++ a2 = phi < a3 > ++ ++ (4) Detect condition expressions, ie: ++ for (int i = 0; i < N; i++) ++ if (a[i] < val) ++ ret_val = a[i]; ++ ++*/ ++ ++static stmt_vec_info ++parloops_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, ++ bool *double_reduc, ++ bool need_wrapping_integral_overflow, ++ enum vect_reduction_type *v_reduc_type) ++{ ++ gphi *phi = as_a (phi_info->stmt); ++ class loop *loop = (gimple_bb (phi))->loop_father; ++ class loop *vect_loop = LOOP_VINFO_LOOP (loop_info); ++ bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop); ++ gimple *phi_use_stmt = NULL; ++ enum tree_code orig_code, code; ++ tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE; ++ tree type; ++ tree name; ++ imm_use_iterator imm_iter; ++ use_operand_p use_p; ++ bool phi_def; ++ ++ *double_reduc = false; ++ *v_reduc_type = TREE_CODE_REDUCTION; ++ ++ tree phi_name = PHI_RESULT (phi); ++ /* ??? If there are no uses of the PHI result the inner loop reduction ++ won't be detected as possibly double-reduction by vectorizable_reduction ++ because that tries to walk the PHI arg from the preheader edge which ++ can be constant. See PR60382. */ ++ if (has_zero_uses (phi_name)) ++ return NULL; ++ unsigned nphi_def_loop_uses = 0; ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name) ++ { ++ gimple *use_stmt = USE_STMT (use_p); ++ if (is_gimple_debug (use_stmt)) ++ continue; ++ ++ if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "intermediate value used outside loop.\n"); ++ ++ return NULL; ++ } ++ ++ nphi_def_loop_uses++; ++ phi_use_stmt = use_stmt; ++ } ++ ++ edge latch_e = loop_latch_edge (loop); ++ tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); ++ if (TREE_CODE (loop_arg) != SSA_NAME) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "reduction: not ssa_name: %T\n", loop_arg); ++ return NULL; ++ } ++ ++ stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg); ++ if (!def_stmt_info ++ || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))) ++ return NULL; ++ ++ if (gassign *def_stmt = dyn_cast (def_stmt_info->stmt)) ++ { ++ name = gimple_assign_lhs (def_stmt); ++ phi_def = false; ++ } ++ else if (gphi *def_stmt = dyn_cast (def_stmt_info->stmt)) ++ { ++ name = PHI_RESULT (def_stmt); ++ phi_def = true; ++ } ++ else ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "reduction: unhandled reduction operation: %G", ++ def_stmt_info->stmt); ++ return NULL; ++ } ++ ++ unsigned nlatch_def_loop_uses = 0; ++ auto_vec lcphis; ++ bool inner_loop_of_double_reduc = false; ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) ++ { ++ gimple *use_stmt = USE_STMT (use_p); ++ if (is_gimple_debug (use_stmt)) ++ continue; ++ if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) ++ nlatch_def_loop_uses++; ++ else ++ { ++ /* We can have more than one loop-closed PHI. */ ++ lcphis.safe_push (as_a (use_stmt)); ++ if (nested_in_vect_loop ++ && (STMT_VINFO_DEF_TYPE (loop_info->lookup_stmt (use_stmt)) ++ == vect_double_reduction_def)) ++ inner_loop_of_double_reduc = true; ++ } ++ } ++ ++ /* If this isn't a nested cycle or if the nested cycle reduction value ++ is used ouside of the inner loop we cannot handle uses of the reduction ++ value. 
*/ ++ if ((!nested_in_vect_loop || inner_loop_of_double_reduc) ++ && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1)) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "reduction used in loop.\n"); ++ return NULL; ++ } ++ ++ /* If DEF_STMT is a phi node itself, we expect it to have a single argument ++ defined in the inner loop. */ ++ if (phi_def) ++ { ++ gphi *def_stmt = as_a (def_stmt_info->stmt); ++ op1 = PHI_ARG_DEF (def_stmt, 0); ++ ++ if (gimple_phi_num_args (def_stmt) != 1 ++ || TREE_CODE (op1) != SSA_NAME) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "unsupported phi node definition.\n"); ++ ++ return NULL; ++ } ++ ++ gimple *def1 = SSA_NAME_DEF_STMT (op1); ++ if (gimple_bb (def1) ++ && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) ++ && loop->inner ++ && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1)) ++ && is_gimple_assign (def1) ++ && is_a (phi_use_stmt) ++ && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt))) ++ { ++ if (dump_enabled_p ()) ++ report_ploop_op (MSG_NOTE, def_stmt, ++ "detected double reduction: "); ++ ++ *double_reduc = true; ++ return def_stmt_info; ++ } ++ ++ return NULL; ++ } ++ ++ /* If we are vectorizing an inner reduction we are executing that ++ in the original order only in case we are not dealing with a ++ double reduction. */ ++ bool check_reduction = true; ++ if (flow_loop_nested_p (vect_loop, loop)) ++ { ++ gphi *lcphi; ++ unsigned i; ++ check_reduction = false; ++ FOR_EACH_VEC_ELT (lcphis, i, lcphi) ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi)) ++ { ++ gimple *use_stmt = USE_STMT (use_p); ++ if (is_gimple_debug (use_stmt)) ++ continue; ++ if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt))) ++ check_reduction = true; ++ } ++ } ++ ++ gassign *def_stmt = as_a (def_stmt_info->stmt); ++ code = orig_code = gimple_assign_rhs_code (def_stmt); ++ ++ if (nested_in_vect_loop && !check_reduction) ++ { ++ /* FIXME: Even for non-reductions code generation is funneled ++ through vectorizable_reduction for the stmt defining the ++ PHI latch value. So we have to artificially restrict ourselves ++ for the supported operations. */ ++ switch (get_gimple_rhs_class (code)) ++ { ++ case GIMPLE_BINARY_RHS: ++ case GIMPLE_TERNARY_RHS: ++ break; ++ default: ++ /* Not supported by vectorizable_reduction. */ ++ if (dump_enabled_p ()) ++ report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt, ++ "nested cycle: not handled operation: "); ++ return NULL; ++ } ++ if (dump_enabled_p ()) ++ report_ploop_op (MSG_NOTE, def_stmt, "detected nested cycle: "); ++ return def_stmt_info; ++ } ++ ++ /* We can handle "res -= x[i]", which is non-associative by ++ simply rewriting this into "res += -x[i]". Avoid changing ++ gimple instruction for the first simple tests and only do this ++ if we're allowed to change code at all. */ ++ if (code == MINUS_EXPR && gimple_assign_rhs2 (def_stmt) != phi_name) ++ code = PLUS_EXPR; ++ ++ if (code == COND_EXPR) ++ { ++ if (! 
nested_in_vect_loop) ++ *v_reduc_type = COND_REDUCTION; ++ ++ op3 = gimple_assign_rhs1 (def_stmt); ++ if (COMPARISON_CLASS_P (op3)) ++ { ++ op4 = TREE_OPERAND (op3, 1); ++ op3 = TREE_OPERAND (op3, 0); ++ } ++ if (op3 == phi_name || op4 == phi_name) ++ { ++ if (dump_enabled_p ()) ++ report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt, ++ "reduction: condition depends on previous" ++ " iteration: "); ++ return NULL; ++ } ++ ++ op1 = gimple_assign_rhs2 (def_stmt); ++ op2 = gimple_assign_rhs3 (def_stmt); ++ } ++ else if (!commutative_tree_code (code) || !associative_tree_code (code)) ++ { ++ if (dump_enabled_p ()) ++ report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt, ++ "reduction: not commutative/associative: "); ++ return NULL; ++ } ++ else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) ++ { ++ op1 = gimple_assign_rhs1 (def_stmt); ++ op2 = gimple_assign_rhs2 (def_stmt); ++ } ++ else ++ { ++ if (dump_enabled_p ()) ++ report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt, ++ "reduction: not handled operation: "); ++ return NULL; ++ } ++ ++ if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME) ++ { ++ if (dump_enabled_p ()) ++ report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt, ++ "reduction: both uses not ssa_names: "); ++ ++ return NULL; ++ } ++ ++ type = TREE_TYPE (gimple_assign_lhs (def_stmt)); ++ if ((TREE_CODE (op1) == SSA_NAME ++ && !types_compatible_p (type,TREE_TYPE (op1))) ++ || (TREE_CODE (op2) == SSA_NAME ++ && !types_compatible_p (type, TREE_TYPE (op2))) ++ || (op3 && TREE_CODE (op3) == SSA_NAME ++ && !types_compatible_p (type, TREE_TYPE (op3))) ++ || (op4 && TREE_CODE (op4) == SSA_NAME ++ && !types_compatible_p (type, TREE_TYPE (op4)))) ++ { ++ if (dump_enabled_p ()) ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "reduction: multiple types: operation type: " ++ "%T, operands types: %T,%T", ++ type, TREE_TYPE (op1), TREE_TYPE (op2)); ++ if (op3) ++ dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3)); ++ ++ if (op4) ++ dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4)); ++ dump_printf (MSG_NOTE, "\n"); ++ } ++ ++ return NULL; ++ } ++ ++ /* Check whether it's ok to change the order of the computation. ++ Generally, when vectorizing a reduction we change the order of the ++ computation. This may change the behavior of the program in some ++ cases, so we need to check that this is ok. One exception is when ++ vectorizing an outer-loop: the inner-loop is executed sequentially, ++ and therefore vectorizing reductions in the inner-loop during ++ outer-loop vectorization is safe. */ ++ if (check_reduction ++ && *v_reduc_type == TREE_CODE_REDUCTION ++ && parloops_needs_fold_left_reduction_p (type, code, ++ need_wrapping_integral_overflow)) ++ *v_reduc_type = FOLD_LEFT_REDUCTION; ++ ++ /* Reduction is safe. We're dealing with one of the following: ++ 1) integer arithmetic and no trapv ++ 2) floating point arithmetic, and special flags permit this optimization ++ 3) nested cycle (i.e., outer loop vectorization). */ ++ stmt_vec_info def1_info = loop_info->lookup_def (op1); ++ stmt_vec_info def2_info = loop_info->lookup_def (op2); ++ if (code != COND_EXPR && !def1_info && !def2_info) ++ { ++ if (dump_enabled_p ()) ++ report_ploop_op (MSG_NOTE, def_stmt, ++ "reduction: no defs for operands: "); ++ return NULL; ++ } ++ ++ /* Check that one def is the reduction def, defined by PHI, ++ the other def is either defined in the loop ("vect_internal_def"), ++ or it's an induction (defined by a loop-header phi-node). 
*/ ++ ++ if (def2_info ++ && def2_info->stmt == phi ++ && (code == COND_EXPR ++ || !def1_info ++ || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt)) ++ || parloops_valid_reduction_input_p (def1_info))) ++ { ++ if (dump_enabled_p ()) ++ report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: "); ++ return def_stmt_info; ++ } ++ ++ if (def1_info ++ && def1_info->stmt == phi ++ && (code == COND_EXPR ++ || !def2_info ++ || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt)) ++ || parloops_valid_reduction_input_p (def2_info))) ++ { ++ if (! nested_in_vect_loop && orig_code != MINUS_EXPR) ++ { ++ /* Check if we can swap operands (just for simplicity - so that ++ the rest of the code can assume that the reduction variable ++ is always the last (second) argument). */ ++ if (code == COND_EXPR) ++ { ++ /* Swap cond_expr by inverting the condition. */ ++ tree cond_expr = gimple_assign_rhs1 (def_stmt); ++ enum tree_code invert_code = ERROR_MARK; ++ enum tree_code cond_code = TREE_CODE (cond_expr); ++ ++ if (TREE_CODE_CLASS (cond_code) == tcc_comparison) ++ { ++ bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0)); ++ invert_code = invert_tree_comparison (cond_code, honor_nans); ++ } ++ if (invert_code != ERROR_MARK) ++ { ++ TREE_SET_CODE (cond_expr, invert_code); ++ swap_ssa_operands (def_stmt, ++ gimple_assign_rhs2_ptr (def_stmt), ++ gimple_assign_rhs3_ptr (def_stmt)); ++ } ++ else ++ { ++ if (dump_enabled_p ()) ++ report_ploop_op (MSG_NOTE, def_stmt, ++ "detected reduction: cannot swap operands " ++ "for cond_expr"); ++ return NULL; ++ } ++ } ++ else ++ swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt), ++ gimple_assign_rhs2_ptr (def_stmt)); ++ ++ if (dump_enabled_p ()) ++ report_ploop_op (MSG_NOTE, def_stmt, ++ "detected reduction: need to swap operands: "); ++ ++ if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt))) ++ LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true; ++ } ++ else ++ { ++ if (dump_enabled_p ()) ++ report_ploop_op (MSG_NOTE, def_stmt, "detected reduction: "); ++ } ++ ++ return def_stmt_info; ++ } ++ ++ /* Try to find SLP reduction chain. */ ++ if (! nested_in_vect_loop ++ && code != COND_EXPR ++ && orig_code != MINUS_EXPR ++ && parloops_is_slp_reduction (loop_info, phi, def_stmt)) ++ { ++ if (dump_enabled_p ()) ++ report_ploop_op (MSG_NOTE, def_stmt, ++ "reduction: detected reduction chain: "); ++ ++ return def_stmt_info; ++ } ++ ++ /* Look for the expression computing loop_arg from loop PHI result. */ ++ if (check_reduction_path (vect_location, loop, phi, loop_arg, code)) ++ return def_stmt_info; ++ ++ if (dump_enabled_p ()) ++ { ++ report_ploop_op (MSG_MISSED_OPTIMIZATION, def_stmt, ++ "reduction: unknown pattern: "); ++ } ++ ++ return NULL; ++} ++ ++/* Wrapper around vect_is_simple_reduction, which will modify code ++ in-place if it enables detection of more reductions. Arguments ++ as there. 
*/ ++ ++stmt_vec_info ++parloops_force_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, ++ bool *double_reduc, ++ bool need_wrapping_integral_overflow) ++{ ++ enum vect_reduction_type v_reduc_type; ++ stmt_vec_info def_info ++ = parloops_is_simple_reduction (loop_info, phi_info, double_reduc, ++ need_wrapping_integral_overflow, ++ &v_reduc_type); ++ if (def_info) ++ { ++ STMT_VINFO_REDUC_TYPE (phi_info) = v_reduc_type; ++ STMT_VINFO_REDUC_DEF (phi_info) = def_info; ++ STMT_VINFO_REDUC_TYPE (def_info) = v_reduc_type; ++ STMT_VINFO_REDUC_DEF (def_info) = phi_info; ++ } ++ return def_info; ++} ++ + /* Minimal number of iterations of a loop that should be executed in each + thread. */ + #define MIN_PER_THREAD PARAM_VALUE (PARAM_PARLOOPS_MIN_PER_THREAD) +@@ -2614,9 +3326,9 @@ gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list + continue; + + stmt_vec_info reduc_stmt_info +- = vect_force_simple_reduction (simple_loop_info, +- simple_loop_info->lookup_stmt (phi), +- &double_reduc, true); ++ = parloops_force_simple_reduction (simple_loop_info, ++ simple_loop_info->lookup_stmt (phi), ++ &double_reduc, true); + if (!reduc_stmt_info || !valid_reduction_p (reduc_stmt_info)) + continue; + +@@ -2663,9 +3375,9 @@ gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list + stmt_vec_info inner_phi_info + = simple_loop_info->lookup_stmt (inner_phi); + stmt_vec_info inner_reduc_stmt_info +- = vect_force_simple_reduction (simple_loop_info, +- inner_phi_info, +- &double_reduc, true); ++ = parloops_force_simple_reduction (simple_loop_info, ++ inner_phi_info, ++ &double_reduc, true); + gcc_assert (!double_reduc); + if (!inner_reduc_stmt_info + || !valid_reduction_p (inner_reduc_stmt_info)) +diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c +index 01f095382d6..54e8adc8d7c 100644 +--- a/gcc/tree-ssa-alias.c ++++ b/gcc/tree-ssa-alias.c +@@ -2535,13 +2535,36 @@ stmt_kills_ref_p (gimple *stmt, ao_ref *ref) + case BUILT_IN_MEMSET_CHK: + case BUILT_IN_STRNCPY: + case BUILT_IN_STPNCPY: ++ case BUILT_IN_CALLOC: + { + /* For a must-alias check we need to be able to constrain + the access properly. */ + if (!ref->max_size_known_p ()) + return false; +- tree dest = gimple_call_arg (stmt, 0); +- tree len = gimple_call_arg (stmt, 2); ++ tree dest; ++ tree len; ++ ++ /* In execution order a calloc call will never kill ++ anything. However, DSE will (ab)use this interface ++ to ask if a calloc call writes the same memory locations ++ as a later assignment, memset, etc. So handle calloc ++ in the expected way. */ ++ if (DECL_FUNCTION_CODE (callee) == BUILT_IN_CALLOC) ++ { ++ tree arg0 = gimple_call_arg (stmt, 0); ++ tree arg1 = gimple_call_arg (stmt, 1); ++ if (TREE_CODE (arg0) != INTEGER_CST ++ || TREE_CODE (arg1) != INTEGER_CST) ++ return false; ++ ++ dest = gimple_call_lhs (stmt); ++ len = fold_build2 (MULT_EXPR, TREE_TYPE (arg0), arg0, arg1); ++ } ++ else ++ { ++ dest = gimple_call_arg (stmt, 0); ++ len = gimple_call_arg (stmt, 2); ++ } + if (!poly_int_tree_p (len)) + return false; + tree rbase = ref->base; +diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c +index efe5b31cc0a..c20fbe048ed 100644 +--- a/gcc/tree-ssa-dse.c ++++ b/gcc/tree-ssa-dse.c +@@ -1,4 +1,4 @@ +-/* Dead store elimination ++/* Dead and redundant store elimination + Copyright (C) 2004-2019 Free Software Foundation, Inc. + + This file is part of GCC. +@@ -36,17 +36,26 @@ along with GCC; see the file COPYING3. 
If not see + #include "params.h" + #include "alias.h" + #include "tree-ssa-loop.h" ++#include "tree-ssa-dse.h" + + /* This file implements dead store elimination. + + A dead store is a store into a memory location which will later be + overwritten by another store without any intervening loads. In this +- case the earlier store can be deleted. ++ case the earlier store can be deleted or trimmed if the store ++ was partially dead. ++ ++ A redundant store is a store into a memory location which stores ++ the exact same value as a prior store to the same memory location. ++ While this can often be handled by dead store elimination, removing ++ the redundant store is often better than removing or trimming the ++ dead store. + + In our SSA + virtual operand world we use immediate uses of virtual +- operands to detect dead stores. If a store's virtual definition ++ operands to detect these cases. If a store's virtual definition + is used precisely once by a later store to the same location which +- post dominates the first store, then the first store is dead. ++ post dominates the first store, then the first store is dead. If ++ the data stored is the same, then the second store is redundant. + + The single use of the store's virtual definition ensures that + there are no intervening aliased loads and the requirement that +@@ -58,7 +67,9 @@ along with GCC; see the file COPYING3. If not see + the point immediately before the later store. Again, the single + use of the virtual definition and the post-dominance relationship + ensure that such movement would be safe. Clearly if there are +- back to back stores, then the second is redundant. ++ back to back stores, then the second is makes the first dead. If ++ the second store stores the same value, then the second store is ++ redundant. + + Reviewing section 10.7.2 in Morgan's "Building an Optimizing Compiler" + may also help in understanding this code since it discusses the +@@ -66,19 +77,13 @@ along with GCC; see the file COPYING3. If not see + fact, they are the same transformation applied to different views of + the CFG. */ + ++void delete_dead_or_redundant_assignment (gimple_stmt_iterator *, const char *); ++static void delete_dead_or_redundant_call (gimple_stmt_iterator *, const char *); + + /* Bitmap of blocks that have had EH statements cleaned. We should + remove their dead edges eventually. */ + static bitmap need_eh_cleanup; + +-/* Return value from dse_classify_store */ +-enum dse_store_status +-{ +- DSE_STORE_LIVE, +- DSE_STORE_MAYBE_PARTIAL_DEAD, +- DSE_STORE_DEAD +-}; +- + /* STMT is a statement that may write into memory. Analyze it and + initialize WRITE to describe how STMT affects memory. + +@@ -106,6 +111,25 @@ initialize_ao_ref_for_dse (gimple *stmt, ao_ref *write) + ao_ref_init_from_ptr_and_size (write, ptr, size); + return true; + } ++ ++ /* A calloc call can never be dead, but it can make ++ subsequent stores redundant if they store 0 into ++ the same memory locations. 
*/ ++ case BUILT_IN_CALLOC: ++ { ++ tree nelem = gimple_call_arg (stmt, 0); ++ tree selem = gimple_call_arg (stmt, 1); ++ if (TREE_CODE (nelem) == INTEGER_CST ++ && TREE_CODE (selem) == INTEGER_CST) ++ { ++ tree lhs = gimple_call_lhs (stmt); ++ tree size = fold_build2 (MULT_EXPR, TREE_TYPE (nelem), ++ nelem, selem); ++ ao_ref_init_from_ptr_and_size (write, lhs, size); ++ return true; ++ } ++ } ++ + default: + break; + } +@@ -551,16 +575,84 @@ check_name (tree, tree *idx, void *data) + return true; + } + ++/* STMT stores the value 0 into one or more memory locations ++ (via memset, empty constructor, calloc call, etc). ++ ++ See if there is a subsequent store of the value 0 to one ++ or more of the same memory location(s). If so, the subsequent ++ store is redundant and can be removed. ++ ++ The subsequent stores could be via memset, empty constructors, ++ simple MEM stores, etc. */ ++ ++static void ++dse_optimize_redundant_stores (gimple *stmt) ++{ ++ int cnt = 0; ++ ++ /* We could do something fairly complex and look through PHIs ++ like DSE_CLASSIFY_STORE, but it doesn't seem to be worth ++ the effort. ++ ++ Look at all the immediate uses of the VDEF (which are obviously ++ dominated by STMT). See if one or more stores 0 into the same ++ memory locations a STMT, if so remove the immediate use statements. */ ++ tree defvar = gimple_vdef (stmt); ++ imm_use_iterator ui; ++ gimple *use_stmt; ++ FOR_EACH_IMM_USE_STMT (use_stmt, ui, defvar) ++ { ++ /* Limit stmt walking. */ ++ if (++cnt > PARAM_VALUE (PARAM_DSE_MAX_ALIAS_QUERIES_PER_STORE)) ++ BREAK_FROM_IMM_USE_STMT (ui); ++ ++ /* If USE_STMT stores 0 into one or more of the same locations ++ as STMT and STMT would kill USE_STMT, then we can just remove ++ USE_STMT. */ ++ tree fndecl; ++ if ((is_gimple_assign (use_stmt) ++ && gimple_vdef (use_stmt) ++ && ((gimple_assign_rhs_code (use_stmt) == CONSTRUCTOR ++ && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (use_stmt)) == 0 ++ && !gimple_clobber_p (stmt)) ++ || (gimple_assign_rhs_code (use_stmt) == INTEGER_CST ++ && integer_zerop (gimple_assign_rhs1 (use_stmt))))) ++ || (gimple_call_builtin_p (use_stmt, BUILT_IN_NORMAL) ++ && (fndecl = gimple_call_fndecl (use_stmt)) != NULL ++ && (DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET ++ || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET_CHK) ++ && integer_zerop (gimple_call_arg (use_stmt, 1)))) ++ { ++ ao_ref write; ++ ++ if (!initialize_ao_ref_for_dse (use_stmt, &write)) ++ BREAK_FROM_IMM_USE_STMT (ui) ++ ++ if (valid_ao_ref_for_dse (&write) ++ && stmt_kills_ref_p (stmt, &write)) ++ { ++ gimple_stmt_iterator gsi = gsi_for_stmt (use_stmt); ++ if (is_gimple_assign (use_stmt)) ++ delete_dead_or_redundant_assignment (&gsi, "redundant"); ++ else if (is_gimple_call (use_stmt)) ++ delete_dead_or_redundant_call (&gsi, "redundant"); ++ else ++ gcc_unreachable (); ++ } ++ } ++ } ++} ++ + /* A helper of dse_optimize_stmt. + Given a GIMPLE_ASSIGN in STMT that writes to REF, classify it + according to downstream uses and defs. Sets *BY_CLOBBER_P to true + if only clobber statements influenced the classification result. + Returns the classification. 
*/ + +-static dse_store_status ++dse_store_status + dse_classify_store (ao_ref *ref, gimple *stmt, + bool byte_tracking_enabled, sbitmap live_bytes, +- bool *by_clobber_p = NULL) ++ bool *by_clobber_p, tree stop_at_vuse) + { + gimple *temp; + int cnt = 0; +@@ -596,6 +688,11 @@ dse_classify_store (ao_ref *ref, gimple *stmt, + } + else + defvar = gimple_vdef (temp); ++ ++ /* If we're instructed to stop walking at region boundary, do so. */ ++ if (defvar == stop_at_vuse) ++ return DSE_STORE_LIVE; ++ + auto_vec defs; + gimple *phi_def = NULL; + FOR_EACH_IMM_USE_STMT (use_stmt, ui, defvar) +@@ -763,12 +860,12 @@ private: + + /* Delete a dead call at GSI, which is mem* call of some kind. */ + static void +-delete_dead_call (gimple_stmt_iterator *gsi) ++delete_dead_or_redundant_call (gimple_stmt_iterator *gsi, const char *type) + { + gimple *stmt = gsi_stmt (*gsi); + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, " Deleted dead call: "); ++ fprintf (dump_file, " Deleted %s call: ", type); + print_gimple_stmt (dump_file, stmt, 0, dump_flags); + fprintf (dump_file, "\n"); + } +@@ -796,13 +893,13 @@ delete_dead_call (gimple_stmt_iterator *gsi) + + /* Delete a dead store at GSI, which is a gimple assignment. */ + +-static void +-delete_dead_assignment (gimple_stmt_iterator *gsi) ++void ++delete_dead_or_redundant_assignment (gimple_stmt_iterator *gsi, const char *type) + { + gimple *stmt = gsi_stmt (*gsi); + if (dump_file && (dump_flags & TDF_DETAILS)) + { +- fprintf (dump_file, " Deleted dead store: "); ++ fprintf (dump_file, " Deleted %s store: ", type); + print_gimple_stmt (dump_file, stmt, 0, dump_flags); + fprintf (dump_file, "\n"); + } +@@ -855,7 +952,8 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi) + some builtin calls. */ + if (gimple_call_builtin_p (stmt, BUILT_IN_NORMAL)) + { +- switch (DECL_FUNCTION_CODE (gimple_call_fndecl (stmt))) ++ tree fndecl = gimple_call_fndecl (stmt); ++ switch (DECL_FUNCTION_CODE (fndecl)) + { + case BUILT_IN_MEMCPY: + case BUILT_IN_MEMMOVE: +@@ -867,10 +965,18 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi) + tree size = gimple_call_arg (stmt, 2); + if (integer_zerop (size)) + { +- delete_dead_call (gsi); ++ delete_dead_or_redundant_call (gsi, "dead"); + return; + } + ++ /* If this is a memset call that initializes an object ++ to zero, it may be redundant with an earlier memset ++ or empty CONSTRUCTOR of a larger object. */ ++ if ((DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET ++ || DECL_FUNCTION_CODE (fndecl) == BUILT_IN_MEMSET_CHK) ++ && integer_zerop (gimple_call_arg (stmt, 1))) ++ dse_optimize_redundant_stores (stmt); ++ + enum dse_store_status store_status; + m_byte_tracking_enabled + = setup_live_bytes_from_ref (&ref, m_live_bytes); +@@ -887,10 +993,14 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi) + } + + if (store_status == DSE_STORE_DEAD) +- delete_dead_call (gsi); ++ delete_dead_or_redundant_call (gsi, "dead"); + return; + } + ++ case BUILT_IN_CALLOC: ++ /* We already know the arguments are integer constants. */ ++ dse_optimize_redundant_stores (stmt); ++ + default: + return; + } +@@ -900,6 +1010,18 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi) + { + bool by_clobber_p = false; + ++ /* First see if this store is a CONSTRUCTOR and if there ++ are subsequent CONSTRUCTOR stores which are totally ++ subsumed by this statement. If so remove the subsequent ++ CONSTRUCTOR store. ++ ++ This will tend to make fewer calls into memset with longer ++ arguments. 
*/ ++ if (gimple_assign_rhs_code (stmt) == CONSTRUCTOR ++ && CONSTRUCTOR_NELTS (gimple_assign_rhs1 (stmt)) == 0 ++ && !gimple_clobber_p (stmt)) ++ dse_optimize_redundant_stores (stmt); ++ + /* Self-assignments are zombies. */ + if (operand_equal_p (gimple_assign_rhs1 (stmt), + gimple_assign_lhs (stmt), 0)) +@@ -930,7 +1052,7 @@ dse_dom_walker::dse_optimize_stmt (gimple_stmt_iterator *gsi) + && !by_clobber_p) + return; + +- delete_dead_assignment (gsi); ++ delete_dead_or_redundant_assignment (gsi, "dead"); + } + } + +diff --git a/gcc/tree-ssa-dse.h b/gcc/tree-ssa-dse.h +new file mode 100644 +index 00000000000..a5eccbd746d +--- /dev/null ++++ b/gcc/tree-ssa-dse.h +@@ -0,0 +1,36 @@ ++/* Support routines for dead store elimination. ++ Copyright (C) 2019 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#ifndef GCC_TREE_SSA_DSE_H ++#define GCC_TREE_SSA_DSE_H ++ ++/* Return value from dse_classify_store */ ++enum dse_store_status ++{ ++ DSE_STORE_LIVE, ++ DSE_STORE_MAYBE_PARTIAL_DEAD, ++ DSE_STORE_DEAD ++}; ++ ++dse_store_status dse_classify_store (ao_ref *, gimple *, bool, sbitmap, ++ bool * = NULL, tree = NULL); ++ ++void delete_dead_or_redundant_assignment (gimple_stmt_iterator *, const char *); ++ ++#endif /* GCC_TREE_SSA_DSE_H */ +diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c +index d241becd481..2d54e13b180 100644 +--- a/gcc/tree-ssa-loop-niter.c ++++ b/gcc/tree-ssa-loop-niter.c +@@ -1928,7 +1928,7 @@ number_of_iterations_cond (struct loop *loop, + + tree + simplify_replace_tree (tree expr, tree old, tree new_tree, +- tree (*valueize) (tree)) ++ tree (*valueize) (tree, void*), void *context) + { + unsigned i, n; + tree ret = NULL_TREE, e, se; +@@ -1944,7 +1944,7 @@ simplify_replace_tree (tree expr, tree old, tree new_tree, + { + if (TREE_CODE (expr) == SSA_NAME) + { +- new_tree = valueize (expr); ++ new_tree = valueize (expr, context); + if (new_tree != expr) + return new_tree; + } +@@ -1960,7 +1960,7 @@ simplify_replace_tree (tree expr, tree old, tree new_tree, + for (i = 0; i < n; i++) + { + e = TREE_OPERAND (expr, i); +- se = simplify_replace_tree (e, old, new_tree, valueize); ++ se = simplify_replace_tree (e, old, new_tree, valueize, context); + if (e == se) + continue; + +diff --git a/gcc/tree-ssa-loop-niter.h b/gcc/tree-ssa-loop-niter.h +index dc116489218..fb192d2c250 100644 +--- a/gcc/tree-ssa-loop-niter.h ++++ b/gcc/tree-ssa-loop-niter.h +@@ -53,7 +53,9 @@ extern bool scev_probably_wraps_p (tree, tree, tree, gimple *, + struct loop *, bool); + extern void free_numbers_of_iterations_estimates (struct loop *); + extern void free_numbers_of_iterations_estimates (function *); +-extern tree simplify_replace_tree (tree, tree, tree, tree (*)(tree) = NULL); ++extern tree simplify_replace_tree (tree, tree, ++ tree, tree (*)(tree, void *) = NULL, ++ void * = NULL); + extern void substitute_in_loop_info (struct loop *, tree, tree); + + #endif /* GCC_TREE_SSA_LOOP_NITER_H */ 
+diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c +index 00a09508836..551718637f1 100644 +--- a/gcc/tree-ssa-loop.c ++++ b/gcc/tree-ssa-loop.c +@@ -768,9 +768,9 @@ get_lsm_tmp_name (tree ref, unsigned n, const char *suffix) + ns[1] = 0; + lsm_tmp_name_add (ns); + } +- return lsm_tmp_name; + if (suffix != NULL) + lsm_tmp_name_add (suffix); ++ return lsm_tmp_name; + } + + /* Computes an estimated number of insns in LOOP, weighted by WEIGHTS. */ +diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c +index 6794fbde29e..9c1a9a651fe 100644 +--- a/gcc/tree-ssa-reassoc.c ++++ b/gcc/tree-ssa-reassoc.c +@@ -2039,9 +2039,6 @@ optimize_ops_list (enum tree_code opcode, + i++; + } + +- length = ops->length (); +- oelast = ops->last (); +- + if (iterate) + optimize_ops_list (opcode, ops); + } +diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c +index 95fbead7b1e..cd5a3a75eaa 100644 +--- a/gcc/tree-ssa-sccvn.c ++++ b/gcc/tree-ssa-sccvn.c +@@ -309,6 +309,10 @@ static vn_tables_t valid_info; + /* Valueization hook. Valueize NAME if it is an SSA name, otherwise + just return it. */ + tree (*vn_valueize) (tree); ++tree vn_valueize_wrapper (tree t, void* context ATTRIBUTE_UNUSED) ++{ ++ return vn_valueize (t); ++} + + + /* This represents the top of the VN lattice, which is the universal +@@ -6364,7 +6368,7 @@ process_bb (rpo_elim &avail, basic_block bb, + if (bb->loop_father->nb_iterations) + bb->loop_father->nb_iterations + = simplify_replace_tree (bb->loop_father->nb_iterations, +- NULL_TREE, NULL_TREE, vn_valueize); ++ NULL_TREE, NULL_TREE, &vn_valueize_wrapper); + } + + /* Value-number all defs in the basic-block. */ +diff --git a/gcc/tree-ssa-sink.c b/gcc/tree-ssa-sink.c +index 2648b24f7d5..98b6caced03 100644 +--- a/gcc/tree-ssa-sink.c ++++ b/gcc/tree-ssa-sink.c +@@ -433,7 +433,6 @@ statement_sink_location (gimple *stmt, basic_block frombb, + + if (gimple_code (use) != GIMPLE_PHI) + { +- sinkbb = gimple_bb (use); + sinkbb = select_best_block (frombb, gimple_bb (use), stmt); + + if (sinkbb == frombb) +diff --git a/gcc/tree-ssa-threadedge.c b/gcc/tree-ssa-threadedge.c +index c3ea2d680d8..91494d76176 100644 +--- a/gcc/tree-ssa-threadedge.c ++++ b/gcc/tree-ssa-threadedge.c +@@ -1299,7 +1299,6 @@ thread_across_edge (gcond *dummy_cond, + + x = new jump_thread_edge (taken_edge, EDGE_COPY_SRC_JOINER_BLOCK); + path->safe_push (x); +- found = false; + found = thread_around_empty_blocks (taken_edge, + dummy_cond, + avail_exprs_stack, +diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c +index 17a4fc8e279..e822ffc1a01 100644 +--- a/gcc/tree-vect-data-refs.c ++++ b/gcc/tree-vect-data-refs.c +@@ -2863,10 +2863,12 @@ strip_conversion (tree op) + } + + /* Return true if vectorizable_* routines can handle statements STMT1_INFO +- and STMT2_INFO being in a single group. */ ++ and STMT2_INFO being in a single group. When ALLOW_SLP_P, masked loads can ++ be grouped in SLP mode. */ + + static bool +-can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info) ++can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info, ++ bool allow_slp_p) + { + if (gimple_assign_single_p (stmt1_info->stmt)) + return gimple_assign_single_p (stmt2_info->stmt); +@@ -2888,7 +2890,8 @@ can_group_stmts_p (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info) + like those created by build_mask_conversion. 
*/ + tree mask1 = gimple_call_arg (call1, 2); + tree mask2 = gimple_call_arg (call2, 2); +- if (!operand_equal_p (mask1, mask2, 0)) ++ if (!operand_equal_p (mask1, mask2, 0) ++ && (ifn == IFN_MASK_STORE || !allow_slp_p)) + { + mask1 = strip_conversion (mask1); + if (!mask1) +@@ -2974,7 +2977,7 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) + || data_ref_compare_tree (DR_BASE_ADDRESS (dra), + DR_BASE_ADDRESS (drb)) != 0 + || data_ref_compare_tree (DR_OFFSET (dra), DR_OFFSET (drb)) != 0 +- || !can_group_stmts_p (stmtinfo_a, stmtinfo_b)) ++ || !can_group_stmts_p (stmtinfo_a, stmtinfo_b, true)) + break; + + /* Check that the data-refs have the same constant size. */ +@@ -3059,6 +3062,13 @@ vect_analyze_data_ref_accesses (vec_info *vinfo) + DR_GROUP_NEXT_ELEMENT (lastinfo) = stmtinfo_b; + lastinfo = stmtinfo_b; + ++ STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a) ++ = !can_group_stmts_p (stmtinfo_a, stmtinfo_b, false); ++ ++ if (dump_enabled_p () && STMT_VINFO_SLP_VECT_ONLY (stmtinfo_a)) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Load suitable for SLP vectorization only.\n"); ++ + if (init_b == init_prev + && !to_fixup.add (DR_GROUP_FIRST_ELEMENT (stmtinfo_a)) + && dump_enabled_p ()) +@@ -3446,7 +3456,6 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) + /* First, we collect all data ref pairs for aliasing checks. */ + FOR_EACH_VEC_ELT (may_alias_ddrs, i, ddr) + { +- int comp_res; + poly_uint64 lower_bound; + tree segment_length_a, segment_length_b; + unsigned HOST_WIDE_INT access_size_a, access_size_b; +@@ -3478,10 +3487,13 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) + dr_vec_info *dr_info_b = loop_vinfo->lookup_dr (DDR_B (ddr)); + stmt_vec_info stmt_info_b = dr_info_b->stmt; + ++ bool preserves_scalar_order_p ++ = vect_preserves_scalar_order_p (dr_info_a, dr_info_b); ++ + /* Skip the pair if inter-iteration dependencies are irrelevant + and intra-iteration dependencies are guaranteed to be honored. */ + if (ignore_step_p +- && (vect_preserves_scalar_order_p (dr_info_a, dr_info_b) ++ && (preserves_scalar_order_p + || vectorizable_with_step_bound_p (dr_info_a, dr_info_b, + &lower_bound))) + { +@@ -3562,14 +3574,11 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) + align_a = vect_vfa_align (dr_info_a); + align_b = vect_vfa_align (dr_info_b); + +- comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_info_a->dr), +- DR_BASE_ADDRESS (dr_info_b->dr)); +- if (comp_res == 0) +- comp_res = data_ref_compare_tree (DR_OFFSET (dr_info_a->dr), +- DR_OFFSET (dr_info_b->dr)); +- + /* See whether the alias is known at compilation time. 
*/ +- if (comp_res == 0 ++ if (operand_equal_p (DR_BASE_ADDRESS (dr_info_a->dr), ++ DR_BASE_ADDRESS (dr_info_b->dr), 0) ++ && operand_equal_p (DR_OFFSET (dr_info_a->dr), ++ DR_OFFSET (dr_info_b->dr), 0) + && TREE_CODE (DR_STEP (dr_info_a->dr)) == INTEGER_CST + && TREE_CODE (DR_STEP (dr_info_b->dr)) == INTEGER_CST + && poly_int_tree_p (segment_length_a) +@@ -3602,15 +3611,21 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) + stmt_info_b->stmt); + } + +- dr_with_seg_len_pair_t dr_with_seg_len_pair +- (dr_with_seg_len (dr_info_a->dr, segment_length_a, +- access_size_a, align_a), +- dr_with_seg_len (dr_info_b->dr, segment_length_b, +- access_size_b, align_b)); ++ dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a, ++ access_size_a, align_a); ++ dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b, ++ access_size_b, align_b); ++ /* Canonicalize the order to be the one that's needed for accurate ++ RAW, WAR and WAW flags, in cases where the data references are ++ well-ordered. The order doesn't really matter otherwise, ++ but we might as well be consistent. */ ++ if (get_later_stmt (stmt_info_a, stmt_info_b) == stmt_info_a) ++ std::swap (dr_a, dr_b); + +- /* Canonicalize pairs by sorting the two DR members. */ +- if (comp_res > 0) +- std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second); ++ dr_with_seg_len_pair_t dr_with_seg_len_pair ++ (dr_a, dr_b, (preserves_scalar_order_p ++ ? dr_with_seg_len_pair_t::WELL_ORDERED ++ : dr_with_seg_len_pair_t::REORDERED)); + + comp_alias_ddrs.safe_push (dr_with_seg_len_pair); + } +@@ -4123,7 +4138,7 @@ vect_find_stmt_data_reference (loop_p loop, gimple *stmt, + */ + + opt_result +-vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf) ++vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf, bool *fatal) + { + struct loop *loop = NULL; + unsigned int i; +@@ -4298,7 +4313,7 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf) + /* Set vectype for STMT. */ + scalar_type = TREE_TYPE (DR_REF (dr)); + STMT_VINFO_VECTYPE (stmt_info) +- = get_vectype_for_scalar_type (scalar_type); ++ = get_vectype_for_scalar_type (vinfo, scalar_type); + if (!STMT_VINFO_VECTYPE (stmt_info)) + { + if (dump_enabled_p ()) +@@ -4344,13 +4359,18 @@ vect_analyze_data_refs (vec_info *vinfo, poly_uint64 *min_vf) + if (!vect_check_gather_scatter (stmt_info, + as_a (vinfo), + &gs_info) +- || !get_vectype_for_scalar_type (TREE_TYPE (gs_info.offset))) +- return opt_result::failure_at +- (stmt_info->stmt, +- (gatherscatter == GATHER) ? +- "not vectorized: not suitable for gather load %G" : +- "not vectorized: not suitable for scatter store %G", +- stmt_info->stmt); ++ || !get_vectype_for_scalar_type (vinfo, ++ TREE_TYPE (gs_info.offset))) ++ { ++ if (fatal) ++ *fatal = false; ++ return opt_result::failure_at ++ (stmt_info->stmt, ++ (gatherscatter == GATHER) ++ ? 
"not vectorized: not suitable for gather load %G" ++ : "not vectorized: not suitable for scatter store %G", ++ stmt_info->stmt); ++ } + STMT_VINFO_GATHER_SCATTER_P (stmt_info) = gatherscatter; + } + } +diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c +index ad1ea4e7b97..39bc2a82b37 100644 +--- a/gcc/tree-vect-generic.c ++++ b/gcc/tree-vect-generic.c +@@ -694,7 +694,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, + tree zero, cst, cond, mask_type; + gimple *stmt; + +- mask_type = build_same_sized_truth_vector_type (type); ++ mask_type = truth_type_for (type); + zero = build_zero_cst (type); + cond = build2 (LT_EXPR, mask_type, op0, zero); + tree_vector_builder vec (type, nunits, 1); +diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c +index b3fae5ba4da..20ede85633b 100644 +--- a/gcc/tree-vect-loop-manip.c ++++ b/gcc/tree-vect-loop-manip.c +@@ -47,6 +47,9 @@ along with GCC; see the file COPYING3. If not see + #include "stor-layout.h" + #include "optabs-query.h" + #include "vec-perm-indices.h" ++#include "insn-config.h" ++#include "rtl.h" ++#include "recog.h" + + /************************************************************************* + Simple Loop Peeling Utilities +@@ -323,13 +326,18 @@ vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_masks *dest_rgm, + tree src_masktype = src_rgm->mask_type; + tree dest_masktype = dest_rgm->mask_type; + machine_mode src_mode = TYPE_MODE (src_masktype); ++ insn_code icode1, icode2; + if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter +- && optab_handler (vec_unpacku_hi_optab, src_mode) != CODE_FOR_nothing +- && optab_handler (vec_unpacku_lo_optab, src_mode) != CODE_FOR_nothing) ++ && (icode1 = optab_handler (vec_unpacku_hi_optab, ++ src_mode)) != CODE_FOR_nothing ++ && (icode2 = optab_handler (vec_unpacku_lo_optab, ++ src_mode)) != CODE_FOR_nothing) + { + /* Unpacking the source masks gives at least as many mask bits as + we need. We can then VIEW_CONVERT any excess bits away. */ +- tree unpack_masktype = vect_halve_mask_nunits (src_masktype); ++ machine_mode dest_mode = insn_data[icode1].operand[0].mode; ++ gcc_assert (dest_mode == insn_data[icode2].operand[0].mode); ++ tree unpack_masktype = vect_halve_mask_nunits (src_masktype, dest_mode); + for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i) + { + tree src = src_rgm->masks[i / 2]; +@@ -1745,7 +1753,7 @@ vect_update_init_of_dr (struct data_reference *dr, tree niters, tree_code code) + Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO. + CODE and NITERS are as for vect_update_inits_of_dr. */ + +-static void ++void + vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters, + tree_code code) + { +@@ -1755,21 +1763,12 @@ vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters, + + DUMP_VECT_SCOPE ("vect_update_inits_of_dr"); + +- /* Adjust niters to sizetype and insert stmts on loop preheader edge. */ ++ /* Adjust niters to sizetype. We used to insert the stmts on loop preheader ++ here, but since we might use these niters to update the epilogues niters ++ and data references we can't insert them here as this definition might not ++ always dominate its uses. 
*/ + if (!types_compatible_p (sizetype, TREE_TYPE (niters))) +- { +- gimple_seq seq; +- edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); +- tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters"); +- +- niters = fold_convert (sizetype, niters); +- niters = force_gimple_operand (niters, &seq, false, var); +- if (seq) +- { +- basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq); +- gcc_assert (!new_bb); +- } +- } ++ niters = fold_convert (sizetype, niters); + + FOR_EACH_VEC_ELT (datarefs, i, dr) + { +@@ -2032,6 +2031,29 @@ vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo, + *niters_vector_mult_vf_ptr = niters_vector_mult_vf; + } + ++/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP, ++ this function searches for the corresponding lcssa phi node in exit ++ bb of LOOP. If it is found, return the phi result; otherwise return ++ NULL. */ ++ ++static tree ++find_guard_arg (class loop *loop, class loop *epilog ATTRIBUTE_UNUSED, ++ gphi *lcssa_phi) ++{ ++ gphi_iterator gsi; ++ edge e = single_exit (loop); ++ ++ gcc_assert (single_pred_p (e->dest)); ++ for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ gphi *phi = gsi.phi (); ++ if (operand_equal_p (PHI_ARG_DEF (phi, 0), ++ PHI_ARG_DEF (lcssa_phi, 0), 0)) ++ return PHI_RESULT (phi); ++ } ++ return NULL_TREE; ++} ++ + /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplciates FIRST/SECOND + from SECOND/FIRST and puts it at the original loop's preheader/exit + edge, the two loops are arranged as below: +@@ -2119,6 +2141,29 @@ slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo, + incoming edge. */ + adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg); + } ++ ++ /* For epilogue peeling we have to make sure to copy all LC PHIs ++ for correct vectorization of live stmts. */ ++ if (loop == first) ++ { ++ basic_block orig_exit = single_exit (second)->dest; ++ for (gsi_orig = gsi_start_phis (orig_exit); ++ !gsi_end_p (gsi_orig); gsi_next (&gsi_orig)) ++ { ++ gphi *orig_phi = gsi_orig.phi (); ++ tree orig_arg = PHI_ARG_DEF (orig_phi, 0); ++ if (TREE_CODE (orig_arg) != SSA_NAME || virtual_operand_p (orig_arg)) ++ continue; ++ ++ /* Already created in the above loop. */ ++ if (find_guard_arg (first, second, orig_phi)) ++ continue; ++ ++ tree new_res = copy_ssa_name (orig_arg); ++ gphi *lcphi = create_phi_node (new_res, between_bb); ++ add_phi_arg (lcphi, orig_arg, single_exit (first), UNKNOWN_LOCATION); ++ } ++ } + } + + /* Function slpeel_add_loop_guard adds guard skipping from the beginning +@@ -2203,29 +2248,6 @@ slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop, + } + } + +-/* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP, +- this function searches for the corresponding lcssa phi node in exit +- bb of LOOP. If it is found, return the phi result; otherwise return +- NULL. */ +- +-static tree +-find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED, +- gphi *lcssa_phi) +-{ +- gphi_iterator gsi; +- edge e = single_exit (loop); +- +- gcc_assert (single_pred_p (e->dest)); +- for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) +- { +- gphi *phi = gsi.phi (); +- if (operand_equal_p (PHI_ARG_DEF (phi, 0), +- PHI_ARG_DEF (lcssa_phi, 0), 0)) +- return PHI_RESULT (phi); +- } +- return NULL_TREE; +-} +- + /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied + from LOOP. 
Function slpeel_add_loop_guard adds guard skipping from a + point between the two loops to the end of EPILOG. Edges GUARD_EDGE +@@ -2296,12 +2318,14 @@ slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog, + { + gphi *update_phi = gsi.phi (); + tree old_arg = PHI_ARG_DEF (update_phi, 0); +- /* This loop-closed-phi actually doesn't represent a use out of the +- loop - the phi arg is a constant. */ +- if (TREE_CODE (old_arg) != SSA_NAME) +- continue; + +- tree merge_arg = get_current_def (old_arg); ++ tree merge_arg = NULL_TREE; ++ ++ /* If the old argument is a SSA_NAME use its current_def. */ ++ if (TREE_CODE (old_arg) == SSA_NAME) ++ merge_arg = get_current_def (old_arg); ++ /* If it's a constant or doesn't have a current_def, just use the old ++ argument. */ + if (!merge_arg) + merge_arg = old_arg; + +@@ -2412,7 +2436,22 @@ slpeel_update_phi_nodes_for_lcssa (struct loop *epilog) + + Note this function peels prolog and epilog only if it's necessary, + as well as guards. +- Returns created epilogue or NULL. ++ This function returns the epilogue loop if a decision was made to vectorize ++ it, otherwise NULL. ++ ++ The analysis resulting in this epilogue loop's loop_vec_info was performed ++ in the same vect_analyze_loop call as the main loop's. At that time ++ vect_analyze_loop constructs a list of accepted loop_vec_info's for lower ++ vectorization factors than the main loop. This list is stored in the main ++ loop's loop_vec_info in the 'epilogue_vinfos' member. Everytime we decide to ++ vectorize the epilogue loop for a lower vectorization factor, the ++ loop_vec_info sitting at the top of the epilogue_vinfos list is removed, ++ updated and linked to the epilogue loop. This is later used to vectorize ++ the epilogue. The reason the loop_vec_info needs updating is that it was ++ constructed based on the original main loop, and the epilogue loop is a ++ copy of this loop, so all links pointing to statements in the original loop ++ need updating. Furthermore, these loop_vec_infos share the ++ data_reference's records, which will also need to be updated. + + TODO: Guard for prefer_scalar_loop should be emitted along with + versioning conditions if loop versioning is needed. */ +@@ -2422,7 +2461,8 @@ struct loop * + vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, + tree *niters_vector, tree *step_vector, + tree *niters_vector_mult_vf_var, int th, +- bool check_profitability, bool niters_no_overflow) ++ bool check_profitability, bool niters_no_overflow, ++ tree *advance, drs_init_vec &orig_drs_init) + { + edge e, guard_e; + tree type = TREE_TYPE (niters), guard_cond; +@@ -2430,6 +2470,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, + profile_probability prob_prolog, prob_vector, prob_epilog; + int estimated_vf; + int prolog_peeling = 0; ++ bool vect_epilogues = loop_vinfo->epilogue_vinfos.length () > 0; + /* We currently do not support prolog peeling if the target alignment is not + known at compile time. 'vect_gen_prolog_loop_niters' depends on the + target alignment being constant. 
*/ +@@ -2483,19 +2524,77 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, + int bound_prolog = 0; + if (prolog_peeling) + niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor, +- &bound_prolog); ++ &bound_prolog); + else + niters_prolog = build_int_cst (type, 0); + ++ loop_vec_info epilogue_vinfo = NULL; ++ if (vect_epilogues) ++ { ++ epilogue_vinfo = loop_vinfo->epilogue_vinfos[0]; ++ loop_vinfo->epilogue_vinfos.ordered_remove (0); ++ } ++ ++ tree niters_vector_mult_vf = NULL_TREE; ++ /* Saving NITERs before the loop, as this may be changed by prologue. */ ++ tree before_loop_niters = LOOP_VINFO_NITERS (loop_vinfo); ++ edge update_e = NULL, skip_e = NULL; ++ unsigned int lowest_vf = constant_lower_bound (vf); ++ /* If we know the number of scalar iterations for the main loop we should ++ check whether after the main loop there are enough iterations left over ++ for the epilogue. */ ++ if (vect_epilogues ++ && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) ++ && prolog_peeling >= 0 ++ && known_eq (vf, lowest_vf)) ++ { ++ unsigned HOST_WIDE_INT eiters ++ = (LOOP_VINFO_INT_NITERS (loop_vinfo) ++ - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)); ++ ++ eiters -= prolog_peeling; ++ eiters ++ = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo); ++ ++ unsigned int ratio; ++ unsigned int epilogue_gaps ++ = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo); ++ while (!(constant_multiple_p ++ (GET_MODE_SIZE (loop_vinfo->vector_mode), ++ GET_MODE_SIZE (epilogue_vinfo->vector_mode), &ratio) ++ && eiters >= lowest_vf / ratio + epilogue_gaps)) ++ { ++ delete epilogue_vinfo; ++ epilogue_vinfo = NULL; ++ if (loop_vinfo->epilogue_vinfos.length () == 0) ++ { ++ vect_epilogues = false; ++ break; ++ } ++ epilogue_vinfo = loop_vinfo->epilogue_vinfos[0]; ++ loop_vinfo->epilogue_vinfos.ordered_remove (0); ++ epilogue_gaps = LOOP_VINFO_PEELING_FOR_GAPS (epilogue_vinfo); ++ } ++ } + /* Prolog loop may be skipped. */ + bool skip_prolog = (prolog_peeling != 0); +- /* Skip to epilog if scalar loop may be preferred. It's only needed +- when we peel for epilog loop and when it hasn't been checked with +- loop versioning. */ ++ /* Skip this loop to epilog when there are not enough iterations to enter this ++ vectorized loop. If true we should perform runtime checks on the NITERS ++ to check whether we should skip the current vectorized loop. If we know ++ the number of scalar iterations we may choose to add a runtime check if ++ this number "maybe" smaller than the number of iterations required ++ when we know the number of scalar iterations may potentially ++ be smaller than the number of iterations required to enter this loop, for ++ this we use the upper bounds on the prolog and epilog peeling. When we ++ don't know the number of iterations and don't require versioning it is ++ because we have asserted that there are enough scalar iterations to enter ++ the main loop, so this skip is not necessary. When we are versioning then ++ we only add such a skip if we have chosen to vectorize the epilogue. */ + bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) + ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo), + bound_prolog + bound_epilog) +- : !LOOP_REQUIRES_VERSIONING (loop_vinfo)); ++ : (!LOOP_REQUIRES_VERSIONING (loop_vinfo) ++ || vect_epilogues)); + /* Epilog loop must be executed if the number of iterations for epilog + loop is known at compile time, otherwise we need to add a check at + the end of vector loop and skip to the end of epilog loop. 
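The loop above that discards epilogue candidates boils down to simple arithmetic on the known iteration count: subtract the gap and prologue iterations, take the remainder modulo the main VF, and require that the leftover covers the candidate's VF plus its own gap peeling. A standalone worked example (not GCC code; the numbers are invented, and the candidate VFs stand in for the mode-size ratio computed via constant_multiple_p):

  // Standalone worked example (not GCC code) of the "are there enough scalar
  // iterations left over for this epilogue candidate?" test.
  #include <cstdio>

  int main ()
  {
    unsigned niters = 103;        // known scalar iteration count
    unsigned prolog_peeling = 3;  // iterations peeled for alignment
    unsigned main_vf = 16;        // VF of the already-analysed main loop
    unsigned gaps = 0;            // LOOP_VINFO_PEELING_FOR_GAPS

    // Scalar iterations remaining after the prologue and the main vector loop.
    unsigned eiters = (niters - gaps - prolog_peeling) % main_vf + gaps;

    for (unsigned epi_vf : { 8u, 4u })  // candidate epilogue VFs, widest first
      {
        if (eiters >= epi_vf + gaps)
          {
            std::printf ("epilogue VF %u accepted (%u iterations left)\n",
                         epi_vf, eiters);
            break;
          }
        std::printf ("epilogue VF %u rejected (only %u iterations left)\n",
                     epi_vf, eiters);
      }
  }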
*/ +@@ -2525,6 +2624,12 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, + + dump_user_location_t loop_loc = find_loop_location (loop); + struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); ++ if (vect_epilogues) ++ /* Make sure to set the epilogue's epilogue scalar loop, such that we can ++ use the original scalar loop as remaining epilogue if necessary. */ ++ LOOP_VINFO_SCALAR_LOOP (epilogue_vinfo) ++ = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); ++ + if (prolog_peeling) + { + e = loop_preheader_edge (loop); +@@ -2571,6 +2676,15 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, + scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog); + scale_loop_profile (prolog, prob_prolog, bound_prolog); + } ++ ++ /* Save original inits for each data_reference before advancing them with ++ NITERS_PROLOG. */ ++ unsigned int i; ++ struct data_reference *dr; ++ vec datarefs = loop_vinfo->shared->datarefs; ++ FOR_EACH_VEC_ELT (datarefs, i, dr) ++ orig_drs_init.safe_push (std::make_pair (dr, DR_OFFSET (dr))); ++ + /* Update init address of DRs. */ + vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR); + /* Update niters for vector loop. */ +@@ -2605,8 +2719,15 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, + "loop can't be duplicated to exit edge.\n"); + gcc_unreachable (); + } +- /* Peel epilog and put it on exit edge of loop. */ +- epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e); ++ /* Peel epilog and put it on exit edge of loop. If we are vectorizing ++ said epilog then we should use a copy of the main loop as a starting ++ point. This loop may have already had some preliminary transformations ++ to allow for more optimal vectorization, for example if-conversion. ++ If we are not vectorizing the epilog then we should use the scalar loop ++ as the transformations mentioned above make less or no sense when not ++ vectorizing. */ ++ epilog = vect_epilogues ? get_loop_copy (loop) : scalar_loop; ++ epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, epilog, e); + if (!epilog) + { + dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc, +@@ -2635,6 +2756,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, + guard_to, guard_bb, + prob_vector.invert (), + irred_flag); ++ skip_e = guard_e; + e = EDGE_PRED (guard_to, 0); + e = (e != guard_e ? e : EDGE_PRED (guard_to, 1)); + slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e); +@@ -2656,7 +2778,6 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, + } + + basic_block bb_before_epilog = loop_preheader_edge (epilog)->src; +- tree niters_vector_mult_vf; + /* If loop is peeled for non-zero constant times, now niters refers to + orig_niters - prolog_peeling, it won't overflow even the orig_niters + overflows. */ +@@ -2679,7 +2800,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, + /* Update IVs of original loop as if they were advanced by + niters_vector_mult_vf steps. */ + gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo)); +- edge update_e = skip_vector ? e : loop_preheader_edge (epilog); ++ update_e = skip_vector ? 
e : loop_preheader_edge (epilog); + vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf, + update_e); + +@@ -2720,10 +2841,75 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, + adjust_vec_debug_stmts (); + scev_reset (); + } ++ ++ if (vect_epilogues) ++ { ++ epilog->aux = epilogue_vinfo; ++ LOOP_VINFO_LOOP (epilogue_vinfo) = epilog; ++ ++ loop_constraint_clear (epilog, LOOP_C_INFINITE); ++ ++ /* We now must calculate the number of NITERS performed by the previous ++ loop and EPILOGUE_NITERS to be performed by the epilogue. */ ++ tree niters = fold_build2 (PLUS_EXPR, TREE_TYPE (niters_vector_mult_vf), ++ niters_prolog, niters_vector_mult_vf); ++ ++ /* If skip_vector we may skip the previous loop, we insert a phi-node to ++ determine whether we are coming from the previous vectorized loop ++ using the update_e edge or the skip_vector basic block using the ++ skip_e edge. */ ++ if (skip_vector) ++ { ++ gcc_assert (update_e != NULL && skip_e != NULL); ++ gphi *new_phi = create_phi_node (make_ssa_name (TREE_TYPE (niters)), ++ update_e->dest); ++ tree new_ssa = make_ssa_name (TREE_TYPE (niters)); ++ gimple *stmt = gimple_build_assign (new_ssa, niters); ++ gimple_stmt_iterator gsi; ++ if (TREE_CODE (niters_vector_mult_vf) == SSA_NAME ++ && SSA_NAME_DEF_STMT (niters_vector_mult_vf)->bb != NULL) ++ { ++ gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (niters_vector_mult_vf)); ++ gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); ++ } ++ else ++ { ++ gsi = gsi_last_bb (update_e->src); ++ gsi_insert_before (&gsi, stmt, GSI_NEW_STMT); ++ } ++ ++ niters = new_ssa; ++ add_phi_arg (new_phi, niters, update_e, UNKNOWN_LOCATION); ++ add_phi_arg (new_phi, build_zero_cst (TREE_TYPE (niters)), skip_e, ++ UNKNOWN_LOCATION); ++ niters = PHI_RESULT (new_phi); ++ } ++ ++ /* Subtract the number of iterations performed by the vectorized loop ++ from the number of total iterations. */ ++ tree epilogue_niters = fold_build2 (MINUS_EXPR, TREE_TYPE (niters), ++ before_loop_niters, ++ niters); ++ ++ LOOP_VINFO_NITERS (epilogue_vinfo) = epilogue_niters; ++ LOOP_VINFO_NITERSM1 (epilogue_vinfo) ++ = fold_build2 (MINUS_EXPR, TREE_TYPE (epilogue_niters), ++ epilogue_niters, ++ build_one_cst (TREE_TYPE (epilogue_niters))); ++ ++ /* Set ADVANCE to the number of iterations performed by the previous ++ loop and its prologue. */ ++ *advance = niters; ++ ++ /* Redo the peeling for niter analysis as the NITERs and alignment ++ may have been updated to take the main loop into account. */ ++ determine_peel_for_niter (epilogue_vinfo); ++ } ++ + adjust_vec.release (); + free_original_copy_tables (); + +- return epilog; ++ return vect_epilogues ? epilog : NULL; + } + + /* Function vect_create_cond_for_niters_checks. +@@ -2987,9 +3173,7 @@ vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr) + *COND_EXPR_STMT_LIST. 
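The bookkeeping in the vect_epilogues block above can be summarised with plain integers: the iterations already performed are the prologue iterations plus the scalar iterations covered by the vector loop (or zero when the skip_vector guard bypasses it, which is what the inserted PHI expresses), and the epilogue gets whatever remains. A standalone sketch (not GCC code; all values are invented):

  // Standalone sketch (not GCC code) of the iteration bookkeeping that links
  // the main vector loop and its vectorized epilogue; all numbers are invented.
  #include <cstdio>

  int main ()
  {
    unsigned total_niters = 103;      // niters before any peeling
    unsigned prolog_iters = 3;        // iterations done by the prologue
    unsigned vf = 16;
    bool main_loop_skipped = false;   // whether the skip_vector guard was taken

    // niters_vector_mult_vf: complete vector iterations times the VF.
    unsigned vector_iters_scalar = ((total_niters - prolog_iters) / vf) * vf;

    // The PHI inserted under skip_vector merges this with 0: a skipped main
    // loop contributes no iterations.
    unsigned advance = prolog_iters
                       + (main_loop_skipped ? 0 : vector_iters_scalar);

    unsigned epilogue_niters = total_niters - advance;
    std::printf ("advance = %u, epilogue niters = %u (NITERSM1 = %u)\n",
                 advance, epilogue_niters, epilogue_niters - 1);
    // prints: advance = 99, epilogue niters = 4 (NITERSM1 = 3)
  }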
*/ + + struct loop * +-vect_loop_versioning (loop_vec_info loop_vinfo, +- unsigned int th, bool check_profitability, +- poly_uint64 versioning_threshold) ++vect_loop_versioning (loop_vec_info loop_vinfo) + { + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop; + struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo); +@@ -3009,10 +3193,15 @@ vect_loop_versioning (loop_vec_info loop_vinfo, + bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo); + bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo); + bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo); ++ poly_uint64 versioning_threshold ++ = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo); + tree version_simd_if_cond + = LOOP_REQUIRES_VERSIONING_FOR_SIMD_IF_COND (loop_vinfo); ++ unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); + +- if (check_profitability) ++ if (th >= vect_vf_for_cost (loop_vinfo) ++ && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) ++ && !ordered_p (th, versioning_threshold)) + cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters, + build_int_cst (TREE_TYPE (scalar_loop_iters), + th - 1)); +diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c +index 0308b26b808..6cbdfd1ad1a 100644 +--- a/gcc/tree-vect-loop.c ++++ b/gcc/tree-vect-loop.c +@@ -154,6 +154,8 @@ along with GCC; see the file COPYING3. If not see + */ + + static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *); ++static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info, ++ bool *); + + /* Subroutine of vect_determine_vf_for_stmt that handles only one + statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE +@@ -325,7 +327,7 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo) + "get vectype for scalar type: %T\n", + scalar_type); + +- vectype = get_vectype_for_scalar_type (scalar_type); ++ vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); + if (!vectype) + return opt_result::failure_at (phi, + "not vectorized: unsupported " +@@ -559,19 +561,19 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop) + && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type); + + stmt_vec_info reduc_stmt_info +- = vect_force_simple_reduction (loop_vinfo, stmt_vinfo, +- &double_reduc, false); ++ = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, &double_reduc); + if (reduc_stmt_info) + { +- if (double_reduc) +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_NOTE, vect_location, ++ STMT_VINFO_REDUC_DEF (stmt_vinfo) = reduc_stmt_info; ++ STMT_VINFO_REDUC_DEF (reduc_stmt_info) = stmt_vinfo; ++ if (double_reduc) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, + "Detected double reduction.\n"); + + STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def; +- STMT_VINFO_DEF_TYPE (reduc_stmt_info) +- = vect_double_reduction_def; ++ STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_double_reduction_def; + } + else + { +@@ -582,7 +584,6 @@ vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop) + "Detected vectorizable nested cycle.\n"); + + STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle; +- STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_nested_cycle; + } + else + { +@@ -688,13 +689,16 @@ vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo) + stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first); + while (next) + { +- if (! STMT_VINFO_IN_PATTERN_P (next)) ++ if (! 
STMT_VINFO_IN_PATTERN_P (next) ++ || STMT_VINFO_REDUC_IDX (STMT_VINFO_RELATED_STMT (next)) == -1) + break; + next = REDUC_GROUP_NEXT_ELEMENT (next); + } +- /* If not all stmt in the chain are patterns try to handle +- the chain without patterns. */ +- if (! next) ++ /* If not all stmt in the chain are patterns or if we failed ++ to update STMT_VINFO_REDUC_IDX try to handle the chain ++ without patterns. */ ++ if (! next ++ && STMT_VINFO_REDUC_IDX (STMT_VINFO_RELATED_STMT (first)) != -1) + { + vect_fixup_reduc_chain (first); + LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i] +@@ -730,9 +734,7 @@ vect_get_loop_niters (struct loop *loop, tree *assumptions, + if (!exit) + return cond; + +- niter = chrec_dont_know; + may_be_zero = NULL_TREE; +- niter_assumptions = boolean_true_node; + if (!number_of_iterations_exit_assumptions (loop, exit, &niter_desc, NULL) + || chrec_contains_undetermined (niter_desc.niter)) + return cond; +@@ -826,6 +828,8 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared) + ivexpr_map (NULL), + slp_unrolling_factor (1), + single_scalar_iteration_cost (0), ++ vec_outside_cost (0), ++ vec_inside_cost (0), + vectorizable (false), + can_fully_mask_p (true), + fully_masked_p (false), +@@ -885,6 +889,8 @@ _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared) + } + } + } ++ ++ epilogue_vinfos.create (6); + } + + /* Free all levels of MASKS. */ +@@ -959,6 +965,7 @@ _loop_vec_info::~_loop_vec_info () + + release_vec_loop_masks (&masks); + delete ivexpr_map; ++ epilogue_vinfos.release (); + + loop->aux = NULL; + } +@@ -1431,8 +1438,8 @@ vect_update_vf_for_slp (loop_vec_info loop_vinfo) + dump_printf_loc (MSG_NOTE, vect_location, + "Loop contains SLP and non-SLP stmts\n"); + /* Both the vectorization factor and unroll factor have the form +- current_vector_size * X for some rational X, so they must have +- a common multiple. */ ++ GET_MODE_SIZE (loop_vinfo->vector_mode) * X for some rational X, ++ so they must have a common multiple. */ + vectorization_factor + = force_common_multiple (vectorization_factor, + LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)); +@@ -1535,12 +1542,18 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) + phi_op = PHI_ARG_DEF (phi, 0); + stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op); + if (!op_def_info) +- return opt_result::failure_at (phi, "unsupported phi"); ++ return opt_result::failure_at (phi, "unsupported phi\n"); + + if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer + && (STMT_VINFO_RELEVANT (op_def_info) + != vect_used_in_outer_by_reduction)) +- return opt_result::failure_at (phi, "unsupported phi"); ++ return opt_result::failure_at (phi, "unsupported phi\n"); ++ ++ if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def ++ || (STMT_VINFO_DEF_TYPE (stmt_info) ++ == vect_double_reduction_def)) ++ && !vectorizable_lc_phi (stmt_info, NULL, NULL)) ++ return opt_result::failure_at (phi, "unsupported phi\n"); + } + + continue; +@@ -1564,18 +1577,19 @@ vect_analyze_loop_operations (loop_vec_info loop_vinfo) + ok = vectorizable_induction (stmt_info, NULL, NULL, NULL, + &cost_vec); + else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def ++ || (STMT_VINFO_DEF_TYPE (stmt_info) ++ == vect_double_reduction_def) + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) + && ! 
PURE_SLP_STMT (stmt_info)) +- ok = vectorizable_reduction (stmt_info, NULL, NULL, NULL, NULL, +- &cost_vec); ++ ok = vectorizable_reduction (stmt_info, NULL, NULL, &cost_vec); + } + + /* SLP PHIs are tested by vect_slp_analyze_node_operations. */ + if (ok + && STMT_VINFO_LIVE_P (stmt_info) + && !PURE_SLP_STMT (stmt_info)) +- ok = vectorizable_live_operation (stmt_info, NULL, NULL, -1, NULL, +- &cost_vec); ++ ok = vectorizable_live_operation (stmt_info, NULL, NULL, NULL, ++ -1, NULL, &cost_vec); + + if (!ok) + return opt_result::failure_at (phi, +@@ -1692,9 +1706,20 @@ vect_analyze_loop_costing (loop_vec_info loop_vinfo) + return 0; + } + +- HOST_WIDE_INT estimated_niter = estimated_stmt_executions_int (loop); +- if (estimated_niter == -1) +- estimated_niter = likely_max_stmt_executions_int (loop); ++ HOST_WIDE_INT estimated_niter; ++ ++ /* If we are vectorizing an epilogue then we know the maximum number of ++ scalar iterations it will cover is at least one lower than the ++ vectorization factor of the main loop. */ ++ if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) ++ estimated_niter ++ = vect_vf_for_cost (LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)) - 1; ++ else ++ { ++ estimated_niter = estimated_stmt_executions_int (loop); ++ if (estimated_niter == -1) ++ estimated_niter = likely_max_stmt_executions_int (loop); ++ } + if (estimated_niter != -1 + && ((unsigned HOST_WIDE_INT) estimated_niter + < MAX (th, (unsigned) min_profitable_estimate))) +@@ -1774,6 +1799,101 @@ vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs, + return opt_result::success (); + } + ++/* Look for SLP-only access groups and turn each individual access into its own ++ group. */ ++static void ++vect_dissolve_slp_only_groups (loop_vec_info loop_vinfo) ++{ ++ unsigned int i; ++ struct data_reference *dr; ++ ++ DUMP_VECT_SCOPE ("vect_dissolve_slp_only_groups"); ++ ++ vec datarefs = loop_vinfo->shared->datarefs; ++ FOR_EACH_VEC_ELT (datarefs, i, dr) ++ { ++ gcc_assert (DR_REF (dr)); ++ stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (DR_STMT (dr)); ++ ++ /* Check if the load is a part of an interleaving chain. */ ++ if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) ++ { ++ stmt_vec_info first_element = DR_GROUP_FIRST_ELEMENT (stmt_info); ++ unsigned int group_size = DR_GROUP_SIZE (first_element); ++ ++ /* Check if SLP-only groups. */ ++ if (!STMT_SLP_TYPE (stmt_info) ++ && STMT_VINFO_SLP_VECT_ONLY (first_element)) ++ { ++ /* Dissolve the group. */ ++ STMT_VINFO_SLP_VECT_ONLY (first_element) = false; ++ ++ stmt_vec_info vinfo = first_element; ++ while (vinfo) ++ { ++ stmt_vec_info next = DR_GROUP_NEXT_ELEMENT (vinfo); ++ DR_GROUP_FIRST_ELEMENT (vinfo) = vinfo; ++ DR_GROUP_NEXT_ELEMENT (vinfo) = NULL; ++ DR_GROUP_SIZE (vinfo) = 1; ++ DR_GROUP_GAP (vinfo) = group_size - 1; ++ vinfo = next; ++ } ++ } ++ } ++ } ++} ++ ++ ++/* Decides whether we need to create an epilogue loop to handle ++ remaining scalar iterations and sets PEELING_FOR_NITERS accordingly. */ ++ ++void ++determine_peel_for_niter (loop_vec_info loop_vinfo) ++{ ++ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false; ++ ++ unsigned HOST_WIDE_INT const_vf; ++ HOST_WIDE_INT max_niter ++ = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)); ++ ++ unsigned th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); ++ if (!th && LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo)) ++ th = LOOP_VINFO_COST_MODEL_THRESHOLD (LOOP_VINFO_ORIG_LOOP_INFO ++ (loop_vinfo)); ++ ++ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) ++ /* The main loop handles all iterations. 
*/ ++ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false; ++ else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) ++ && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0) ++ { ++ /* Work out the (constant) number of iterations that need to be ++ peeled for reasons other than niters. */ ++ unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); ++ if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) ++ peel_niter += 1; ++ if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter, ++ LOOP_VINFO_VECT_FACTOR (loop_vinfo))) ++ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; ++ } ++ else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) ++ /* ??? When peeling for gaps but not alignment, we could ++ try to check whether the (variable) niters is known to be ++ VF * N + 1. That's something of a niche case though. */ ++ || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ++ || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf) ++ || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) ++ < (unsigned) exact_log2 (const_vf)) ++ /* In case of versioning, check if the maximum number of ++ iterations is greater than th. If they are identical, ++ the epilogue is unnecessary. */ ++ && (!LOOP_REQUIRES_VERSIONING (loop_vinfo) ++ || ((unsigned HOST_WIDE_INT) max_niter ++ > (th / const_vf) * const_vf)))) ++ LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; ++} ++ ++ + /* Function vect_analyze_loop_2. + + Apply a set of analyses on LOOP, and create a loop_vec_info struct +@@ -1786,6 +1906,15 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts) + int res; + unsigned int max_vf = MAX_VECTORIZATION_FACTOR; + poly_uint64 min_vf = 2; ++ loop_vec_info orig_loop_vinfo = NULL; ++ ++ /* If we are dealing with an epilogue then orig_loop_vinfo points to the ++ loop_vec_info of the first vectorized loop. */ ++ if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) ++ orig_loop_vinfo = LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo); ++ else ++ orig_loop_vinfo = loop_vinfo; ++ gcc_assert (orig_loop_vinfo); + + /* The first group of checks is independent of the vector size. */ + fatal = true; +@@ -1824,7 +1953,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts) + /* Analyze the data references and also adjust the minimal + vectorization factor according to the loads and stores. */ + +- ok = vect_analyze_data_refs (loop_vinfo, &min_vf); ++ ok = vect_analyze_data_refs (loop_vinfo, &min_vf, &fatal); + if (!ok) + { + if (dump_enabled_p ()) +@@ -1855,7 +1984,7 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts) + + /* Data-flow analysis to detect stmts that do not need to be vectorized. */ + +- ok = vect_mark_stmts_to_be_vectorized (loop_vinfo); ++ ok = vect_mark_stmts_to_be_vectorized (loop_vinfo, &fatal); + if (!ok) + { + if (dump_enabled_p ()) +@@ -1901,7 +2030,6 @@ vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts) + vect_compute_single_scalar_iteration_cost (loop_vinfo); + + poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); +- unsigned th; + + /* Check the SLP opportunities in the loop, analyze and build SLP trees. */ + ok = vect_analyze_slp (loop_vinfo, *n_stmts); +@@ -1941,9 +2069,6 @@ start_over: + LOOP_VINFO_INT_NITERS (loop_vinfo)); + } + +- HOST_WIDE_INT max_niter +- = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)); +- + /* Analyze the alignment of the data-refs in the loop. + Fail if a data reference is found that cannot be vectorized. 
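For the compile-time-constant case, the determine_peel_for_niter logic introduced above reduces to a divisibility test after accounting for alignment and gap peeling. A standalone sketch (not GCC code; the helper name and the example numbers are invented):

  // Standalone sketch (not GCC code) of determine_peel_for_niter for the case
  // where the scalar iteration count is a compile-time constant.
  #include <cstdio>

  static bool
  need_epilogue (unsigned niters, unsigned peel_for_alignment,
                 bool peel_for_gaps, unsigned vf)
  {
    unsigned peel_niter = peel_for_alignment + (peel_for_gaps ? 1 : 0);
    // An epilogue is needed unless the remaining iterations are an exact
    // multiple of the vectorization factor.
    return (niters - peel_niter) % vf != 0;
  }

  int main ()
  {
    std::printf ("%d\n", need_epilogue (128, 0, false, 16));  // 0: no epilogue
    std::printf ("%d\n", need_epilogue (103, 3, false, 16));  // 1: 100 % 16 != 0
  }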
*/ + +@@ -1990,6 +2115,9 @@ start_over: + } + } + ++ /* Dissolve SLP-only groups. */ ++ vect_dissolve_slp_only_groups (loop_vinfo); ++ + /* Scan all the remaining operations in the loop that are not subject + to SLP and make sure they are vectorizable. */ + ok = vect_analyze_loop_operations (loop_vinfo); +@@ -2032,6 +2160,16 @@ start_over: + " support peeling for gaps.\n"); + } + ++ /* If we're vectorizing an epilogue loop, we either need a fully-masked ++ loop or a loop that has a lower VF than the main loop. */ ++ if (LOOP_VINFO_EPILOGUE_P (loop_vinfo) ++ && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) ++ && maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo), ++ LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo))) ++ return opt_result::failure_at (vect_location, ++ "Vectorization factor too high for" ++ " epilogue loop.\n"); ++ + /* Check the costings of the loop make vectorizing worthwhile. */ + res = vect_analyze_loop_costing (loop_vinfo); + if (res < 0) +@@ -2044,42 +2182,7 @@ start_over: + return opt_result::failure_at (vect_location, + "Loop costings not worthwhile.\n"); + +- /* Decide whether we need to create an epilogue loop to handle +- remaining scalar iterations. */ +- th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); +- +- unsigned HOST_WIDE_INT const_vf; +- if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) +- /* The main loop handles all iterations. */ +- LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false; +- else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) +- && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0) +- { +- /* Work out the (constant) number of iterations that need to be +- peeled for reasons other than niters. */ +- unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); +- if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) +- peel_niter += 1; +- if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter, +- LOOP_VINFO_VECT_FACTOR (loop_vinfo))) +- LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; +- } +- else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) +- /* ??? When peeling for gaps but not alignment, we could +- try to check whether the (variable) niters is known to be +- VF * N + 1. That's something of a niche case though. */ +- || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) +- || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf) +- || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) +- < (unsigned) exact_log2 (const_vf)) +- /* In case of versioning, check if the maximum number of +- iterations is greater than th. If they are identical, +- the epilogue is unnecessary. */ +- && (!LOOP_REQUIRES_VERSIONING (loop_vinfo) +- || ((unsigned HOST_WIDE_INT) max_niter +- > (th / const_vf) * const_vf)))) +- LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; +- ++ determine_peel_for_niter (loop_vinfo); + /* If an epilogue loop is required make sure we can create one. */ + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) + || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)) +@@ -2101,10 +2204,21 @@ start_over: + /* During peeling, we need to check if number of loop iterations is + enough for both peeled prolog loop and vector loop. This check + can be merged along with threshold check of loop versioning, so +- increase threshold for this case if necessary. */ +- if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) ++ increase threshold for this case if necessary. ++ ++ If we are analyzing an epilogue we still want to check what its ++ versioning threshold would be. 
If we decide to vectorize the epilogues we ++ will want to use the lowest versioning threshold of all epilogues and main ++ loop. This will enable us to enter a vectorized epilogue even when ++ versioning the loop. We can't simply check whether the epilogue requires ++ versioning though since we may have skipped some versioning checks when ++ analyzing the epilogue. For instance, checks for alias versioning will be ++ skipped when dealing with epilogues as we assume we already checked them ++ for the main loop. So instead we always check the 'orig_loop_vinfo'. */ ++ if (LOOP_REQUIRES_VERSIONING (orig_loop_vinfo)) + { + poly_uint64 niters_th = 0; ++ unsigned int th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); + + if (!vect_use_loop_mask_for_alignment_p (loop_vinfo)) + { +@@ -2125,6 +2239,14 @@ start_over: + /* One additional iteration because of peeling for gap. */ + if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) + niters_th += 1; ++ ++ /* Use the same condition as vect_transform_loop to decide when to use ++ the cost to determine a versioning threshold. */ ++ if (th >= vect_vf_for_cost (loop_vinfo) ++ && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) ++ && ordered_p (th, niters_th)) ++ niters_th = ordered_max (poly_uint64 (th), niters_th); ++ + LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th; + } + +@@ -2240,22 +2362,95 @@ again: + goto start_over; + } + ++/* Return true if vectorizing a loop using NEW_LOOP_VINFO appears ++ to be better than vectorizing it using OLD_LOOP_VINFO. Assume that ++ OLD_LOOP_VINFO is better unless something specifically indicates ++ otherwise. ++ ++ Note that this deliberately isn't a partial order. */ ++ ++static bool ++vect_better_loop_vinfo_p (loop_vec_info new_loop_vinfo, ++ loop_vec_info old_loop_vinfo) ++{ ++ struct loop *loop = LOOP_VINFO_LOOP (new_loop_vinfo); ++ gcc_assert (LOOP_VINFO_LOOP (old_loop_vinfo) == loop); ++ ++ poly_int64 new_vf = LOOP_VINFO_VECT_FACTOR (new_loop_vinfo); ++ poly_int64 old_vf = LOOP_VINFO_VECT_FACTOR (old_loop_vinfo); ++ ++ /* Always prefer a VF of loop->simdlen over any other VF. */ ++ if (loop->simdlen) ++ { ++ bool new_simdlen_p = known_eq (new_vf, loop->simdlen); ++ bool old_simdlen_p = known_eq (old_vf, loop->simdlen); ++ if (new_simdlen_p != old_simdlen_p) ++ return new_simdlen_p; ++ } ++ ++ /* Limit the VFs to what is likely to be the maximum number of iterations, ++ to handle cases in which at least one loop_vinfo is fully-masked. */ ++ HOST_WIDE_INT estimated_max_niter = likely_max_stmt_executions_int (loop); ++ if (estimated_max_niter != -1) ++ { ++ if (known_le (estimated_max_niter, new_vf)) ++ new_vf = estimated_max_niter; ++ if (known_le (estimated_max_niter, old_vf)) ++ old_vf = estimated_max_niter; ++ } ++ ++ /* Check whether the (fractional) cost per scalar iteration is lower ++ or higher: new_inside_cost / new_vf vs. old_inside_cost / old_vf. */ ++ poly_widest_int rel_new = (new_loop_vinfo->vec_inside_cost ++ * poly_widest_int (old_vf)); ++ poly_widest_int rel_old = (old_loop_vinfo->vec_inside_cost ++ * poly_widest_int (new_vf)); ++ if (maybe_lt (rel_old, rel_new)) ++ return false; ++ if (known_lt (rel_new, rel_old)) ++ return true; ++ ++ /* If there's nothing to choose between the loop bodies, see whether ++ there's a difference in the prologue and epilogue costs. 
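The comparison in vect_better_loop_vinfo_p above avoids fractions by cross-multiplying: new_inside_cost / new_vf is lower than old_inside_cost / old_vf exactly when new_inside_cost * old_vf is lower than old_inside_cost * new_vf. A standalone sketch with scalar stand-ins for the poly_int quantities (not GCC code; the Attempt struct and numbers are invented):

  // Standalone sketch (not GCC code) of the cost comparison that decides
  // whether a newly analysed vector mode beats the previously chosen one.
  #include <cstdint>
  #include <cstdio>

  struct Attempt { uint64_t vf, inside_cost, outside_cost; };  // invented

  static bool
  better_than (const Attempt &n, const Attempt &o)
  {
    // new_inside / new_vf < old_inside / old_vf
    //   <=>  new_inside * old_vf < old_inside * new_vf
    uint64_t rel_new = n.inside_cost * o.vf;
    uint64_t rel_old = o.inside_cost * n.vf;
    if (rel_new != rel_old)
      return rel_new < rel_old;
    // The loop bodies tie: fall back to the prologue/epilogue cost.
    return n.outside_cost < o.outside_cost;
  }

  int main ()
  {
    Attempt old_mode = { 16, 40, 10 };   // e.g. a wider mode
    Attempt new_mode = {  8, 18, 10 };   // a narrower but relatively cheaper one
    // 18/8 = 2.25 vs 40/16 = 2.5 cost per scalar iteration: the new mode wins.
    std::printf ("new mode better: %d\n", better_than (new_mode, old_mode));
  }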
*/ ++ if (new_loop_vinfo->vec_outside_cost != old_loop_vinfo->vec_outside_cost) ++ return new_loop_vinfo->vec_outside_cost < old_loop_vinfo->vec_outside_cost; ++ ++ return false; ++} ++ ++/* Decide whether to replace OLD_LOOP_VINFO with NEW_LOOP_VINFO. Return ++ true if we should. */ ++ ++static bool ++vect_joust_loop_vinfos (loop_vec_info new_loop_vinfo, ++ loop_vec_info old_loop_vinfo) ++{ ++ if (!vect_better_loop_vinfo_p (new_loop_vinfo, old_loop_vinfo)) ++ return false; ++ ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "***** Preferring vector mode %s to vector mode %s\n", ++ GET_MODE_NAME (new_loop_vinfo->vector_mode), ++ GET_MODE_NAME (old_loop_vinfo->vector_mode)); ++ return true; ++} ++ + /* Function vect_analyze_loop. + + Apply a set of analyses on LOOP, and create a loop_vec_info struct + for it. The different analyses will record information in the +- loop_vec_info struct. If ORIG_LOOP_VINFO is not NULL epilogue must +- be vectorized. */ ++ loop_vec_info struct. */ + opt_loop_vec_info +-vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo, +- vec_info_shared *shared) ++vect_analyze_loop (struct loop *loop, vec_info_shared *shared) + { +- auto_vector_sizes vector_sizes; ++ auto_vector_modes vector_modes; + + /* Autodetect first vector size we try. */ +- current_vector_size = 0; +- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes); +- unsigned int next_size = 0; ++ unsigned int autovec_flags ++ = targetm.vectorize.autovectorize_vector_modes (&vector_modes, ++ loop->simdlen != 0); ++ unsigned int mode_i = 0; + + DUMP_VECT_SCOPE ("analyze_loop_nest"); + +@@ -2272,58 +2467,221 @@ vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo, + " loops cannot be vectorized\n"); + + unsigned n_stmts = 0; +- poly_uint64 autodetected_vector_size = 0; ++ machine_mode autodetected_vector_mode = VOIDmode; ++ opt_loop_vec_info first_loop_vinfo = opt_loop_vec_info::success (NULL); ++ machine_mode next_vector_mode = VOIDmode; ++ poly_uint64 lowest_th = 0; ++ unsigned vectorized_loops = 0; ++ bool pick_lowest_cost_p = ((autovec_flags & VECT_COMPARE_COSTS) ++ && !unlimited_cost_model (loop)); ++ ++ bool vect_epilogues = false; ++ opt_result res = opt_result::success (); ++ unsigned HOST_WIDE_INT simdlen = loop->simdlen; + while (1) + { + /* Check the CFG characteristics of the loop (nesting, entry/exit). */ +- opt_loop_vec_info loop_vinfo +- = vect_analyze_loop_form (loop, shared); ++ opt_loop_vec_info loop_vinfo = vect_analyze_loop_form (loop, shared); + if (!loop_vinfo) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "bad loop form.\n"); ++ gcc_checking_assert (first_loop_vinfo == NULL); + return loop_vinfo; + } ++ loop_vinfo->vector_mode = next_vector_mode; + + bool fatal = false; + +- if (orig_loop_vinfo) +- LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo; ++ /* When pick_lowest_cost_p is true, we should in principle iterate ++ over all the loop_vec_infos that LOOP_VINFO could replace and ++ try to vectorize LOOP_VINFO under the same conditions. ++ E.g. when trying to replace an epilogue loop, we should vectorize ++ LOOP_VINFO as an epilogue loop with the same VF limit. When trying ++ to replace the main loop, we should vectorize LOOP_VINFO as a main ++ loop too. ++ ++ However, autovectorize_vector_modes is usually sorted as follows: ++ ++ - Modes that naturally produce lower VFs usually follow modes that ++ naturally produce higher VFs. 
++ ++ - When modes naturally produce the same VF, maskable modes ++ usually follow unmaskable ones, so that the maskable mode ++ can be used to vectorize the epilogue of the unmaskable mode. ++ ++ This order is preferred because it leads to the maximum ++ epilogue vectorization opportunities. Targets should only use ++ a different order if they want to make wide modes available while ++ disparaging them relative to earlier, smaller modes. The assumption ++ in that case is that the wider modes are more expensive in some ++ way that isn't reflected directly in the costs. ++ ++ There should therefore be few interesting cases in which ++ LOOP_VINFO fails when treated as an epilogue loop, succeeds when ++ treated as a standalone loop, and ends up being genuinely cheaper ++ than FIRST_LOOP_VINFO. */ ++ if (vect_epilogues) ++ LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = first_loop_vinfo; ++ ++ res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts); ++ if (mode_i == 0) ++ autodetected_vector_mode = loop_vinfo->vector_mode; ++ if (dump_enabled_p ()) ++ { ++ if (res) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "***** Analysis succeeded with vector mode %s\n", ++ GET_MODE_NAME (loop_vinfo->vector_mode)); ++ else ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "***** Analysis failed with vector mode %s\n", ++ GET_MODE_NAME (loop_vinfo->vector_mode)); ++ } ++ ++ loop->aux = NULL; ++ ++ if (!fatal) ++ while (mode_i < vector_modes.length () ++ && vect_chooses_same_modes_p (loop_vinfo, vector_modes[mode_i])) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "***** The result for vector mode %s would" ++ " be the same\n", ++ GET_MODE_NAME (vector_modes[mode_i])); ++ mode_i += 1; ++ } + +- opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts); + if (res) + { + LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; ++ vectorized_loops++; + +- return loop_vinfo; +- } +- +- delete loop_vinfo; ++ /* Once we hit the desired simdlen for the first time, ++ discard any previous attempts. */ ++ if (simdlen ++ && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), simdlen)) ++ { ++ delete first_loop_vinfo; ++ first_loop_vinfo = opt_loop_vec_info::success (NULL); ++ LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = NULL; ++ simdlen = 0; ++ } ++ else if (pick_lowest_cost_p && first_loop_vinfo) ++ { ++ /* Keep trying to roll back vectorization attempts while the ++ loop_vec_infos they produced were worse than this one. */ ++ vec &vinfos = first_loop_vinfo->epilogue_vinfos; ++ while (!vinfos.is_empty () ++ && vect_joust_loop_vinfos (loop_vinfo, vinfos.last ())) ++ { ++ gcc_assert (vect_epilogues); ++ delete vinfos.pop (); ++ } ++ if (vinfos.is_empty () ++ && vect_joust_loop_vinfos (loop_vinfo, first_loop_vinfo)) ++ { ++ delete first_loop_vinfo; ++ first_loop_vinfo = opt_loop_vec_info::success (NULL); ++ LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = NULL; ++ } ++ } + +- if (next_size == 0) +- autodetected_vector_size = current_vector_size; ++ if (first_loop_vinfo == NULL) ++ { ++ first_loop_vinfo = loop_vinfo; ++ lowest_th = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo); ++ } ++ else if (vect_epilogues ++ /* For now only allow one epilogue loop. */ ++ && first_loop_vinfo->epilogue_vinfos.is_empty ()) ++ { ++ first_loop_vinfo->epilogue_vinfos.safe_push (loop_vinfo); ++ poly_uint64 th = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo); ++ gcc_assert (!LOOP_REQUIRES_VERSIONING (loop_vinfo) ++ || maybe_ne (lowest_th, 0U)); ++ /* Keep track of the known smallest versioning ++ threshold. 
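Tracking the smallest versioning threshold matters because a single runtime guard protects the whole versioned region; if it used the main loop's larger threshold, iteration counts that a vectorized epilogue could still handle would fall through to the scalar copy. A minimal standalone sketch (not GCC code; the numbers are invented):

  // Standalone sketch (not GCC code): the runtime guard of a versioned loop
  // must use the smallest threshold of the main loop and all accepted
  // epilogues, otherwise profitable epilogue cases would be sent to the
  // scalar copy.
  #include <algorithm>
  #include <cstdio>

  int main ()
  {
    unsigned main_loop_th = 16;             // main loop versioning threshold
    unsigned epilogue_ths[] = { 8, 4 };     // thresholds of accepted epilogues

    unsigned lowest_th = main_loop_th;
    for (unsigned th : epilogue_ths)
      lowest_th = std::min (lowest_th, th); // "ordered_min" on the poly_uint64s

    std::printf ("runtime versioning threshold: %u\n", lowest_th);  // 4
  }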
*/ ++ if (ordered_p (lowest_th, th)) ++ lowest_th = ordered_min (lowest_th, th); ++ } ++ else ++ delete loop_vinfo; ++ ++ /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is ++ enabled, SIMDUID is not set, it is the innermost loop and we have ++ either already found the loop's SIMDLEN or there was no SIMDLEN to ++ begin with. ++ TODO: Enable epilogue vectorization for loops with SIMDUID set. */ ++ vect_epilogues = (!simdlen ++ && loop->inner == NULL ++ && PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK) ++ && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo) ++ && !loop->simduid ++ /* For now only allow one epilogue loop, but allow ++ pick_lowest_cost_p to replace it. */ ++ && (first_loop_vinfo->epilogue_vinfos.is_empty () ++ || pick_lowest_cost_p)); ++ ++ /* Commit to first_loop_vinfo if we have no reason to try ++ alternatives. */ ++ if (!simdlen && !vect_epilogues && !pick_lowest_cost_p) ++ break; ++ } ++ else ++ { ++ delete loop_vinfo; ++ if (fatal) ++ { ++ gcc_checking_assert (first_loop_vinfo == NULL); ++ break; ++ } ++ } + +- if (next_size < vector_sizes.length () +- && known_eq (vector_sizes[next_size], autodetected_vector_size)) +- next_size += 1; ++ if (mode_i < vector_modes.length () ++ && VECTOR_MODE_P (autodetected_vector_mode) ++ && (related_vector_mode (vector_modes[mode_i], ++ GET_MODE_INNER (autodetected_vector_mode)) ++ == autodetected_vector_mode) ++ && (related_vector_mode (autodetected_vector_mode, ++ GET_MODE_INNER (vector_modes[mode_i])) ++ == vector_modes[mode_i])) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "***** Skipping vector mode %s, which would" ++ " repeat the analysis for %s\n", ++ GET_MODE_NAME (vector_modes[mode_i]), ++ GET_MODE_NAME (autodetected_vector_mode)); ++ mode_i += 1; ++ } + +- if (fatal +- || next_size == vector_sizes.length () +- || known_eq (current_vector_size, 0U)) +- return opt_loop_vec_info::propagate_failure (res); ++ if (mode_i == vector_modes.length () ++ || autodetected_vector_mode == VOIDmode) ++ break; + + /* Try the next biggest vector size. */ +- current_vector_size = vector_sizes[next_size++]; ++ next_vector_mode = vector_modes[mode_i++]; + if (dump_enabled_p ()) +- { +- dump_printf_loc (MSG_NOTE, vect_location, +- "***** Re-trying analysis with " +- "vector size "); +- dump_dec (MSG_NOTE, current_vector_size); +- dump_printf (MSG_NOTE, "\n"); +- } ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "***** Re-trying analysis with vector mode %s\n", ++ GET_MODE_NAME (next_vector_mode)); ++ } ++ ++ if (first_loop_vinfo) ++ { ++ loop->aux = (loop_vec_info) first_loop_vinfo; ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "***** Choosing vector mode %s\n", ++ GET_MODE_NAME (first_loop_vinfo->vector_mode)); ++ LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo) = lowest_th; ++ return first_loop_vinfo; + } ++ ++ return opt_loop_vec_info::propagate_failure (res); + } + + /* Return true if there is an in-order reduction function for CODE, storing +@@ -2397,17 +2755,17 @@ reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn) + + /* If there is a neutral value X such that SLP reduction NODE would not + be affected by the introduction of additional X elements, return that X, +- otherwise return null. CODE is the code of the reduction. REDUC_CHAIN +- is true if the SLP statements perform a single reduction, false if each +- statement performs an independent reduction. */ ++ otherwise return null. 
CODE is the code of the reduction and VECTOR_TYPE ++ is the vector type that would hold element X. REDUC_CHAIN is true if ++ the SLP statements perform a single reduction, false if each statement ++ performs an independent reduction. */ + + static tree +-neutral_op_for_slp_reduction (slp_tree slp_node, tree_code code, +- bool reduc_chain) ++neutral_op_for_slp_reduction (slp_tree slp_node, tree vector_type, ++ tree_code code, bool reduc_chain) + { + vec stmts = SLP_TREE_SCALAR_STMTS (slp_node); + stmt_vec_info stmt_vinfo = stmts[0]; +- tree vector_type = STMT_VINFO_VECTYPE (stmt_vinfo); + tree scalar_type = TREE_TYPE (vector_type); + struct loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father; + gcc_assert (loop); +@@ -2453,241 +2811,55 @@ report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg) + dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt); + } + +-/* DEF_STMT_INFO occurs in a loop that contains a potential reduction +- operation. Return true if the results of DEF_STMT_INFO are something +- that can be accumulated by such a reduction. */ ++/* Return true if we need an in-order reduction for operation CODE ++ on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer ++ overflow must wrap. */ + +-static bool +-vect_valid_reduction_input_p (stmt_vec_info def_stmt_info) ++bool ++needs_fold_left_reduction_p (tree type, tree_code code) + { +- return (is_gimple_assign (def_stmt_info->stmt) +- || is_gimple_call (def_stmt_info->stmt) +- || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def +- || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI +- && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def +- && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt)))); +-} ++ /* CHECKME: check for !flag_finite_math_only too? */ ++ if (SCALAR_FLOAT_TYPE_P (type)) ++ switch (code) ++ { ++ case MIN_EXPR: ++ case MAX_EXPR: ++ return false; + +-/* Detect SLP reduction of the form: ++ default: ++ return !flag_associative_math; ++ } + +- #a1 = phi +- a2 = operation (a1) +- a3 = operation (a2) +- a4 = operation (a3) +- a5 = operation (a4) ++ if (INTEGRAL_TYPE_P (type)) ++ { ++ if (!operation_no_trapping_overflow (type, code)) ++ return true; ++ return false; ++ } + +- #a = phi ++ if (SAT_FIXED_POINT_TYPE_P (type)) ++ return true; + +- PHI is the reduction phi node (#a1 = phi above) +- FIRST_STMT is the first reduction stmt in the chain +- (a2 = operation (a1)). ++ return false; ++} + +- Return TRUE if a reduction chain was detected. */ ++/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and ++ has a handled computation expression. Store the main reduction ++ operation in *CODE. 
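The floating-point branch of needs_fold_left_reduction_p above exists because vectorizing a reduction reorders the additions, and floating-point addition is not associative; unless -fassociative-math permits the reordering, the reduction must stay in order (FOLD_LEFT_REDUCTION). A standalone demonstration of the underlying issue (not GCC code):

  // Standalone demonstration (not GCC code): vectorizing a float sum
  // reassociates the additions, which can change the result, so the reduction
  // has to stay in order unless -fassociative-math allows the reordering.
  #include <cstdio>

  int main ()
  {
    float a = 1.0e8f, b = -1.0e8f, c = 1.0f;
    float in_order = (a + b) + c;    // scalar loop order: 1
    float reordered = a + (b + c);   // a reassociated order: 0
    std::printf ("in order: %g, reassociated: %g\n", in_order, reordered);
  }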
*/ + + static bool +-vect_is_slp_reduction (loop_vec_info loop_info, gimple *phi, +- gimple *first_stmt) ++check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, ++ tree loop_arg, enum tree_code *code, ++ vec > &path) + { +- struct loop *loop = (gimple_bb (phi))->loop_father; +- struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); +- enum tree_code code; +- gimple *loop_use_stmt = NULL; +- stmt_vec_info use_stmt_info; +- tree lhs; +- imm_use_iterator imm_iter; +- use_operand_p use_p; +- int nloop_uses, size = 0, n_out_of_loop_uses; +- bool found = false; +- +- if (loop != vect_loop) +- return false; +- +- auto_vec reduc_chain; +- lhs = PHI_RESULT (phi); +- code = gimple_assign_rhs_code (first_stmt); +- while (1) +- { +- nloop_uses = 0; +- n_out_of_loop_uses = 0; +- FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) +- { +- gimple *use_stmt = USE_STMT (use_p); +- if (is_gimple_debug (use_stmt)) +- continue; +- +- /* Check if we got back to the reduction phi. */ +- if (use_stmt == phi) +- { +- loop_use_stmt = use_stmt; +- found = true; +- break; +- } +- +- if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) +- { +- loop_use_stmt = use_stmt; +- nloop_uses++; +- } +- else +- n_out_of_loop_uses++; +- +- /* There are can be either a single use in the loop or two uses in +- phi nodes. */ +- if (nloop_uses > 1 || (n_out_of_loop_uses && nloop_uses)) +- return false; +- } +- +- if (found) +- break; +- +- /* We reached a statement with no loop uses. */ +- if (nloop_uses == 0) +- return false; +- +- /* This is a loop exit phi, and we haven't reached the reduction phi. */ +- if (gimple_code (loop_use_stmt) == GIMPLE_PHI) +- return false; +- +- if (!is_gimple_assign (loop_use_stmt) +- || code != gimple_assign_rhs_code (loop_use_stmt) +- || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt))) +- return false; +- +- /* Insert USE_STMT into reduction chain. */ +- use_stmt_info = loop_info->lookup_stmt (loop_use_stmt); +- reduc_chain.safe_push (use_stmt_info); +- +- lhs = gimple_assign_lhs (loop_use_stmt); +- size++; +- } +- +- if (!found || loop_use_stmt != phi || size < 2) +- return false; +- +- /* Swap the operands, if needed, to make the reduction operand be the second +- operand. */ +- lhs = PHI_RESULT (phi); +- for (unsigned i = 0; i < reduc_chain.length (); ++i) +- { +- gassign *next_stmt = as_a (reduc_chain[i]->stmt); +- if (gimple_assign_rhs2 (next_stmt) == lhs) +- { +- tree op = gimple_assign_rhs1 (next_stmt); +- stmt_vec_info def_stmt_info = loop_info->lookup_def (op); +- +- /* Check that the other def is either defined in the loop +- ("vect_internal_def"), or it's an induction (defined by a +- loop-header phi-node). */ +- if (def_stmt_info +- && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)) +- && vect_valid_reduction_input_p (def_stmt_info)) +- { +- lhs = gimple_assign_lhs (next_stmt); +- continue; +- } +- +- return false; +- } +- else +- { +- tree op = gimple_assign_rhs2 (next_stmt); +- stmt_vec_info def_stmt_info = loop_info->lookup_def (op); +- +- /* Check that the other def is either defined in the loop +- ("vect_internal_def"), or it's an induction (defined by a +- loop-header phi-node). 
*/ +- if (def_stmt_info +- && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)) +- && vect_valid_reduction_input_p (def_stmt_info)) +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: %G", +- next_stmt); +- +- swap_ssa_operands (next_stmt, +- gimple_assign_rhs1_ptr (next_stmt), +- gimple_assign_rhs2_ptr (next_stmt)); +- update_stmt (next_stmt); +- +- if (CONSTANT_CLASS_P (gimple_assign_rhs1 (next_stmt))) +- LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true; +- } +- else +- return false; +- } +- +- lhs = gimple_assign_lhs (next_stmt); +- } +- +- /* Build up the actual chain. */ +- for (unsigned i = 0; i < reduc_chain.length () - 1; ++i) +- { +- REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0]; +- REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1]; +- } +- REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0]; +- REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL; +- +- /* Save the chain for further analysis in SLP detection. */ +- LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]); +- REDUC_GROUP_SIZE (reduc_chain[0]) = size; +- +- return true; +-} +- +-/* Return true if we need an in-order reduction for operation CODE +- on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer +- overflow must wrap. */ +- +-static bool +-needs_fold_left_reduction_p (tree type, tree_code code, +- bool need_wrapping_integral_overflow) +-{ +- /* CHECKME: check for !flag_finite_math_only too? */ +- if (SCALAR_FLOAT_TYPE_P (type)) +- switch (code) +- { +- case MIN_EXPR: +- case MAX_EXPR: +- return false; +- +- default: +- return !flag_associative_math; +- } +- +- if (INTEGRAL_TYPE_P (type)) +- { +- if (!operation_no_trapping_overflow (type, code)) +- return true; +- if (need_wrapping_integral_overflow +- && !TYPE_OVERFLOW_WRAPS (type) +- && operation_can_overflow (code)) +- return true; +- return false; +- } +- +- if (SAT_FIXED_POINT_TYPE_P (type)) +- return true; +- +- return false; +-} +- +-/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and +- reduction operation CODE has a handled computation expression. */ +- +-bool +-check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, +- tree loop_arg, enum tree_code code) +-{ +- auto_vec > path; +- auto_bitmap visited; +- tree lookfor = PHI_RESULT (phi); +- ssa_op_iter curri; +- use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE); +- while (USE_FROM_PTR (curr) != loop_arg) +- curr = op_iter_next_use (&curri); +- curri.i = curri.numops; +- do ++ auto_bitmap visited; ++ tree lookfor = PHI_RESULT (phi); ++ ssa_op_iter curri; ++ use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE); ++ while (USE_FROM_PTR (curr) != loop_arg) ++ curr = op_iter_next_use (&curri); ++ curri.i = curri.numops; ++ do + { + path.safe_push (std::make_pair (curri, curr)); + tree use = USE_FROM_PTR (curr); +@@ -2747,36 +2919,71 @@ pop: + /* Check whether the reduction path detected is valid. */ + bool fail = path.length () == 0; + bool neg = false; ++ *code = ERROR_MARK; + for (unsigned i = 1; i < path.length (); ++i) + { + gimple *use_stmt = USE_STMT (path[i].second); + tree op = USE_FROM_PTR (path[i].second); +- if (! has_single_use (op) +- || ! is_gimple_assign (use_stmt)) ++ if (! is_gimple_assign (use_stmt) ++ /* The following make sure we can compute the operand index ++ easily plus it mostly disallows chaining via COND_EXPR condition ++ operands. 
*/ ++ || (gimple_assign_rhs1 (use_stmt) != op ++ && gimple_assign_rhs2 (use_stmt) != op ++ && gimple_assign_rhs3 (use_stmt) != op)) + { + fail = true; + break; + } +- if (gimple_assign_rhs_code (use_stmt) != code) ++ /* Check there's only a single stmt the op is used on inside ++ of the loop. */ ++ imm_use_iterator imm_iter; ++ gimple *op_use_stmt; ++ unsigned cnt = 0; ++ FOR_EACH_IMM_USE_STMT (op_use_stmt, imm_iter, op) ++ if (!is_gimple_debug (op_use_stmt) ++ && flow_bb_inside_loop_p (loop, gimple_bb (op_use_stmt))) ++ cnt++; ++ if (cnt != 1) + { +- if (code == PLUS_EXPR +- && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) +- { +- /* Track whether we negate the reduction value each iteration. */ +- if (gimple_assign_rhs2 (use_stmt) == op) +- neg = ! neg; +- } +- else +- { +- fail = true; +- break; +- } ++ fail = true; ++ break; ++ } ++ tree_code use_code = gimple_assign_rhs_code (use_stmt); ++ if (use_code == MINUS_EXPR) ++ { ++ use_code = PLUS_EXPR; ++ /* Track whether we negate the reduction value each iteration. */ ++ if (gimple_assign_rhs2 (use_stmt) == op) ++ neg = ! neg; ++ } ++ if (CONVERT_EXPR_CODE_P (use_code) ++ && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (use_stmt)), ++ TREE_TYPE (gimple_assign_rhs1 (use_stmt)))) ++ ; ++ else if (*code == ERROR_MARK) ++ *code = use_code; ++ else if (use_code != *code) ++ { ++ fail = true; ++ break; + } + } +- return ! fail && ! neg; ++ return ! fail && ! neg && *code != ERROR_MARK; ++} ++ ++bool ++check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, ++ tree loop_arg, enum tree_code code) ++{ ++ auto_vec > path; ++ enum tree_code code_; ++ return (check_reduction_path (loc, loop, phi, loop_arg, &code_, path) ++ && code_ == code); + } + + ++ + /* Function vect_is_simple_reduction + + (1) Detect a cross-iteration def-use cycle that represents a simple +@@ -2823,25 +3030,15 @@ pop: + + static stmt_vec_info + vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, +- bool *double_reduc, +- bool need_wrapping_integral_overflow, +- enum vect_reduction_type *v_reduc_type) ++ bool *double_reduc) + { + gphi *phi = as_a (phi_info->stmt); +- struct loop *loop = (gimple_bb (phi))->loop_father; +- struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); +- bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop); + gimple *phi_use_stmt = NULL; +- enum tree_code orig_code, code; +- tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE; +- tree type; +- tree name; + imm_use_iterator imm_iter; + use_operand_p use_p; +- bool phi_def; + + *double_reduc = false; +- *v_reduc_type = TREE_CODE_REDUCTION; ++ STMT_VINFO_REDUC_TYPE (phi_info) = TREE_CODE_REDUCTION; + + tree phi_name = PHI_RESULT (phi); + /* ??? If there are no uses of the PHI result the inner loop reduction +@@ -2850,6 +3047,7 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, + can be constant. See PR60382. 
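The validation loop above accepts a reduction cycle only when every statement on the path uses one and the same operation, with two relaxations: a subtraction is folded into PLUS (while tracking whether the running value gets negated), and value-preserving conversions are stepped over. A simplified standalone model that omits the single in-loop use (cnt == 1) check (not GCC code; the Op enum is invented):

  // Simplified standalone model (not GCC code) of the path validation: all
  // statements on the cycle must use one operation, "res = res - x" is treated
  // as PLUS, and value-preserving conversions are stepped over.
  #include <cstdio>
  #include <vector>

  enum class Op { Plus, Minus, Mult, NopConvert };

  static bool
  valid_reduction_path (const std::vector<Op> &path, Op *code)
  {
    *code = Op::NopConvert;              // stands in for ERROR_MARK
    for (Op op : path)
      {
        if (op == Op::Minus)
          op = Op::Plus;                 // res -= x behaves like res += -x
        if (op == Op::NopConvert)
          continue;                      // tolerated, not part of the chain
        if (*code == Op::NopConvert)
          *code = op;                    // first real operation seen
        else if (op != *code)
          return false;                  // mixed operations are not handled
      }
    return *code != Op::NopConvert;
  }

  int main ()
  {
    Op code;
    std::vector<Op> ok = { Op::Plus, Op::Minus, Op::NopConvert };
    std::vector<Op> mixed = { Op::Plus, Op::Mult };
    std::printf ("%d %d\n", valid_reduction_path (ok, &code),
                 valid_reduction_path (mixed, &code));   // prints 1 0
  }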
*/ + if (has_zero_uses (phi_name)) + return NULL; ++ class loop *loop = (gimple_bb (phi))->loop_father; + unsigned nphi_def_loop_uses = 0; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_name) + { +@@ -2870,44 +3068,26 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, + phi_use_stmt = use_stmt; + } + +- edge latch_e = loop_latch_edge (loop); +- tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); +- if (TREE_CODE (loop_arg) != SSA_NAME) ++ tree latch_def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop)); ++ if (TREE_CODE (latch_def) != SSA_NAME) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "reduction: not ssa_name: %T\n", loop_arg); ++ "reduction: not ssa_name: %T\n", latch_def); + return NULL; + } + +- stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg); ++ stmt_vec_info def_stmt_info = loop_info->lookup_def (latch_def); + if (!def_stmt_info + || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))) + return NULL; + +- if (gassign *def_stmt = dyn_cast (def_stmt_info->stmt)) +- { +- name = gimple_assign_lhs (def_stmt); +- phi_def = false; +- } +- else if (gphi *def_stmt = dyn_cast (def_stmt_info->stmt)) +- { +- name = PHI_RESULT (def_stmt); +- phi_def = true; +- } +- else +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "reduction: unhandled reduction operation: %G", +- def_stmt_info->stmt); +- return NULL; +- } +- ++ bool nested_in_vect_loop ++ = flow_loop_nested_p (LOOP_VINFO_LOOP (loop_info), loop); + unsigned nlatch_def_loop_uses = 0; + auto_vec lcphis; + bool inner_loop_of_double_reduc = false; +- FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) ++ FOR_EACH_IMM_USE_FAST (use_p, imm_iter, latch_def) + { + gimple *use_stmt = USE_STMT (use_p); + if (is_gimple_debug (use_stmt)) +@@ -2925,11 +3105,21 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, + } + } + ++ /* If we are vectorizing an inner reduction we are executing that ++ in the original order only in case we are not dealing with a ++ double reduction. */ ++ if (nested_in_vect_loop && !inner_loop_of_double_reduc) ++ { ++ if (dump_enabled_p ()) ++ report_vect_op (MSG_NOTE, def_stmt_info->stmt, ++ "detected nested cycle: "); ++ return def_stmt_info; ++ } ++ + /* If this isn't a nested cycle or if the nested cycle reduction value + is used ouside of the inner loop we cannot handle uses of the reduction + value. */ +- if ((!nested_in_vect_loop || inner_loop_of_double_reduc) +- && (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1)) ++ if (nlatch_def_loop_uses > 1 || nphi_def_loop_uses > 1) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +@@ -2939,11 +3129,9 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, + + /* If DEF_STMT is a phi node itself, we expect it to have a single argument + defined in the inner loop. */ +- if (phi_def) ++ if (gphi *def_stmt = dyn_cast (def_stmt_info->stmt)) + { +- gphi *def_stmt = as_a (def_stmt_info->stmt); +- op1 = PHI_ARG_DEF (def_stmt, 0); +- ++ tree op1 = PHI_ARG_DEF (def_stmt, 0); + if (gimple_phi_num_args (def_stmt) != 1 + || TREE_CODE (op1) != SSA_NAME) + { +@@ -2974,290 +3162,74 @@ vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, + return NULL; + } + +- /* If we are vectorizing an inner reduction we are executing that +- in the original order only in case we are not dealing with a +- double reduction. 
*/ +- bool check_reduction = true; +- if (flow_loop_nested_p (vect_loop, loop)) +- { +- gphi *lcphi; +- unsigned i; +- check_reduction = false; +- FOR_EACH_VEC_ELT (lcphis, i, lcphi) +- FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_phi_result (lcphi)) +- { +- gimple *use_stmt = USE_STMT (use_p); +- if (is_gimple_debug (use_stmt)) +- continue; +- if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt))) +- check_reduction = true; +- } +- } +- +- gassign *def_stmt = as_a (def_stmt_info->stmt); +- code = orig_code = gimple_assign_rhs_code (def_stmt); +- +- if (nested_in_vect_loop && !check_reduction) +- { +- /* FIXME: Even for non-reductions code generation is funneled +- through vectorizable_reduction for the stmt defining the +- PHI latch value. So we have to artificially restrict ourselves +- for the supported operations. */ +- switch (get_gimple_rhs_class (code)) +- { +- case GIMPLE_BINARY_RHS: +- case GIMPLE_TERNARY_RHS: +- break; +- default: +- /* Not supported by vectorizable_reduction. */ +- if (dump_enabled_p ()) +- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, +- "nested cycle: not handled operation: "); +- return NULL; +- } +- if (dump_enabled_p ()) +- report_vect_op (MSG_NOTE, def_stmt, "detected nested cycle: "); +- return def_stmt_info; +- } +- +- /* We can handle "res -= x[i]", which is non-associative by +- simply rewriting this into "res += -x[i]". Avoid changing +- gimple instruction for the first simple tests and only do this +- if we're allowed to change code at all. */ +- if (code == MINUS_EXPR && gimple_assign_rhs2 (def_stmt) != phi_name) +- code = PLUS_EXPR; +- +- if (code == COND_EXPR) ++ /* Look for the expression computing latch_def from then loop PHI result. */ ++ auto_vec > path; ++ enum tree_code code; ++ if (check_reduction_path (vect_location, loop, phi, latch_def, &code, ++ path)) + { +- if (! nested_in_vect_loop) +- *v_reduc_type = COND_REDUCTION; ++ STMT_VINFO_REDUC_CODE (phi_info) = code; ++ if (code == COND_EXPR && !nested_in_vect_loop) ++ STMT_VINFO_REDUC_TYPE (phi_info) = COND_REDUCTION; + +- op3 = gimple_assign_rhs1 (def_stmt); +- if (COMPARISON_CLASS_P (op3)) +- { +- op4 = TREE_OPERAND (op3, 1); +- op3 = TREE_OPERAND (op3, 0); +- } +- if (op3 == phi_name || op4 == phi_name) ++ /* Fill in STMT_VINFO_REDUC_IDX and gather stmts for an SLP ++ reduction chain for which the additional restriction is that ++ all operations in the chain are the same. */ ++ auto_vec reduc_chain; ++ unsigned i; ++ bool is_slp_reduc = !nested_in_vect_loop && code != COND_EXPR; ++ for (i = path.length () - 1; i >= 1; --i) + { +- if (dump_enabled_p ()) +- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, +- "reduction: condition depends on previous" +- " iteration: "); +- return NULL; ++ gimple *stmt = USE_STMT (path[i].second); ++ stmt_vec_info stmt_info = loop_info->lookup_stmt (stmt); ++ STMT_VINFO_REDUC_IDX (stmt_info) ++ = path[i].second->use - gimple_assign_rhs1_ptr (stmt); ++ enum tree_code stmt_code = gimple_assign_rhs_code (stmt); ++ bool leading_conversion = (CONVERT_EXPR_CODE_P (stmt_code) ++ && (i == 1 || i == path.length () - 1)); ++ if ((stmt_code != code && !leading_conversion) ++ /* We can only handle the final value in epilogue ++ generation for reduction chains. */ ++ || (i != 1 && !has_single_use (gimple_assign_lhs (stmt)))) ++ is_slp_reduc = false; ++ /* For reduction chains we support a trailing/leading ++ conversions. We do not store those in the actual chain. 
*/ ++ if (leading_conversion) ++ continue; ++ reduc_chain.safe_push (stmt_info); + } +- +- op1 = gimple_assign_rhs2 (def_stmt); +- op2 = gimple_assign_rhs3 (def_stmt); +- } +- else if (!commutative_tree_code (code) || !associative_tree_code (code)) +- { +- if (dump_enabled_p ()) +- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, +- "reduction: not commutative/associative: "); +- return NULL; +- } +- else if (get_gimple_rhs_class (code) == GIMPLE_BINARY_RHS) +- { +- op1 = gimple_assign_rhs1 (def_stmt); +- op2 = gimple_assign_rhs2 (def_stmt); +- } +- else +- { +- if (dump_enabled_p ()) +- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, +- "reduction: not handled operation: "); +- return NULL; +- } +- +- if (TREE_CODE (op1) != SSA_NAME && TREE_CODE (op2) != SSA_NAME) +- { +- if (dump_enabled_p ()) +- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, +- "reduction: both uses not ssa_names: "); +- +- return NULL; +- } +- +- type = TREE_TYPE (gimple_assign_lhs (def_stmt)); +- if ((TREE_CODE (op1) == SSA_NAME +- && !types_compatible_p (type,TREE_TYPE (op1))) +- || (TREE_CODE (op2) == SSA_NAME +- && !types_compatible_p (type, TREE_TYPE (op2))) +- || (op3 && TREE_CODE (op3) == SSA_NAME +- && !types_compatible_p (type, TREE_TYPE (op3))) +- || (op4 && TREE_CODE (op4) == SSA_NAME +- && !types_compatible_p (type, TREE_TYPE (op4)))) +- { +- if (dump_enabled_p ()) +- { +- dump_printf_loc (MSG_NOTE, vect_location, +- "reduction: multiple types: operation type: " +- "%T, operands types: %T,%T", +- type, TREE_TYPE (op1), TREE_TYPE (op2)); +- if (op3) +- dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3)); +- +- if (op4) +- dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4)); +- dump_printf (MSG_NOTE, "\n"); +- } +- +- return NULL; +- } +- +- /* Check whether it's ok to change the order of the computation. +- Generally, when vectorizing a reduction we change the order of the +- computation. This may change the behavior of the program in some +- cases, so we need to check that this is ok. One exception is when +- vectorizing an outer-loop: the inner-loop is executed sequentially, +- and therefore vectorizing reductions in the inner-loop during +- outer-loop vectorization is safe. */ +- if (check_reduction +- && *v_reduc_type == TREE_CODE_REDUCTION +- && needs_fold_left_reduction_p (type, code, +- need_wrapping_integral_overflow)) +- *v_reduc_type = FOLD_LEFT_REDUCTION; +- +- /* Reduction is safe. We're dealing with one of the following: +- 1) integer arithmetic and no trapv +- 2) floating point arithmetic, and special flags permit this optimization +- 3) nested cycle (i.e., outer loop vectorization). */ +- stmt_vec_info def1_info = loop_info->lookup_def (op1); +- stmt_vec_info def2_info = loop_info->lookup_def (op2); +- if (code != COND_EXPR && !def1_info && !def2_info) +- { +- if (dump_enabled_p ()) +- report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: "); +- return NULL; +- } +- +- /* Check that one def is the reduction def, defined by PHI, +- the other def is either defined in the loop ("vect_internal_def"), +- or it's an induction (defined by a loop-header phi-node). 
*/ +- +- if (def2_info +- && def2_info->stmt == phi +- && (code == COND_EXPR +- || !def1_info +- || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt)) +- || vect_valid_reduction_input_p (def1_info))) +- { +- if (dump_enabled_p ()) +- report_vect_op (MSG_NOTE, def_stmt, "detected reduction: "); +- return def_stmt_info; +- } +- +- if (def1_info +- && def1_info->stmt == phi +- && (code == COND_EXPR +- || !def2_info +- || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt)) +- || vect_valid_reduction_input_p (def2_info))) +- { +- if (! nested_in_vect_loop && orig_code != MINUS_EXPR) ++ if (is_slp_reduc && reduc_chain.length () > 1) + { +- /* Check if we can swap operands (just for simplicity - so that +- the rest of the code can assume that the reduction variable +- is always the last (second) argument). */ +- if (code == COND_EXPR) ++ for (unsigned i = 0; i < reduc_chain.length () - 1; ++i) + { +- /* Swap cond_expr by inverting the condition. */ +- tree cond_expr = gimple_assign_rhs1 (def_stmt); +- enum tree_code invert_code = ERROR_MARK; +- enum tree_code cond_code = TREE_CODE (cond_expr); +- +- if (TREE_CODE_CLASS (cond_code) == tcc_comparison) +- { +- bool honor_nans = HONOR_NANS (TREE_OPERAND (cond_expr, 0)); +- invert_code = invert_tree_comparison (cond_code, honor_nans); +- } +- if (invert_code != ERROR_MARK) +- { +- TREE_SET_CODE (cond_expr, invert_code); +- swap_ssa_operands (def_stmt, +- gimple_assign_rhs2_ptr (def_stmt), +- gimple_assign_rhs3_ptr (def_stmt)); +- } +- else +- { +- if (dump_enabled_p ()) +- report_vect_op (MSG_NOTE, def_stmt, +- "detected reduction: cannot swap operands " +- "for cond_expr"); +- return NULL; +- } ++ REDUC_GROUP_FIRST_ELEMENT (reduc_chain[i]) = reduc_chain[0]; ++ REDUC_GROUP_NEXT_ELEMENT (reduc_chain[i]) = reduc_chain[i+1]; + } +- else +- swap_ssa_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt), +- gimple_assign_rhs2_ptr (def_stmt)); +- +- if (dump_enabled_p ()) +- report_vect_op (MSG_NOTE, def_stmt, +- "detected reduction: need to swap operands: "); +- +- if (CONSTANT_CLASS_P (gimple_assign_rhs1 (def_stmt))) +- LOOP_VINFO_OPERANDS_SWAPPED (loop_info) = true; +- } +- else +- { +- if (dump_enabled_p ()) +- report_vect_op (MSG_NOTE, def_stmt, "detected reduction: "); +- } ++ REDUC_GROUP_FIRST_ELEMENT (reduc_chain.last ()) = reduc_chain[0]; ++ REDUC_GROUP_NEXT_ELEMENT (reduc_chain.last ()) = NULL; + +- return def_stmt_info; +- } ++ /* Save the chain for further analysis in SLP detection. */ ++ LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]); ++ REDUC_GROUP_SIZE (reduc_chain[0]) = reduc_chain.length (); + +- /* Try to find SLP reduction chain. */ +- if (! nested_in_vect_loop +- && code != COND_EXPR +- && orig_code != MINUS_EXPR +- && vect_is_slp_reduction (loop_info, phi, def_stmt)) +- { +- if (dump_enabled_p ()) +- report_vect_op (MSG_NOTE, def_stmt, +- "reduction: detected reduction chain: "); ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "reduction: detected reduction chain\n"); ++ } ++ else if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "reduction: detected reduction\n"); + + return def_stmt_info; + } + +- /* Look for the expression computing loop_arg from loop PHI result. 
*/ +- if (check_reduction_path (vect_location, loop, phi, loop_arg, code)) +- return def_stmt_info; +- + if (dump_enabled_p ()) +- { +- report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, +- "reduction: unknown pattern: "); +- } ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "reduction: unknown pattern\n"); + + return NULL; + } + +-/* Wrapper around vect_is_simple_reduction, which will modify code +- in-place if it enables detection of more reductions. Arguments +- as there. */ +- +-stmt_vec_info +-vect_force_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, +- bool *double_reduc, +- bool need_wrapping_integral_overflow) +-{ +- enum vect_reduction_type v_reduc_type; +- stmt_vec_info def_info +- = vect_is_simple_reduction (loop_info, phi_info, double_reduc, +- need_wrapping_integral_overflow, +- &v_reduc_type); +- if (def_info) +- { +- STMT_VINFO_REDUC_TYPE (phi_info) = v_reduc_type; +- STMT_VINFO_REDUC_DEF (phi_info) = def_info; +- STMT_VINFO_REDUC_TYPE (def_info) = v_reduc_type; +- STMT_VINFO_REDUC_DEF (def_info) = phi_info; +- } +- return def_info; +-} +- + /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */ + int + vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, +@@ -3601,7 +3573,11 @@ vect_estimate_min_profitable_iters (loop_vec_info loop_vinfo, + &vec_inside_cost, &vec_epilogue_cost); + + vec_outside_cost = (int)(vec_prologue_cost + vec_epilogue_cost); +- ++ ++ /* Stash the costs so that we can compare two loop_vec_infos. */ ++ loop_vinfo->vec_inside_cost = vec_inside_cost; ++ loop_vinfo->vec_outside_cost = vec_outside_cost; ++ + if (dump_enabled_p ()) + { + dump_printf_loc (MSG_NOTE, vect_location, "Cost model analysis: \n"); +@@ -3846,6 +3822,7 @@ have_whole_vector_shift (machine_mode mode) + + static void + vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn, ++ vect_reduction_type reduction_type, + int ncopies, stmt_vector_for_cost *cost_vec) + { + int prologue_cost = 0, epilogue_cost = 0, inside_cost; +@@ -3860,8 +3837,6 @@ vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn, + loop = LOOP_VINFO_LOOP (loop_vinfo); + + /* Condition reductions generate two reductions in the loop. */ +- vect_reduction_type reduction_type +- = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info); + if (reduction_type == COND_REDUCTION) + ncopies *= 2; + +@@ -4080,15 +4055,15 @@ vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies, + + A cost model should help decide between these two schemes. 
*/ + +-tree +-get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val, ++static tree ++get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, ++ enum tree_code code, tree init_val, + tree *adjustment_def) + { + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + tree scalar_type = TREE_TYPE (init_val); +- tree vectype = get_vectype_for_scalar_type (scalar_type); +- enum tree_code code = gimple_assign_rhs_code (stmt_vinfo->stmt); ++ tree vectype = get_vectype_for_scalar_type (loop_vinfo, scalar_type); + tree def_for_init; + tree init_def; + REAL_VALUE_TYPE real_init_val = dconst0; +@@ -4103,8 +4078,10 @@ get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val, + gcc_assert (nested_in_vect_loop_p (loop, stmt_vinfo) + || loop == (gimple_bb (stmt_vinfo->stmt))->loop_father); + +- vect_reduction_type reduction_type +- = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo); ++ /* ADJUSTMENT_DEF is NULL when called from ++ vect_create_epilog_for_reduction to vectorize double reduction. */ ++ if (adjustment_def) ++ *adjustment_def = NULL; + + switch (code) + { +@@ -4118,11 +4095,6 @@ get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val, + case MULT_EXPR: + case BIT_AND_EXPR: + { +- /* ADJUSTMENT_DEF is NULL when called from +- vect_create_epilog_for_reduction to vectorize double reduction. */ +- if (adjustment_def) +- *adjustment_def = init_val; +- + if (code == MULT_EXPR) + { + real_init_val = dconst1; +@@ -4137,10 +4109,14 @@ get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val, + else + def_for_init = build_int_cst (scalar_type, int_init_val); + +- if (adjustment_def) +- /* Option1: the first element is '0' or '1' as well. */ +- init_def = gimple_build_vector_from_val (&stmts, vectype, +- def_for_init); ++ if (adjustment_def || operand_equal_p (def_for_init, init_val, 0)) ++ { ++ /* Option1: the first element is '0' or '1' as well. */ ++ if (!operand_equal_p (def_for_init, init_val, 0)) ++ *adjustment_def = init_val; ++ init_def = gimple_build_vector_from_val (&stmts, vectype, ++ def_for_init); ++ } + else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()) + { + /* Option2 (variable length): the first element is INIT_VAL. */ +@@ -4164,16 +4140,6 @@ get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val, + case MAX_EXPR: + case COND_EXPR: + { +- if (adjustment_def) +- { +- *adjustment_def = NULL_TREE; +- if (reduction_type != COND_REDUCTION +- && reduction_type != EXTRACT_LAST_REDUCTION) +- { +- init_def = vect_get_vec_def_for_operand (init_val, stmt_vinfo); +- break; +- } +- } + init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val); + init_def = gimple_build_vector_from_val (&stmts, vectype, init_val); + } +@@ -4201,6 +4167,7 @@ get_initial_defs_for_reduction (slp_tree slp_node, + { + vec stmts = SLP_TREE_SCALAR_STMTS (slp_node); + stmt_vec_info stmt_vinfo = stmts[0]; ++ vec_info *vinfo = stmt_vinfo->vinfo; + unsigned HOST_WIDE_INT nunits; + unsigned j, number_of_places_left_in_vector; + tree vector_type; +@@ -4293,7 +4260,7 @@ get_initial_defs_for_reduction (slp_tree slp_node, + { + /* First time round, duplicate ELTS to fill the + required number of vectors. 
*/ +- duplicate_and_interleave (&ctor_seq, vector_type, elts, ++ duplicate_and_interleave (vinfo, &ctor_seq, vector_type, elts, + number_of_vectors, *vec_oprnds); + break; + } +@@ -4309,42 +4276,47 @@ get_initial_defs_for_reduction (slp_tree slp_node, + gsi_insert_seq_on_edge_immediate (pe, ctor_seq); + } + ++/* For a statement STMT_INFO taking part in a reduction operation return ++ the stmt_vec_info the meta information is stored on. */ + +-/* Function vect_create_epilog_for_reduction +- +- Create code at the loop-epilog to finalize the result of a reduction ++stmt_vec_info ++info_for_reduction (stmt_vec_info stmt_info) ++{ ++ stmt_info = vect_orig_stmt (stmt_info); ++ gcc_assert (STMT_VINFO_REDUC_DEF (stmt_info)); ++ if (!is_a (stmt_info->stmt)) ++ stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); ++ gphi *phi = as_a (stmt_info->stmt); ++ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) ++ { ++ if (gimple_phi_num_args (phi) == 1) ++ stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); ++ } ++ else if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) ++ { ++ edge pe = loop_preheader_edge (gimple_bb (phi)->loop_father); ++ stmt_vec_info info ++ = stmt_info->vinfo->lookup_def (PHI_ARG_DEF_FROM_EDGE (phi, pe)); ++ if (info && STMT_VINFO_DEF_TYPE (info) == vect_double_reduction_def) ++ stmt_info = info; ++ } ++ return stmt_info; ++} ++ ++/* Function vect_create_epilog_for_reduction ++ ++ Create code at the loop-epilog to finalize the result of a reduction + computation. + +- VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector +- reduction statements. + STMT_INFO is the scalar reduction stmt that is being vectorized. +- NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the +- number of elements that we can fit in a vectype (nunits). In this case +- we have to generate more than one vector stmt - i.e - we need to "unroll" +- the vector stmt by a factor VF/nunits. For more details see documentation +- in vectorizable_operation. +- REDUC_FN is the internal function for the epilog reduction. +- REDUCTION_PHIS is a list of the phi-nodes that carry the reduction +- computation. +- REDUC_INDEX is the index of the operand in the right hand side of the +- statement that is defined by REDUCTION_PHI. +- DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled. + SLP_NODE is an SLP node containing a group of reduction statements. The + first one in this group is STMT_INFO. +- INDUC_VAL is for INTEGER_INDUC_COND_REDUCTION the value to use for the case +- when the COND_EXPR is never true in the loop. For MAX_EXPR, it needs to +- be smaller than any value of the IV in the loop, for MIN_EXPR larger than +- any value of the IV in the loop. +- INDUC_CODE is the code for epilog reduction if INTEGER_INDUC_COND_REDUCTION. +- NEUTRAL_OP is the value given by neutral_op_for_slp_reduction; it is +- null if this is not an SLP reduction ++ SLP_NODE_INSTANCE is the SLP node instance containing SLP_NODE ++ REDUC_INDEX says which rhs operand of the STMT_INFO is the reduction phi ++ (counting from 0) + + This function: +- 1. Creates the reduction def-use cycles: sets the arguments for +- REDUCTION_PHIS: +- The loop-entry argument is the vectorized initial-value of the reduction. +- The loop-latch argument is taken from VECT_DEFS - the vector of partial +- sums. ++ 1. Completes the reduction def-use cycles. + 2. 
"Reduces" each vector of partial results VECT_DEFS into a single result, + by calling the function specified by REDUC_FN if available, or by + other means (whole-vector shifts or a scalar loop). +@@ -4354,7 +4326,7 @@ get_initial_defs_for_reduction (slp_tree slp_node, + The flow at the entry to this function: + + loop: +- vec_def = phi # REDUCTION_PHI ++ vec_def = phi # REDUCTION_PHI + VECT_DEF = vector_stmt # vectorized form of STMT_INFO + s_loop = scalar_stmt # (scalar) STMT_INFO + loop_exit: +@@ -4379,21 +4351,34 @@ get_initial_defs_for_reduction (slp_tree slp_node, + */ + + static void +-vect_create_epilog_for_reduction (vec vect_defs, +- stmt_vec_info stmt_info, +- gimple *reduc_def_stmt, +- int ncopies, internal_fn reduc_fn, +- vec reduction_phis, +- bool double_reduc, ++vect_create_epilog_for_reduction (stmt_vec_info stmt_info, + slp_tree slp_node, +- slp_instance slp_node_instance, +- tree induc_val, enum tree_code induc_code, +- tree neutral_op) ++ slp_instance slp_node_instance) + { ++ stmt_vec_info reduc_info = info_for_reduction (stmt_info); ++ gcc_assert (reduc_info->is_reduc_info); ++ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); ++ /* For double reductions we need to get at the inner loop reduction ++ stmt which has the meta info attached. Our stmt_info is that of the ++ loop-closed PHI of the inner loop which we remember as ++ def for the reduction PHI generation. */ ++ bool double_reduc = false; ++ stmt_vec_info rdef_info = stmt_info; ++ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) ++ { ++ gcc_assert (!slp_node); ++ double_reduc = true; ++ stmt_info = loop_vinfo->lookup_def (gimple_phi_arg_def ++ (stmt_info->stmt, 0)); ++ stmt_info = vect_stmt_to_vectorize (stmt_info); ++ } ++ gphi *reduc_def_stmt ++ = as_a (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))->stmt); ++ enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); ++ internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); + stmt_vec_info prev_phi_info; + tree vectype; + machine_mode mode; +- loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL; + basic_block exit_bb; + tree scalar_dest; +@@ -4401,32 +4386,24 @@ vect_create_epilog_for_reduction (vec vect_defs, + gimple *new_phi = NULL, *phi; + stmt_vec_info phi_info; + gimple_stmt_iterator exit_gsi; +- tree vec_dest; +- tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest; ++ tree new_temp = NULL_TREE, new_name, new_scalar_dest; + gimple *epilog_stmt = NULL; +- enum tree_code code = gimple_assign_rhs_code (stmt_info->stmt); + gimple *exit_phi; + tree bitsize; +- tree adjustment_def = NULL; +- tree vec_initial_def = NULL; +- tree expr, def, initial_def = NULL; ++ tree def; + tree orig_name, scalar_result; + imm_use_iterator imm_iter, phi_imm_iter; + use_operand_p use_p, phi_use_p; + gimple *use_stmt; +- stmt_vec_info reduction_phi_info = NULL; + bool nested_in_vect_loop = false; + auto_vec new_phis; +- auto_vec inner_phis; + int j, i; + auto_vec scalar_results; +- unsigned int group_size = 1, k, ratio; +- auto_vec vec_initial_defs; ++ unsigned int group_size = 1, k; + auto_vec phis; + bool slp_reduc = false; + bool direct_slp_reduc; + tree new_phi_result; +- stmt_vec_info inner_phi = NULL; + tree induction_index = NULL_TREE; + + if (slp_node) +@@ -4439,127 +4416,53 @@ vect_create_epilog_for_reduction (vec vect_defs, + nested_in_vect_loop = true; + gcc_assert (!slp_node); + } ++ gcc_assert (!nested_in_vect_loop || double_reduc); + +- 
vectype = STMT_VINFO_VECTYPE (stmt_info); ++ vectype = STMT_VINFO_REDUC_VECTYPE (reduc_info); + gcc_assert (vectype); + mode = TYPE_MODE (vectype); + +- /* 1. Create the reduction def-use cycle: +- Set the arguments of REDUCTION_PHIS, i.e., transform +- +- loop: +- vec_def = phi # REDUCTION_PHI +- VECT_DEF = vector_stmt # vectorized form of STMT +- ... +- +- into: +- +- loop: +- vec_def = phi # REDUCTION_PHI +- VECT_DEF = vector_stmt # vectorized form of STMT +- ... +- +- (in case of SLP, do it for all the phis). */ +- +- /* Get the loop-entry arguments. */ +- enum vect_def_type initial_def_dt = vect_unknown_def_type; ++ tree initial_def = NULL; ++ tree induc_val = NULL_TREE; ++ tree adjustment_def = NULL; + if (slp_node) +- { +- unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); +- vec_initial_defs.reserve (vec_num); +- get_initial_defs_for_reduction (slp_node_instance->reduc_phis, +- &vec_initial_defs, vec_num, +- REDUC_GROUP_FIRST_ELEMENT (stmt_info), +- neutral_op); +- } ++ ; + else + { + /* Get at the scalar def before the loop, that defines the initial value + of the reduction variable. */ + initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt, + loop_preheader_edge (loop)); +- /* Optimize: if initial_def is for REDUC_MAX smaller than the base +- and we can't use zero for induc_val, use initial_def. Similarly +- for REDUC_MIN and initial_def larger than the base. */ +- if (TREE_CODE (initial_def) == INTEGER_CST +- && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) +- == INTEGER_INDUC_COND_REDUCTION) +- && !integer_zerop (induc_val) +- && ((induc_code == MAX_EXPR +- && tree_int_cst_lt (initial_def, induc_val)) +- || (induc_code == MIN_EXPR +- && tree_int_cst_lt (induc_val, initial_def)))) +- induc_val = initial_def; +- +- if (double_reduc) +- /* In case of double reduction we only create a vector variable +- to be put in the reduction phi node. The actual statement +- creation is done later in this function. */ +- vec_initial_def = vect_create_destination_var (initial_def, vectype); ++ /* Optimize: for induction condition reduction, if we can't use zero ++ for induc_val, use initial_def. */ ++ if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) ++ induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info); ++ else if (double_reduc) ++ ; + else if (nested_in_vect_loop) +- { +- /* Do not use an adjustment def as that case is not supported +- correctly if ncopies is not one. */ +- vect_is_simple_use (initial_def, loop_vinfo, &initial_def_dt); +- vec_initial_def = vect_get_vec_def_for_operand (initial_def, +- stmt_info); +- } ++ ; + else +- vec_initial_def +- = get_initial_def_for_reduction (stmt_info, initial_def, +- &adjustment_def); +- vec_initial_defs.create (1); +- vec_initial_defs.quick_push (vec_initial_def); ++ adjustment_def = STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info); + } + +- /* Set phi nodes arguments. */ +- FOR_EACH_VEC_ELT (reduction_phis, i, phi_info) ++ unsigned vec_num; ++ int ncopies; ++ if (slp_node) + { +- tree vec_init_def = vec_initial_defs[i]; +- tree def = vect_defs[i]; +- for (j = 0; j < ncopies; j++) +- { +- if (j != 0) +- { +- phi_info = STMT_VINFO_RELATED_STMT (phi_info); +- if (nested_in_vect_loop) +- vec_init_def +- = vect_get_vec_def_for_stmt_copy (loop_vinfo, vec_init_def); +- } +- +- /* Set the loop-entry arg of the reduction-phi. */ +- +- gphi *phi = as_a (phi_info->stmt); +- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) +- == INTEGER_INDUC_COND_REDUCTION) +- { +- /* Initialise the reduction phi to zero. 
This prevents initial +- values of non-zero interferring with the reduction op. */ +- gcc_assert (ncopies == 1); +- gcc_assert (i == 0); +- +- tree vec_init_def_type = TREE_TYPE (vec_init_def); +- tree induc_val_vec +- = build_vector_from_val (vec_init_def_type, induc_val); +- +- add_phi_arg (phi, induc_val_vec, loop_preheader_edge (loop), +- UNKNOWN_LOCATION); +- } +- else +- add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop), +- UNKNOWN_LOCATION); +- +- /* Set the loop-latch arg for the reduction-phi. */ +- if (j > 0) +- def = vect_get_vec_def_for_stmt_copy (loop_vinfo, def); +- +- add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION); +- +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_NOTE, vect_location, +- "transform reduction: created def-use cycle: %G%G", +- phi, SSA_NAME_DEF_STMT (def)); +- } ++ vec_num = SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis).length (); ++ ncopies = 1; ++ } ++ else ++ { ++ vec_num = 1; ++ ncopies = 0; ++ phi_info = STMT_VINFO_VEC_STMT (loop_vinfo->lookup_stmt (reduc_def_stmt)); ++ do ++ { ++ ncopies++; ++ phi_info = STMT_VINFO_RELATED_STMT (phi_info); ++ } ++ while (phi_info); + } + + /* For cond reductions we want to create a new vector (INDEX_COND_EXPR) +@@ -4569,7 +4472,7 @@ vect_create_epilog_for_reduction (vec vect_defs, + The first match will be a 1 to allow 0 to be used for non-matching + indexes. If there are no matches at all then the vector will be all + zeroes. */ +- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) ++ if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION) + { + tree indx_before_incr, indx_after_incr; + poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype); +@@ -4627,11 +4530,17 @@ vect_create_epilog_for_reduction (vec vect_defs, + tree ccompare = unshare_expr (gimple_assign_rhs1 (vec_stmt)); + + /* Create a conditional, where the condition is taken from vec_stmt +- (CCOMPARE), then is the induction index (INDEX_BEFORE_INCR) and +- else is the phi (NEW_PHI_TREE). */ +- tree index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type, +- ccompare, indx_before_incr, +- new_phi_tree); ++ (CCOMPARE). The then and else values mirror the main VEC_COND_EXPR: ++ the reduction phi corresponds to NEW_PHI_TREE and the new values ++ correspond to INDEX_BEFORE_INCR. */ ++ gcc_assert (STMT_VINFO_REDUC_IDX (stmt_info) >= 1); ++ tree index_cond_expr; ++ if (STMT_VINFO_REDUC_IDX (stmt_info) == 2) ++ index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type, ++ ccompare, indx_before_incr, new_phi_tree); ++ else ++ index_cond_expr = build3 (VEC_COND_EXPR, cr_index_vector_type, ++ ccompare, new_phi_tree, indx_before_incr); + induction_index = make_ssa_name (cr_index_vector_type); + gimple *index_condition = gimple_build_assign (induction_index, + index_cond_expr); +@@ -4674,12 +4583,17 @@ vect_create_epilog_for_reduction (vec vect_defs, + /* 2.1 Create new loop-exit-phis to preserve loop-closed form: + v_out1 = phi + Store them in NEW_PHIS. */ +- ++ if (double_reduc) ++ loop = outer_loop; + exit_bb = single_exit (loop)->dest; + prev_phi_info = NULL; +- new_phis.create (vect_defs.length ()); +- FOR_EACH_VEC_ELT (vect_defs, i, def) ++ new_phis.create (slp_node ? 
vec_num : ncopies); ++ for (unsigned i = 0; i < vec_num; i++) + { ++ if (slp_node) ++ def = gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt); ++ else ++ def = gimple_get_lhs (STMT_VINFO_VEC_STMT (rdef_info)->stmt); + for (j = 0; j < ncopies; j++) + { + tree new_def = copy_ssa_name (def); +@@ -4698,37 +4612,6 @@ vect_create_epilog_for_reduction (vec vect_defs, + } + } + +- /* The epilogue is created for the outer-loop, i.e., for the loop being +- vectorized. Create exit phis for the outer loop. */ +- if (double_reduc) +- { +- loop = outer_loop; +- exit_bb = single_exit (loop)->dest; +- inner_phis.create (vect_defs.length ()); +- FOR_EACH_VEC_ELT (new_phis, i, phi) +- { +- stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi); +- tree new_result = copy_ssa_name (PHI_RESULT (phi)); +- gphi *outer_phi = create_phi_node (new_result, exit_bb); +- SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx, +- PHI_RESULT (phi)); +- prev_phi_info = loop_vinfo->add_stmt (outer_phi); +- inner_phis.quick_push (phi_info); +- new_phis[i] = outer_phi; +- while (STMT_VINFO_RELATED_STMT (phi_info)) +- { +- phi_info = STMT_VINFO_RELATED_STMT (phi_info); +- new_result = copy_ssa_name (PHI_RESULT (phi_info->stmt)); +- outer_phi = create_phi_node (new_result, exit_bb); +- SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx, +- PHI_RESULT (phi_info->stmt)); +- stmt_vec_info outer_phi_info = loop_vinfo->add_stmt (outer_phi); +- STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi_info; +- prev_phi_info = outer_phi_info; +- } +- } +- } +- + exit_gsi = gsi_after_labels (exit_bb); + + /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 +@@ -4747,12 +4630,6 @@ vect_create_epilog_for_reduction (vec vect_defs, + gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info)); + gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info); + } +- +- code = gimple_assign_rhs_code (orig_stmt_info->stmt); +- /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore, +- partial results are added and not subtracted. */ +- if (code == MINUS_EXPR) +- code = PLUS_EXPR; + + scalar_dest = gimple_assign_lhs (orig_stmt_info->stmt); + scalar_type = TREE_TYPE (scalar_dest); +@@ -4760,15 +4637,6 @@ vect_create_epilog_for_reduction (vec vect_defs, + new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); + bitsize = TYPE_SIZE (scalar_type); + +- /* In case this is a reduction in an inner-loop while vectorizing an outer +- loop - we don't need to extract a single scalar result at the end of the +- inner-loop (unless it is double reduction, i.e., the use of reduction is +- outside the outer-loop). The final vector of partial results will be used +- in the vectorized outer-loop, or reduced to a scalar result at the end of +- the outer-loop. */ +- if (nested_in_vect_loop && !double_reduc) +- goto vect_finalize_reduction; +- + /* SLP reduction without reduction chain, e.g., + # a1 = phi + # b1 = phi +@@ -4791,53 +4659,48 @@ vect_create_epilog_for_reduction (vec vect_defs, + one vector. 
*/ + if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc) + { ++ gimple_seq stmts = NULL; + tree first_vect = PHI_RESULT (new_phis[0]); +- gassign *new_vec_stmt = NULL; +- vec_dest = vect_create_destination_var (scalar_dest, vectype); ++ first_vect = gimple_convert (&stmts, vectype, first_vect); + for (k = 1; k < new_phis.length (); k++) + { + gimple *next_phi = new_phis[k]; + tree second_vect = PHI_RESULT (next_phi); +- tree tem = make_ssa_name (vec_dest, new_vec_stmt); +- new_vec_stmt = gimple_build_assign (tem, code, +- first_vect, second_vect); +- gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT); +- first_vect = tem; ++ second_vect = gimple_convert (&stmts, vectype, second_vect); ++ first_vect = gimple_build (&stmts, code, vectype, ++ first_vect, second_vect); + } ++ gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); + + new_phi_result = first_vect; +- if (new_vec_stmt) +- { +- new_phis.truncate (0); +- new_phis.safe_push (new_vec_stmt); +- } ++ new_phis.truncate (0); ++ new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); + } + /* Likewise if we couldn't use a single defuse cycle. */ + else if (ncopies > 1) + { + gcc_assert (new_phis.length () == 1); ++ gimple_seq stmts = NULL; + tree first_vect = PHI_RESULT (new_phis[0]); +- gassign *new_vec_stmt = NULL; +- vec_dest = vect_create_destination_var (scalar_dest, vectype); ++ first_vect = gimple_convert (&stmts, vectype, first_vect); + stmt_vec_info next_phi_info = loop_vinfo->lookup_stmt (new_phis[0]); + for (int k = 1; k < ncopies; ++k) + { + next_phi_info = STMT_VINFO_RELATED_STMT (next_phi_info); + tree second_vect = PHI_RESULT (next_phi_info->stmt); +- tree tem = make_ssa_name (vec_dest, new_vec_stmt); +- new_vec_stmt = gimple_build_assign (tem, code, +- first_vect, second_vect); +- gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT); +- first_vect = tem; ++ second_vect = gimple_convert (&stmts, vectype, second_vect); ++ first_vect = gimple_build (&stmts, code, vectype, ++ first_vect, second_vect); + } ++ gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); + new_phi_result = first_vect; + new_phis.truncate (0); +- new_phis.safe_push (new_vec_stmt); ++ new_phis.safe_push (SSA_NAME_DEF_STMT (first_vect)); + } + else + new_phi_result = PHI_RESULT (new_phis[0]); + +- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION ++ if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION + && reduc_fn != IFN_LAST) + { + /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing +@@ -4852,8 +4715,7 @@ vect_create_epilog_for_reduction (vec vect_defs, + tree index_vec_type = TREE_TYPE (induction_index); + gcc_checking_assert (TYPE_UNSIGNED (index_vec_type)); + tree index_scalar_type = TREE_TYPE (index_vec_type); +- tree index_vec_cmp_type = build_same_sized_truth_vector_type +- (index_vec_type); ++ tree index_vec_cmp_type = truth_type_for (index_vec_type); + + /* Get an unsigned integer version of the type of the data vector. */ + int scalar_precision +@@ -4946,7 +4808,7 @@ vect_create_epilog_for_reduction (vec vect_defs, + gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); + scalar_results.safe_push (new_temp); + } +- else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION ++ else if (STMT_VINFO_REDUC_TYPE (reduc_info) == COND_REDUCTION + && reduc_fn == IFN_LAST) + { + /* Condition reduction without supported IFN_REDUC_MAX. 
Generate +@@ -4989,7 +4851,6 @@ vect_create_epilog_for_reduction (vec vect_defs, + if (off != 0) + { + tree new_idx_val = idx_val; +- tree new_val = val; + if (off != v_size - el_size) + { + new_idx_val = make_ssa_name (idx_eltype); +@@ -4998,7 +4859,7 @@ vect_create_epilog_for_reduction (vec vect_defs, + old_idx_val); + gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); + } +- new_val = make_ssa_name (data_eltype); ++ tree new_val = make_ssa_name (data_eltype); + epilog_stmt = gimple_build_assign (new_val, + COND_EXPR, + build2 (GT_EXPR, +@@ -5060,9 +4921,8 @@ vect_create_epilog_for_reduction (vec vect_defs, + gimple_set_lhs (epilog_stmt, new_temp); + gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); + +- if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) +- == INTEGER_INDUC_COND_REDUCTION) +- && !operand_equal_p (initial_def, induc_val, 0)) ++ if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) ++ && induc_val) + { + /* Earlier we set the initial value to be a vector if induc_val + values. Check the result and if it is induc_val then replace +@@ -5100,7 +4960,7 @@ vect_create_epilog_for_reduction (vec vect_defs, + tree index = build_index_vector (vectype, 0, 1); + tree index_type = TREE_TYPE (index); + tree index_elt_type = TREE_TYPE (index_type); +- tree mask_type = build_same_sized_truth_vector_type (index_type); ++ tree mask_type = truth_type_for (index_type); + + /* Create a vector that, for each element, identifies which of + the REDUC_GROUP_SIZE results should use it. */ +@@ -5112,6 +4972,14 @@ vect_create_epilog_for_reduction (vec vect_defs, + scalar value if we have one, otherwise the initial scalar value + is itself a neutral value. */ + tree vector_identity = NULL_TREE; ++ tree neutral_op = NULL_TREE; ++ if (slp_node) ++ { ++ stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (stmt_info); ++ neutral_op ++ = neutral_op_for_slp_reduction (slp_node_instance->reduc_phis, ++ vectype, code, first != NULL); ++ } + if (neutral_op) + vector_identity = gimple_build_vector_from_val (&seq, vectype, + neutral_op); +@@ -5161,32 +5029,19 @@ vect_create_epilog_for_reduction (vec vect_defs, + bool reduce_with_shift; + tree vec_temp; + +- /* COND reductions all do the final reduction with MAX_EXPR +- or MIN_EXPR. */ +- if (code == COND_EXPR) +- { +- if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) +- == INTEGER_INDUC_COND_REDUCTION) +- code = induc_code; +- else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) +- == CONST_COND_REDUCTION) +- code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info); +- else +- code = MAX_EXPR; +- } +- + /* See if the target wants to do the final (shift) reduction + in a vector mode of smaller size and first reduce upper/lower + halves against each other. 
*/ + enum machine_mode mode1 = mode; +- tree vectype1 = vectype; +- unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype)); +- unsigned sz1 = sz; ++ tree stype = TREE_TYPE (vectype); ++ unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype).to_constant (); ++ unsigned nunits1 = nunits; + if (!slp_reduc + && (mode1 = targetm.vectorize.split_reduction (mode)) != mode) +- sz1 = GET_MODE_SIZE (mode1).to_constant (); ++ nunits1 = GET_MODE_NUNITS (mode1).to_constant (); + +- vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1); ++ tree vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype), ++ stype, nunits1); + reduce_with_shift = have_whole_vector_shift (mode1); + if (!VECTOR_MODE_P (mode1)) + reduce_with_shift = false; +@@ -5200,11 +5055,13 @@ vect_create_epilog_for_reduction (vec vect_defs, + /* First reduce the vector to the desired vector size we should + do shift reduction on by combining upper and lower halves. */ + new_temp = new_phi_result; +- while (sz > sz1) ++ while (nunits > nunits1) + { + gcc_assert (!slp_reduc); +- sz /= 2; +- vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz); ++ nunits /= 2; ++ vectype1 = get_related_vectype_for_scalar_type (TYPE_MODE (vectype), ++ stype, nunits); ++ unsigned int bitsize = tree_to_uhwi (TYPE_SIZE (vectype1)); + + /* The target has to make sure we support lowpart/highpart + extraction, either via direct vector extract or through +@@ -5229,15 +5086,14 @@ vect_create_epilog_for_reduction (vec vect_defs, + = gimple_build_assign (dst2, BIT_FIELD_REF, + build3 (BIT_FIELD_REF, vectype1, + new_temp, TYPE_SIZE (vectype1), +- bitsize_int (sz * BITS_PER_UNIT))); ++ bitsize_int (bitsize))); + gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); + } + else + { + /* Extract via punning to appropriately sized integer mode + vector. 
*/ +- tree eltype = build_nonstandard_integer_type (sz * BITS_PER_UNIT, +- 1); ++ tree eltype = build_nonstandard_integer_type (bitsize, 1); + tree etype = build_vector_type (eltype, 2); + gcc_assert (convert_optab_handler (vec_extract_optab, + TYPE_MODE (etype), +@@ -5266,7 +5122,7 @@ vect_create_epilog_for_reduction (vec vect_defs, + = gimple_build_assign (tem, BIT_FIELD_REF, + build3 (BIT_FIELD_REF, eltype, + new_temp, TYPE_SIZE (eltype), +- bitsize_int (sz * BITS_PER_UNIT))); ++ bitsize_int (bitsize))); + gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); + dst2 = make_ssa_name (vectype1); + epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR, +@@ -5307,8 +5163,8 @@ vect_create_epilog_for_reduction (vec vect_defs, + dump_printf_loc (MSG_NOTE, vect_location, + "Reduce using vector shifts\n"); + +- mode1 = TYPE_MODE (vectype1); +- vec_dest = vect_create_destination_var (scalar_dest, vectype1); ++ gimple_seq stmts = NULL; ++ new_temp = gimple_convert (&stmts, vectype1, new_temp); + for (elt_offset = nelements / 2; + elt_offset >= 1; + elt_offset /= 2) +@@ -5316,18 +5172,12 @@ vect_create_epilog_for_reduction (vec vect_defs, + calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel); + indices.new_vector (sel, 2, nelements); + tree mask = vect_gen_perm_mask_any (vectype1, indices); +- epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR, +- new_temp, zero_vec, mask); +- new_name = make_ssa_name (vec_dest, epilog_stmt); +- gimple_assign_set_lhs (epilog_stmt, new_name); +- gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); +- +- epilog_stmt = gimple_build_assign (vec_dest, code, new_name, +- new_temp); +- new_temp = make_ssa_name (vec_dest, epilog_stmt); +- gimple_assign_set_lhs (epilog_stmt, new_temp); +- gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); ++ new_name = gimple_build (&stmts, VEC_PERM_EXPR, vectype1, ++ new_temp, zero_vec, mask); ++ new_temp = gimple_build (&stmts, code, ++ vectype1, new_name, new_temp); + } ++ gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); + + /* 2.4 Extract the final scalar result. Create: + s_out3 = extract_field */ +@@ -5439,9 +5289,8 @@ vect_create_epilog_for_reduction (vec vect_defs, + scalar_results.safe_push (new_temp); + } + +- if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) +- == INTEGER_INDUC_COND_REDUCTION) +- && !operand_equal_p (initial_def, induc_val, 0)) ++ if ((STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) ++ && induc_val) + { + /* Earlier we set the initial value to be a vector if induc_val + values. Check the result and if it is induc_val then replace +@@ -5457,12 +5306,7 @@ vect_create_epilog_for_reduction (vec vect_defs, + scalar_results[0] = tmp; + } + } +- +-vect_finalize_reduction: +- +- if (double_reduc) +- loop = loop->inner; +- ++ + /* 2.5 Adjust the final result by the initial value of the reduction + variable. (When such adjustment is not needed, then + 'adjustment_def' is zero). 
For example, if code is PLUS we create: +@@ -5471,25 +5315,26 @@ vect_finalize_reduction: + if (adjustment_def) + { + gcc_assert (!slp_reduc); ++ gimple_seq stmts = NULL; + if (nested_in_vect_loop) + { + new_phi = new_phis[0]; +- gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) == VECTOR_TYPE); +- expr = build2 (code, vectype, PHI_RESULT (new_phi), adjustment_def); +- new_dest = vect_create_destination_var (scalar_dest, vectype); ++ gcc_assert (VECTOR_TYPE_P (TREE_TYPE (adjustment_def))); ++ adjustment_def = gimple_convert (&stmts, vectype, adjustment_def); ++ new_temp = gimple_build (&stmts, code, vectype, ++ PHI_RESULT (new_phi), adjustment_def); + } + else + { + new_temp = scalar_results[0]; + gcc_assert (TREE_CODE (TREE_TYPE (adjustment_def)) != VECTOR_TYPE); +- expr = build2 (code, scalar_type, new_temp, adjustment_def); +- new_dest = vect_create_destination_var (scalar_dest, scalar_type); ++ adjustment_def = gimple_convert (&stmts, scalar_type, adjustment_def); ++ new_temp = gimple_build (&stmts, code, scalar_type, ++ new_temp, adjustment_def); + } + +- epilog_stmt = gimple_build_assign (new_dest, expr); +- new_temp = make_ssa_name (new_dest, epilog_stmt); +- gimple_assign_set_lhs (epilog_stmt, new_temp); +- gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); ++ epilog_stmt = gimple_seq_last_stmt (stmts); ++ gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); + if (nested_in_vect_loop) + { + stmt_vec_info epilog_stmt_info = loop_vinfo->add_stmt (epilog_stmt); +@@ -5507,6 +5352,9 @@ vect_finalize_reduction: + new_phis[0] = epilog_stmt; + } + ++ if (double_reduc) ++ loop = loop->inner; ++ + /* 2.6 Handle the loop-exit phis. Replace the uses of scalar loop-exit + phis with new adjusted scalar results, i.e., replace use + with use . +@@ -5552,24 +5400,10 @@ vect_finalize_reduction: + correspond to the first vector stmt, etc. + (RATIO is equal to (REDUC_GROUP_SIZE / number of new vector stmts)). */ + if (group_size > new_phis.length ()) +- { +- ratio = group_size / new_phis.length (); +- gcc_assert (!(group_size % new_phis.length ())); +- } +- else +- ratio = 1; ++ gcc_assert (!(group_size % new_phis.length ())); + +- stmt_vec_info epilog_stmt_info = NULL; + for (k = 0; k < group_size; k++) + { +- if (k % ratio == 0) +- { +- epilog_stmt_info = loop_vinfo->lookup_stmt (new_phis[k / ratio]); +- reduction_phi_info = reduction_phis[k / ratio]; +- if (double_reduc) +- inner_phi = inner_phis[k / ratio]; +- } +- + if (slp_reduc) + { + stmt_vec_info scalar_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[k]; +@@ -5580,121 +5414,12 @@ vect_finalize_reduction: + scalar_dest = gimple_assign_lhs (scalar_stmt_info->stmt); + } + +- phis.create (3); +- /* Find the loop-closed-use at the loop exit of the original scalar +- result. (The reduction result is expected to have two immediate uses - +- one at the latch block, and one at the loop exit). */ +- FOR_EACH_IMM_USE_FAST (use_p, imm_iter, scalar_dest) +- if (!flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))) +- && !is_gimple_debug (USE_STMT (use_p))) +- phis.safe_push (USE_STMT (use_p)); +- +- /* While we expect to have found an exit_phi because of loop-closed-ssa +- form we can end up without one if the scalar cycle is dead. 
*/ +- +- FOR_EACH_VEC_ELT (phis, i, exit_phi) +- { +- if (outer_loop) +- { +- stmt_vec_info exit_phi_vinfo +- = loop_vinfo->lookup_stmt (exit_phi); +- gphi *vect_phi; +- +- if (double_reduc) +- STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi; +- else +- STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt_info; +- if (!double_reduc +- || STMT_VINFO_DEF_TYPE (exit_phi_vinfo) +- != vect_double_reduction_def) +- continue; +- +- /* Handle double reduction: +- +- stmt1: s1 = phi - double reduction phi (outer loop) +- stmt2: s3 = phi - (regular) reduc phi (inner loop) +- stmt3: s4 = use (s3) - (regular) reduc stmt (inner loop) +- stmt4: s2 = phi - double reduction stmt (outer loop) +- +- At that point the regular reduction (stmt2 and stmt3) is +- already vectorized, as well as the exit phi node, stmt4. +- Here we vectorize the phi node of double reduction, stmt1, and +- update all relevant statements. */ +- +- /* Go through all the uses of s2 to find double reduction phi +- node, i.e., stmt1 above. */ +- orig_name = PHI_RESULT (exit_phi); +- FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) +- { +- stmt_vec_info use_stmt_vinfo; +- tree vect_phi_init, preheader_arg, vect_phi_res; +- basic_block bb = gimple_bb (use_stmt); +- +- /* Check that USE_STMT is really double reduction phi +- node. */ +- if (gimple_code (use_stmt) != GIMPLE_PHI +- || gimple_phi_num_args (use_stmt) != 2 +- || bb->loop_father != outer_loop) +- continue; +- use_stmt_vinfo = loop_vinfo->lookup_stmt (use_stmt); +- if (!use_stmt_vinfo +- || STMT_VINFO_DEF_TYPE (use_stmt_vinfo) +- != vect_double_reduction_def) +- continue; +- +- /* Create vector phi node for double reduction: +- vs1 = phi +- vs1 was created previously in this function by a call to +- vect_get_vec_def_for_operand and is stored in +- vec_initial_def; +- vs2 is defined by INNER_PHI, the vectorized EXIT_PHI; +- vs0 is created here. */ +- +- /* Create vector phi node. */ +- vect_phi = create_phi_node (vec_initial_def, bb); +- loop_vec_info_for_loop (outer_loop)->add_stmt (vect_phi); +- +- /* Create vs0 - initial def of the double reduction phi. */ +- preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt, +- loop_preheader_edge (outer_loop)); +- vect_phi_init = get_initial_def_for_reduction +- (stmt_info, preheader_arg, NULL); +- +- /* Update phi node arguments with vs0 and vs2. */ +- add_phi_arg (vect_phi, vect_phi_init, +- loop_preheader_edge (outer_loop), +- UNKNOWN_LOCATION); +- add_phi_arg (vect_phi, PHI_RESULT (inner_phi->stmt), +- loop_latch_edge (outer_loop), UNKNOWN_LOCATION); +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_NOTE, vect_location, +- "created double reduction phi node: %G", +- vect_phi); +- +- vect_phi_res = PHI_RESULT (vect_phi); +- +- /* Replace the use, i.e., set the correct vs1 in the regular +- reduction phi node. FORNOW, NCOPIES is always 1, so the +- loop is redundant. 
*/ +- stmt_vec_info use_info = reduction_phi_info; +- for (j = 0; j < ncopies; j++) +- { +- edge pr_edge = loop_preheader_edge (loop); +- SET_PHI_ARG_DEF (as_a (use_info->stmt), +- pr_edge->dest_idx, vect_phi_res); +- use_info = STMT_VINFO_RELATED_STMT (use_info); +- } +- } +- } +- } +- +- phis.release (); + if (nested_in_vect_loop) + { + if (double_reduc) + loop = outer_loop; + else +- continue; ++ gcc_unreachable (); + } + + phis.create (3); +@@ -5824,9 +5549,6 @@ vectorize_fold_left_reduction (stmt_vec_info stmt_info, + gcc_assert (!nested_in_vect_loop_p (loop, stmt_info)); + gcc_assert (ncopies == 1); + gcc_assert (TREE_CODE_LENGTH (code) == binary_op); +- gcc_assert (reduc_index == (code == MINUS_EXPR ? 0 : 1)); +- gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) +- == FOLD_LEFT_REDUCTION); + + if (slp_node) + gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out), +@@ -5840,10 +5562,7 @@ vectorize_fold_left_reduction (stmt_vec_info stmt_info, + if (slp_node) + { + auto_vec > vec_defs (2); +- auto_vec sops(2); +- sops.quick_push (ops[0]); +- sops.quick_push (ops[1]); +- vect_get_slp_defs (sops, slp_node, &vec_defs); ++ vect_get_slp_defs (slp_node, &vec_defs); + vec_oprnds0.safe_splice (vec_defs[1 - reduc_index]); + vec_defs[0].release (); + vec_defs[1].release (); +@@ -5984,6 +5703,55 @@ is_nonwrapping_integer_induction (stmt_vec_info stmt_vinfo, struct loop *loop) + <= TYPE_PRECISION (lhs_type)); + } + ++/* Check if masking can be supported by inserting a conditional expression. ++ CODE is the code for the operation. COND_FN is the conditional internal ++ function, if it exists. VECTYPE_IN is the type of the vector input. */ ++static bool ++use_mask_by_cond_expr_p (enum tree_code code, internal_fn cond_fn, ++ tree vectype_in) ++{ ++ if (cond_fn != IFN_LAST ++ && direct_internal_fn_supported_p (cond_fn, vectype_in, ++ OPTIMIZE_FOR_SPEED)) ++ return false; ++ ++ switch (code) ++ { ++ case DOT_PROD_EXPR: ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ ++/* Insert a conditional expression to enable masked vectorization. CODE is the ++ code for the operation. VOP is the array of operands. MASK is the loop ++ mask. GSI is a statement iterator used to place the new conditional ++ expression. */ ++static void ++build_vect_cond_expr (enum tree_code code, tree vop[3], tree mask, ++ gimple_stmt_iterator *gsi) ++{ ++ switch (code) ++ { ++ case DOT_PROD_EXPR: ++ { ++ tree vectype = TREE_TYPE (vop[1]); ++ tree zero = build_zero_cst (vectype); ++ tree masked_op1 = make_temp_ssa_name (vectype, NULL, "masked_op1"); ++ gassign *select = gimple_build_assign (masked_op1, VEC_COND_EXPR, ++ mask, vop[1], zero); ++ gsi_insert_before (gsi, select, GSI_SAME_STMT); ++ vop[1] = masked_op1; ++ break; ++ } ++ ++ default: ++ gcc_unreachable (); ++ } ++} ++ + /* Function vectorizable_reduction. + + Check if STMT_INFO performs a reduction operation that can be vectorized. +@@ -6027,182 +5795,163 @@ is_nonwrapping_integer_induction (stmt_vec_info stmt_vinfo, struct loop *loop) + corresponds to the type of arguments to the reduction stmt, and should *NOT* + be used to create the vectorized stmt. 
The right vectype for the vectorized + stmt is obtained from the type of the result X: +- get_vectype_for_scalar_type (TREE_TYPE (X)) ++ get_vectype_for_scalar_type (vinfo, TREE_TYPE (X)) + + This means that, contrary to "regular" reductions (or "regular" stmts in + general), the following equation: +- STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X)) ++ STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (vinfo, TREE_TYPE (X)) + does *NOT* necessarily hold for reduction patterns. */ + + bool +-vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, +- stmt_vec_info *vec_stmt, slp_tree slp_node, ++vectorizable_reduction (stmt_vec_info stmt_info, slp_tree slp_node, + slp_instance slp_node_instance, + stmt_vector_for_cost *cost_vec) + { +- tree vec_dest; + tree scalar_dest; +- tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); + tree vectype_in = NULL_TREE; + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); +- enum tree_code code, orig_code; +- internal_fn reduc_fn; +- machine_mode vec_mode; +- int op_type; +- optab optab; +- tree new_temp = NULL_TREE; +- enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type; ++ enum vect_def_type cond_reduc_dt = vect_unknown_def_type; + stmt_vec_info cond_stmt_vinfo = NULL; +- enum tree_code cond_reduc_op_code = ERROR_MARK; + tree scalar_type; +- bool is_simple_use; + int i; + int ncopies; +- int epilog_copies; +- stmt_vec_info prev_stmt_info, prev_phi_info; + bool single_defuse_cycle = false; +- stmt_vec_info new_stmt_info = NULL; +- int j; +- tree ops[3]; +- enum vect_def_type dts[3]; +- bool nested_cycle = false, found_nested_cycle_def = false; ++ bool nested_cycle = false; + bool double_reduc = false; +- basic_block def_bb; +- struct loop * def_stmt_loop; +- tree def_arg; +- auto_vec vec_oprnds0; +- auto_vec vec_oprnds1; +- auto_vec vec_oprnds2; +- auto_vec vect_defs; +- auto_vec phis; + int vec_num; +- tree def0, tem; ++ tree tem; + tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE; + tree cond_reduc_val = NULL_TREE; + + /* Make sure it was already recognized as a reduction computation. */ + if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def ++ && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def + && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle) + return false; + +- if (nested_in_vect_loop_p (loop, stmt_info)) ++ /* The stmt we store reduction analysis meta on. */ ++ stmt_vec_info reduc_info = info_for_reduction (stmt_info); ++ reduc_info->is_reduc_info = true; ++ ++ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) + { +- loop = loop->inner; +- nested_cycle = true; ++ if (is_a (stmt_info->stmt)) ++ { ++ /* Analysis for double-reduction is done on the outer ++ loop PHI, nested cycles have no further restrictions. */ ++ STMT_VINFO_TYPE (stmt_info) = cycle_phi_info_type; ++ /* For nested cycles we want to let regular vectorizable_* ++ routines handle code-generation. 
*/ ++ if (STMT_VINFO_DEF_TYPE (reduc_info) != vect_double_reduction_def) ++ { ++ stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); ++ STMT_VINFO_DEF_TYPE (stmt_info) = vect_internal_def; ++ STMT_VINFO_DEF_TYPE (vect_stmt_to_vectorize (stmt_info)) ++ = vect_internal_def; ++ } ++ } ++ else ++ STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; ++ return true; + } + +- if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) +- gcc_assert (slp_node +- && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info); +- +- if (gphi *phi = dyn_cast (stmt_info->stmt)) ++ stmt_vec_info orig_stmt_of_analysis = stmt_info; ++ stmt_vec_info phi_info = stmt_info; ++ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def ++ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) + { +- tree phi_result = gimple_phi_result (phi); +- /* Analysis is fully done on the reduction stmt invocation. */ +- if (! vec_stmt) ++ if (!is_a (stmt_info->stmt)) + { +- if (slp_node) +- slp_node_instance->reduc_phis = slp_node; +- + STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; + return true; + } +- +- if (STMT_VINFO_REDUC_TYPE (stmt_info) == FOLD_LEFT_REDUCTION) +- /* Leave the scalar phi in place. Note that checking +- STMT_VINFO_VEC_REDUCTION_TYPE (as below) only works +- for reductions involving a single statement. */ +- return true; +- +- stmt_vec_info reduc_stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); +- reduc_stmt_info = vect_stmt_to_vectorize (reduc_stmt_info); +- +- if (STMT_VINFO_VEC_REDUCTION_TYPE (reduc_stmt_info) +- == EXTRACT_LAST_REDUCTION) +- /* Leave the scalar phi in place. */ +- return true; +- +- gassign *reduc_stmt = as_a (reduc_stmt_info->stmt); +- code = gimple_assign_rhs_code (reduc_stmt); +- for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k) ++ if (slp_node) + { +- tree op = gimple_op (reduc_stmt, k); +- if (op == phi_result) +- continue; +- if (k == 1 && code == COND_EXPR) +- continue; +- bool is_simple_use = vect_is_simple_use (op, loop_vinfo, &dt); +- gcc_assert (is_simple_use); +- if (dt == vect_constant_def || dt == vect_external_def) +- continue; +- if (!vectype_in +- || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) +- < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (op))))) +- vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op)); +- break; ++ slp_node_instance->reduc_phis = slp_node; ++ /* ??? We're leaving slp_node to point to the PHIs, we only ++ need it to get at the number of vector stmts which wasn't ++ yet initialized for the instance root. */ + } +- /* For a nested cycle we might end up with an operation like +- phi_result * phi_result. */ +- if (!vectype_in) +- vectype_in = STMT_VINFO_VECTYPE (stmt_info); +- gcc_assert (vectype_in); +- +- if (slp_node) +- ncopies = 1; +- else +- ncopies = vect_get_num_copies (loop_vinfo, vectype_in); +- +- stmt_vec_info use_stmt_info; +- if (ncopies > 1 +- && STMT_VINFO_RELEVANT (reduc_stmt_info) <= vect_used_only_live +- && (use_stmt_info = loop_vinfo->lookup_single_use (phi_result)) +- && vect_stmt_to_vectorize (use_stmt_info) == reduc_stmt_info) +- single_defuse_cycle = true; +- +- /* Create the destination vector */ +- scalar_dest = gimple_assign_lhs (reduc_stmt); +- vec_dest = vect_create_destination_var (scalar_dest, vectype_out); +- +- if (slp_node) +- /* The size vect_schedule_slp_instance computes is off for us. 
*/ +- vec_num = vect_get_num_vectors +- (LOOP_VINFO_VECT_FACTOR (loop_vinfo) +- * SLP_TREE_SCALAR_STMTS (slp_node).length (), +- vectype_in); +- else +- vec_num = 1; ++ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def) ++ stmt_info = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (stmt_info)); ++ else /* STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def */ ++ { ++ use_operand_p use_p; ++ gimple *use_stmt; ++ bool res = single_imm_use (gimple_phi_result (stmt_info->stmt), ++ &use_p, &use_stmt); ++ gcc_assert (res); ++ phi_info = loop_vinfo->lookup_stmt (use_stmt); ++ stmt_info = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info)); ++ } ++ } + +- /* Generate the reduction PHIs upfront. */ +- prev_phi_info = NULL; +- for (j = 0; j < ncopies; j++) ++ /* PHIs should not participate in patterns. */ ++ gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); ++ gphi *reduc_def_phi = as_a (phi_info->stmt); ++ ++ /* Verify following REDUC_IDX from the latch def leads us back to the PHI ++ and compute the reduction chain length. */ ++ tree reduc_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, ++ loop_latch_edge (loop)); ++ unsigned reduc_chain_length = 0; ++ bool only_slp_reduc_chain = true; ++ stmt_info = NULL; ++ while (reduc_def != PHI_RESULT (reduc_def_phi)) ++ { ++ stmt_vec_info def = loop_vinfo->lookup_def (reduc_def); ++ stmt_vec_info vdef = vect_stmt_to_vectorize (def); ++ if (STMT_VINFO_REDUC_IDX (vdef) == -1) + { +- if (j == 0 || !single_defuse_cycle) ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "reduction chain broken by patterns.\n"); ++ return false; ++ } ++ if (!REDUC_GROUP_FIRST_ELEMENT (vdef)) ++ only_slp_reduc_chain = false; ++ /* ??? For epilogue generation live members of the chain need ++ to point back to the PHI via their original stmt for ++ info_for_reduction to work. */ ++ if (STMT_VINFO_LIVE_P (vdef)) ++ STMT_VINFO_REDUC_DEF (def) = phi_info; ++ if (CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (vdef->stmt))) ++ { ++ if (!tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (vdef->stmt)), ++ TREE_TYPE (gimple_assign_rhs1 (vdef->stmt)))) + { +- for (i = 0; i < vec_num; i++) +- { +- /* Create the reduction-phi that defines the reduction +- operand. */ +- gimple *new_phi = create_phi_node (vec_dest, loop->header); +- stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi); +- +- if (slp_node) +- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi_info); +- else +- { +- if (j == 0) +- STMT_VINFO_VEC_STMT (stmt_info) +- = *vec_stmt = new_phi_info; +- else +- STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi_info; +- prev_phi_info = new_phi_info; +- } +- } ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "conversion in the reduction chain.\n"); ++ return false; + } + } ++ else if (!stmt_info) ++ /* First non-conversion stmt. */ ++ stmt_info = vdef; ++ reduc_def = gimple_op (vdef->stmt, 1 + STMT_VINFO_REDUC_IDX (vdef)); ++ reduc_chain_length++; ++ } ++ /* PHIs should not participate in patterns. */ ++ gcc_assert (!STMT_VINFO_RELATED_STMT (phi_info)); + +- return true; ++ if (nested_in_vect_loop_p (loop, stmt_info)) ++ { ++ loop = loop->inner; ++ nested_cycle = true; ++ } ++ ++ /* STMT_VINFO_REDUC_DEF doesn't point to the first but the last ++ element. 
*/ ++ if (slp_node && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) ++ { ++ gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (stmt_info)); ++ stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info); + } ++ if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) ++ gcc_assert (slp_node ++ && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info); + + /* 1. Is vectorizable reduction? */ + /* Not supportable if the reduction variable is used in the loop, unless +@@ -6235,37 +5984,13 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + inside the loop body. The last operand is the reduction variable, + which is defined by the loop-header-phi. */ + ++ tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); ++ STMT_VINFO_REDUC_VECTYPE (reduc_info) = vectype_out; + gassign *stmt = as_a (stmt_info->stmt); +- +- /* Flatten RHS. */ +- switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) +- { +- case GIMPLE_BINARY_RHS: +- code = gimple_assign_rhs_code (stmt); +- op_type = TREE_CODE_LENGTH (code); +- gcc_assert (op_type == binary_op); +- ops[0] = gimple_assign_rhs1 (stmt); +- ops[1] = gimple_assign_rhs2 (stmt); +- break; +- +- case GIMPLE_TERNARY_RHS: +- code = gimple_assign_rhs_code (stmt); +- op_type = TREE_CODE_LENGTH (code); +- gcc_assert (op_type == ternary_op); +- ops[0] = gimple_assign_rhs1 (stmt); +- ops[1] = gimple_assign_rhs2 (stmt); +- ops[2] = gimple_assign_rhs3 (stmt); +- break; +- +- case GIMPLE_UNARY_RHS: +- return false; +- +- default: +- gcc_unreachable (); +- } +- +- if (code == COND_EXPR && slp_node) +- return false; ++ enum tree_code code = gimple_assign_rhs_code (stmt); ++ bool lane_reduc_code_p ++ = (code == DOT_PROD_EXPR || code == WIDEN_SUM_EXPR || code == SAD_EXPR); ++ int op_type = TREE_CODE_LENGTH (code); + + scalar_dest = gimple_assign_lhs (stmt); + scalar_type = TREE_TYPE (scalar_dest); +@@ -6277,67 +6002,65 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + if (!type_has_mode_precision_p (scalar_type)) + return false; + ++ /* For lane-reducing ops we're reducing the number of reduction PHIs ++ which means the only use of that may be in the lane-reducing operation. */ ++ if (lane_reduc_code_p ++ && reduc_chain_length != 1 ++ && !only_slp_reduc_chain) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "lane-reducing reduction with extra stmts.\n"); ++ return false; ++ } ++ + /* All uses but the last are expected to be defined in the loop. + The last use is the reduction variable. In case of nested cycle this + assumption is not true: we use reduc_index to record the index of the + reduction variable. */ +- stmt_vec_info reduc_def_info; +- if (orig_stmt_info) +- reduc_def_info = STMT_VINFO_REDUC_DEF (orig_stmt_info); +- else +- reduc_def_info = STMT_VINFO_REDUC_DEF (stmt_info); +- gcc_assert (reduc_def_info); +- gphi *reduc_def_phi = as_a (reduc_def_info->stmt); +- tree reduc_def = PHI_RESULT (reduc_def_phi); +- int reduc_index = -1; ++ reduc_def = PHI_RESULT (reduc_def_phi); + for (i = 0; i < op_type; i++) + { ++ tree op = gimple_op (stmt, i + 1); + /* The condition of COND_EXPR is checked in vectorizable_condition(). 
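lane_reduc_code_p above singles out DOT_PROD_EXPR, WIDEN_SUM_EXPR and SAD_EXPR because one such vector statement already folds several narrow input lanes into a single wide accumulator lane, which is why the number of reduction PHIs differs from a plain reduction. A scalar sketch of what one 4-lane DOT_PROD/SAD step computes; the function names are illustrative only:

    #include <cstdint>
    #include <cstdio>
    #include <cstdlib>

    /* One lane-reducing vector statement folds N narrow lanes into a single
       wide accumulator lane; model one step with N = 4 (4 x int8 -> int32).  */
    static int32_t
    dot_prod_step (const int8_t a[4], const int8_t b[4], int32_t acc)
    {
      for (int i = 0; i < 4; i++)
        acc += (int32_t) a[i] * (int32_t) b[i];
      return acc;
    }

    static int32_t
    sad_step (const uint8_t a[4], const uint8_t b[4], int32_t acc)
    {
      for (int i = 0; i < 4; i++)
        acc += std::abs ((int32_t) a[i] - (int32_t) b[i]);
      return acc;
    }

    int
    main ()
    {
      int8_t a[4] = { 1, 2, 3, 4 }, b[4] = { 5, 6, 7, 8 };
      uint8_t c[4] = { 10, 2, 30, 4 }, d[4] = { 1, 20, 3, 40 };
      printf ("dot %d, sad %d\n", dot_prod_step (a, b, 0), sad_step (c, d, 0));
      return 0;
    }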
*/ + if (i == 0 && code == COND_EXPR) + continue; + + stmt_vec_info def_stmt_info; +- is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &dts[i], &tem, +- &def_stmt_info); +- dt = dts[i]; +- gcc_assert (is_simple_use); +- if (dt == vect_reduction_def +- && ops[i] == reduc_def) +- { +- reduc_index = i; +- continue; +- } +- else if (tem) ++ enum vect_def_type dt; ++ if (!vect_is_simple_use (op, loop_vinfo, &dt, &tem, ++ &def_stmt_info)) + { +- /* To properly compute ncopies we are interested in the widest +- input type in case we're looking at a widening accumulation. */ +- if (!vectype_in +- || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) +- < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (tem))))) +- vectype_in = tem; ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "use not simple.\n"); ++ return false; + } ++ if (i == STMT_VINFO_REDUC_IDX (stmt_info)) ++ continue; + +- if (dt != vect_internal_def +- && dt != vect_external_def +- && dt != vect_constant_def +- && dt != vect_induction_def +- && !(dt == vect_nested_cycle && nested_cycle)) ++ /* There should be only one cycle def in the stmt, the one ++ leading to reduc_def. */ ++ if (VECTORIZABLE_CYCLE_DEF (dt)) + return false; + +- if (dt == vect_nested_cycle +- && ops[i] == reduc_def) +- { +- found_nested_cycle_def = true; +- reduc_index = i; +- } ++ /* To properly compute ncopies we are interested in the widest ++ non-reduction input type in case we're looking at a widening ++ accumulation that we later handle in vect_transform_reduction. */ ++ if (lane_reduc_code_p ++ && tem ++ && (!vectype_in ++ || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) ++ < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (tem)))))) ++ vectype_in = tem; + +- if (i == 1 && code == COND_EXPR) ++ if (code == COND_EXPR) + { +- /* Record how value of COND_EXPR is defined. */ ++ /* Record how the non-reduction-def value of COND_EXPR is defined. */ + if (dt == vect_constant_def) + { + cond_reduc_dt = dt; +- cond_reduc_val = ops[i]; ++ cond_reduc_val = op; + } + if (dt == vect_induction_def + && def_stmt_info +@@ -6348,93 +6071,35 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + } + } + } +- + if (!vectype_in) +- vectype_in = vectype_out; +- +- /* When vectorizing a reduction chain w/o SLP the reduction PHI is not +- directy used in stmt. */ +- if (reduc_index == -1) +- { +- if (STMT_VINFO_REDUC_TYPE (stmt_info) == FOLD_LEFT_REDUCTION) +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "in-order reduction chain without SLP.\n"); +- return false; +- } +- } +- +- if (!(reduc_index == -1 +- || dts[reduc_index] == vect_reduction_def +- || dts[reduc_index] == vect_nested_cycle +- || ((dts[reduc_index] == vect_internal_def +- || dts[reduc_index] == vect_external_def +- || dts[reduc_index] == vect_constant_def +- || dts[reduc_index] == vect_induction_def) +- && nested_cycle && found_nested_cycle_def))) +- { +- /* For pattern recognized stmts, orig_stmt might be a reduction, +- but some helper statements for the pattern might not, or +- might be COND_EXPRs with reduction uses in the condition. */ +- gcc_assert (orig_stmt_info); +- return false; +- } +- +- /* PHIs should not participate in patterns. 
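The COND_EXPR branch above records how the non-reduction operand of the condition is defined because a condition reduction keeps the value from the last iteration whose predicate held, and a constant or induction operand enables the cheaper MIN/MAX-based strategies handled further below. A scalar form of the kind of loop being analysed, for illustration only:

    #include <cstdio>

    /* Scalar shape of a condition reduction: keep the value from the last
       iteration whose predicate held.  When that value is the loop index
       (an integer induction), the vectorizer can implement it as a MAX
       reduction over the indices of the matching lanes.  */
    static int
    last_match_index (const int *a, int n, int key)
    {
      int last = -1;
      for (int i = 0; i < n; i++)
        last = (a[i] == key) ? i : last;  /* COND_EXPR feeding the cycle PHI */
      return last;
    }

    int
    main ()
    {
      int a[8] = { 3, 7, 3, 9, 3, 1, 3, 2 };
      printf ("last index of 3: %d\n", last_match_index (a, 8, 3)); /* 6 */
      return 0;
    }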
*/ +- gcc_assert (!STMT_VINFO_RELATED_STMT (reduc_def_info)); +- enum vect_reduction_type v_reduc_type +- = STMT_VINFO_REDUC_TYPE (reduc_def_info); +- stmt_vec_info tmp = STMT_VINFO_REDUC_DEF (reduc_def_info); ++ vectype_in = STMT_VINFO_VECTYPE (phi_info); ++ STMT_VINFO_REDUC_VECTYPE_IN (reduc_info) = vectype_in; + +- STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type; ++ enum vect_reduction_type v_reduc_type = STMT_VINFO_REDUC_TYPE (phi_info); ++ STMT_VINFO_REDUC_TYPE (reduc_info) = v_reduc_type; + /* If we have a condition reduction, see if we can simplify it further. */ + if (v_reduc_type == COND_REDUCTION) + { +- /* TODO: We can't yet handle reduction chains, since we need to treat +- each COND_EXPR in the chain specially, not just the last one. +- E.g. for: +- +- x_1 = PHI +- x_2 = a_2 ? ... : x_1; +- x_3 = a_3 ? ... : x_2; ++ if (slp_node) ++ return false; + +- we're interested in the last element in x_3 for which a_2 || a_3 +- is true, whereas the current reduction chain handling would +- vectorize x_2 as a normal VEC_COND_EXPR and only treat x_3 +- as a reduction operation. */ +- if (reduc_index == -1) ++ /* When the condition uses the reduction value in the condition, fail. */ ++ if (STMT_VINFO_REDUC_IDX (stmt_info) == 0) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "conditional reduction chains not supported\n"); ++ "condition depends on previous iteration\n"); + return false; + } + +- /* vect_is_simple_reduction ensured that operand 2 is the +- loop-carried operand. */ +- gcc_assert (reduc_index == 2); +- +- /* Loop peeling modifies initial value of reduction PHI, which +- makes the reduction stmt to be transformed different to the +- original stmt analyzed. We need to record reduction code for +- CONST_COND_REDUCTION type reduction at analyzing stage, thus +- it can be used directly at transform stage. */ +- if (STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MAX_EXPR +- || STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) == MIN_EXPR) +- { +- /* Also set the reduction type to CONST_COND_REDUCTION. 
*/ +- gcc_assert (cond_reduc_dt == vect_constant_def); +- STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION; +- } +- else if (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST, +- vectype_in, OPTIMIZE_FOR_SPEED)) ++ if (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST, ++ vectype_in, OPTIMIZE_FOR_SPEED)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "optimizing condition reduction with" + " FOLD_EXTRACT_LAST.\n"); +- STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = EXTRACT_LAST_REDUCTION; ++ STMT_VINFO_REDUC_TYPE (reduc_info) = EXTRACT_LAST_REDUCTION; + } + else if (cond_reduc_dt == vect_induction_def) + { +@@ -6445,6 +6110,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + gcc_assert (TREE_CODE (base) == INTEGER_CST + && TREE_CODE (step) == INTEGER_CST); + cond_reduc_val = NULL_TREE; ++ enum tree_code cond_reduc_op_code = ERROR_MARK; + tree res = PHI_RESULT (STMT_VINFO_STMT (cond_stmt_vinfo)); + if (!types_compatible_p (TREE_TYPE (res), TREE_TYPE (base))) + ; +@@ -6477,16 +6143,17 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + dump_printf_loc (MSG_NOTE, vect_location, + "condition expression based on " + "integer induction.\n"); +- STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) +- = INTEGER_INDUC_COND_REDUCTION; ++ STMT_VINFO_REDUC_CODE (reduc_info) = cond_reduc_op_code; ++ STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) ++ = cond_reduc_val; ++ STMT_VINFO_REDUC_TYPE (reduc_info) = INTEGER_INDUC_COND_REDUCTION; + } + } + else if (cond_reduc_dt == vect_constant_def) + { + enum vect_def_type cond_initial_dt; +- gimple *def_stmt = SSA_NAME_DEF_STMT (ops[reduc_index]); + tree cond_initial_val +- = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop)); ++ = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, loop_preheader_edge (loop)); + + gcc_assert (cond_reduc_val != NULL_TREE); + vect_is_simple_use (cond_initial_val, loop_vinfo, &cond_initial_dt); +@@ -6503,25 +6170,15 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + "condition expression based on " + "compile time constant.\n"); + /* Record reduction code at analysis stage. */ +- STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info) ++ STMT_VINFO_REDUC_CODE (reduc_info) + = integer_onep (e) ? MAX_EXPR : MIN_EXPR; +- STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) +- = CONST_COND_REDUCTION; ++ STMT_VINFO_REDUC_TYPE (reduc_info) = CONST_COND_REDUCTION; + } + } + } + } + +- if (orig_stmt_info) +- gcc_assert (tmp == orig_stmt_info +- || REDUC_GROUP_FIRST_ELEMENT (tmp) == orig_stmt_info); +- else +- /* We changed STMT to be the first stmt in reduction chain, hence we +- check that in this case the first element in the chain is STMT. 
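For the CONST_COND_REDUCTION case above the selected value is a compile-time constant, so the whole reduction collapses to "the constant if any iteration matched, else the initial value"; comparing the two decides whether MAX_EXPR or MIN_EXPR gets recorded as the reduction code. A standalone sketch of that equivalence, under the assumption that the constant is larger than the initial value:

    #include <algorithm>
    #include <cstdio>

    /* With a constant selected value C and known initial value INIT,
       "r = cond ? C : r" over the whole loop yields C if any iteration
       matched and INIT otherwise.  When C > INIT that is a MAX reduction
       (MIN when C < INIT), which vectorizes without a conditional.  */
    static int
    const_cond_reduction (const bool *cond, int n, int init, int c)
    {
      int r = init;
      for (int i = 0; i < n; i++)
        r = std::max (r, cond[i] ? c : init); /* same as r = cond[i] ? c : r */
      return r;
    }

    int
    main ()
    {
      bool cond[4] = { false, true, false, false };
      printf ("%d\n", const_cond_reduction (cond, 4, 0, 5)); /* prints 5 */
      return 0;
    }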
*/ +- gcc_assert (tmp == stmt_info +- || REDUC_GROUP_FIRST_ELEMENT (tmp) == stmt_info); +- +- if (STMT_VINFO_LIVE_P (reduc_def_info)) ++ if (STMT_VINFO_LIVE_P (phi_info)) + return false; + + if (slp_node) +@@ -6531,102 +6188,13 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + + gcc_assert (ncopies >= 1); + +- vec_mode = TYPE_MODE (vectype_in); + poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); + + if (nested_cycle) + { +- def_bb = gimple_bb (reduc_def_phi); +- def_stmt_loop = def_bb->loop_father; +- def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, +- loop_preheader_edge (def_stmt_loop)); +- stmt_vec_info def_arg_stmt_info = loop_vinfo->lookup_def (def_arg); +- if (def_arg_stmt_info +- && (STMT_VINFO_DEF_TYPE (def_arg_stmt_info) +- == vect_double_reduction_def)) +- double_reduc = true; +- } +- +- vect_reduction_type reduction_type +- = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info); +- if ((double_reduc || reduction_type != TREE_CODE_REDUCTION) +- && ncopies > 1) +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "multiple types in double reduction or condition " +- "reduction.\n"); +- return false; +- } +- +- if (code == COND_EXPR) +- { +- /* Only call during the analysis stage, otherwise we'll lose +- STMT_VINFO_TYPE. */ +- if (!vec_stmt && !vectorizable_condition (stmt_info, gsi, NULL, +- true, NULL, cost_vec)) +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "unsupported condition in reduction\n"); +- return false; +- } +- } +- else if (code == LSHIFT_EXPR || code == RSHIFT_EXPR +- || code == LROTATE_EXPR || code == RROTATE_EXPR) +- { +- /* Only call during the analysis stage, otherwise we'll lose +- STMT_VINFO_TYPE. We only support this for nested cycles +- without double reductions at the moment. */ +- if (!nested_cycle +- || double_reduc +- || (!vec_stmt && !vectorizable_shift (stmt_info, gsi, NULL, +- NULL, cost_vec))) +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "unsupported shift or rotation in reduction\n"); +- return false; +- } +- } +- else +- { +- /* 4. Supportable by target? */ +- +- /* 4.1. check support for the operation in the loop */ +- optab = optab_for_tree_code (code, vectype_in, optab_default); +- if (!optab) +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "no optab.\n"); +- +- return false; +- } +- +- if (optab_handler (optab, vec_mode) == CODE_FOR_nothing) +- { +- if (dump_enabled_p ()) +- dump_printf (MSG_NOTE, "op not supported by target.\n"); +- +- if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD) +- || !vect_worthwhile_without_simd_p (loop_vinfo, code)) +- return false; +- +- if (dump_enabled_p ()) +- dump_printf (MSG_NOTE, "proceeding using word mode.\n"); +- } +- +- /* Worthwhile without SIMD support? */ +- if (!VECTOR_MODE_P (TYPE_MODE (vectype_in)) +- && !vect_worthwhile_without_simd_p (loop_vinfo, code)) +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "not worthwhile without SIMD support.\n"); +- +- return false; +- } ++ gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) ++ == vect_double_reduction_def); ++ double_reduc = true; + } + + /* 4.2. Check support for the epilog operation. +@@ -6664,38 +6232,55 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + (and also the same tree-code) when generating the epilog code and + when generating the code inside the loop. 
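The nested_cycle handling above asserts that an analysed nested cycle is a double reduction: the inner loop performs a reduction whose result is accumulated again by an outer-loop PHI. In scalar form the shape looks like this; plain C, no GCC types involved:

    #include <cstdio>

    int
    main ()
    {
      int a[2][4] = { { 1, 2, 3, 4 }, { 5, 6, 7, 8 } };
      int s = 0;                 /* outer reduction PHI (double reduction def) */
      for (int i = 0; i < 2; i++)
        {
          int s2 = s;            /* inner nested-cycle PHI */
          for (int j = 0; j < 4; j++)
            s2 += a[i][j];       /* inner-loop reduction statement */
          s = s2;                /* loop-closed use feeding the outer cycle */
        }
      printf ("%d\n", s);        /* prints 36 */
      return 0;
    }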
*/ + +- if (orig_stmt_info +- && (reduction_type == TREE_CODE_REDUCTION +- || reduction_type == FOLD_LEFT_REDUCTION)) +- { +- /* This is a reduction pattern: get the vectype from the type of the +- reduction variable, and get the tree-code from orig_stmt. */ +- orig_code = gimple_assign_rhs_code (orig_stmt_info->stmt); +- gcc_assert (vectype_out); +- vec_mode = TYPE_MODE (vectype_out); +- } +- else +- { +- /* Regular reduction: use the same vectype and tree-code as used for +- the vector code inside the loop can be used for the epilog code. */ +- orig_code = code; +- +- if (code == MINUS_EXPR) +- orig_code = PLUS_EXPR; ++ enum tree_code orig_code = STMT_VINFO_REDUC_CODE (phi_info); ++ STMT_VINFO_REDUC_CODE (reduc_info) = orig_code; + +- /* For simple condition reductions, replace with the actual expression +- we want to base our reduction around. */ +- if (reduction_type == CONST_COND_REDUCTION) ++ vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); ++ if (reduction_type == TREE_CODE_REDUCTION) ++ { ++ /* Check whether it's ok to change the order of the computation. ++ Generally, when vectorizing a reduction we change the order of the ++ computation. This may change the behavior of the program in some ++ cases, so we need to check that this is ok. One exception is when ++ vectorizing an outer-loop: the inner-loop is executed sequentially, ++ and therefore vectorizing reductions in the inner-loop during ++ outer-loop vectorization is safe. */ ++ if (needs_fold_left_reduction_p (scalar_type, orig_code)) ++ { ++ /* When vectorizing a reduction chain w/o SLP the reduction PHI ++ is not directy used in stmt. */ ++ if (!only_slp_reduc_chain ++ && reduc_chain_length != 1) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "in-order reduction chain without SLP.\n"); ++ return false; ++ } ++ STMT_VINFO_REDUC_TYPE (reduc_info) ++ = reduction_type = FOLD_LEFT_REDUCTION; ++ } ++ else if (!commutative_tree_code (orig_code) ++ || !associative_tree_code (orig_code)) + { +- orig_code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info); +- gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR); ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "reduction: not commutative/associative"); ++ return false; + } +- else if (reduction_type == INTEGER_INDUC_COND_REDUCTION) +- orig_code = cond_reduc_op_code; + } + +- reduc_fn = IFN_LAST; ++ if ((double_reduc || reduction_type != TREE_CODE_REDUCTION) ++ && ncopies > 1) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "multiple types in double reduction or condition " ++ "reduction or fold-left reduction.\n"); ++ return false; ++ } + ++ internal_fn reduc_fn = IFN_LAST; + if (reduction_type == TREE_CODE_REDUCTION + || reduction_type == FOLD_LEFT_REDUCTION + || reduction_type == INTEGER_INDUC_COND_REDUCTION +@@ -6740,6 +6325,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + OPTIMIZE_FOR_SPEED)) + reduc_fn = IFN_REDUC_MAX; + } ++ STMT_VINFO_REDUC_FN (reduc_info) = reduc_fn; + + if (reduction_type != EXTRACT_LAST_REDUCTION + && (!nested_cycle || double_reduc) +@@ -6757,7 +6343,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + tree neutral_op = NULL_TREE; + if (slp_node) + neutral_op = neutral_op_for_slp_reduction +- (slp_node_instance->reduc_phis, code, ++ (slp_node_instance->reduc_phis, vectype_out, orig_code, + REDUC_GROUP_FIRST_ELEMENT (stmt_info) != 
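needs_fold_left_reduction_p in the hunk above forces an in-order (fold-left) reduction when reassociating the operation could change the result, the normal situation for floating-point addition without -ffast-math. A tree reduction keeps independent partial accumulators and combines them at the end, which is exactly the reassociation the scalar semantics may forbid; a small standalone demonstration:

    #include <cstdio>

    int
    main ()
    {
      float a[4] = { 1.0e8f, -1.0e8f, 1.0f, 1.0f };

      float in_order = 0.0f;            /* what the scalar loop computes */
      for (int i = 0; i < 4; i++)
        in_order += a[i];

      /* A 2-lane tree reduction keeps two partial accumulators, i.e. it
         sums lanes {0,2} and {1,3} first and combines them at the end.  */
      float lane0 = a[0] + a[2];
      float lane1 = a[1] + a[3];
      float reassociated = lane0 + lane1;

      printf ("in-order %g, reassociated %g\n", in_order, reassociated);
      return 0;                         /* prints 2 and 0: the orders differ */
    }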
NULL); + + if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION) +@@ -6822,10 +6408,11 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + which each SLP statement has its own initial value and in which + that value needs to be repeated for every instance of the + statement within the initial vector. */ +- unsigned int group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); ++ unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); + scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out)); + if (!neutral_op +- && !can_duplicate_and_interleave_p (group_size, elt_mode)) ++ && !can_duplicate_and_interleave_p (loop_vinfo, group_size, ++ elt_mode)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +@@ -6848,26 +6435,6 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + } + } + +- /* In case of widenning multiplication by a constant, we update the type +- of the constant to be the type of the other operand. We check that the +- constant fits the type in the pattern recognition pass. */ +- if (code == DOT_PROD_EXPR +- && !types_compatible_p (TREE_TYPE (ops[0]), TREE_TYPE (ops[1]))) +- { +- if (TREE_CODE (ops[0]) == INTEGER_CST) +- ops[0] = fold_convert (TREE_TYPE (ops[1]), ops[0]); +- else if (TREE_CODE (ops[1]) == INTEGER_CST) +- ops[1] = fold_convert (TREE_TYPE (ops[0]), ops[1]); +- else +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "invalid types in dot-prod\n"); +- +- return false; +- } +- } +- + if (reduction_type == COND_REDUCTION) + { + widest_int ni; +@@ -6925,26 +6492,68 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + This only works when we see both the reduction PHI and its only consumer + in vectorizable_reduction and there are no intermediate stmts + participating. */ +- stmt_vec_info use_stmt_info; +- tree reduc_phi_result = gimple_phi_result (reduc_def_phi); + if (ncopies > 1 + && (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live) +- && (use_stmt_info = loop_vinfo->lookup_single_use (reduc_phi_result)) +- && vect_stmt_to_vectorize (use_stmt_info) == stmt_info) ++ && reduc_chain_length == 1) ++ single_defuse_cycle = true; ++ ++ if (single_defuse_cycle || lane_reduc_code_p) + { +- single_defuse_cycle = true; +- epilog_copies = 1; ++ gcc_assert (code != COND_EXPR); ++ ++ /* 4. Supportable by target? */ ++ bool ok = true; ++ ++ /* 4.1. check support for the operation in the loop */ ++ optab optab = optab_for_tree_code (code, vectype_in, optab_vector); ++ if (!optab) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "no optab.\n"); ++ ok = false; ++ } ++ ++ machine_mode vec_mode = TYPE_MODE (vectype_in); ++ if (ok && optab_handler (optab, vec_mode) == CODE_FOR_nothing) ++ { ++ if (dump_enabled_p ()) ++ dump_printf (MSG_NOTE, "op not supported by target.\n"); ++ if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD) ++ || !vect_worthwhile_without_simd_p (loop_vinfo, code)) ++ ok = false; ++ else ++ if (dump_enabled_p ()) ++ dump_printf (MSG_NOTE, "proceeding using word mode.\n"); ++ } ++ ++ /* Worthwhile without SIMD support? 
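neutral_op_for_slp_reduction, called near the top of this hunk, supplies a value that can pad the extra lanes of the initial vector without affecting the result. The identity elements involved are ordinary algebra; the table below is only an illustration and does not mirror GCC's helper, which handles further codes and can fall back to the initial scalar value where no neutral constant exists:

    #include <climits>
    #include <cstdio>

    enum reduc_op { OP_PLUS, OP_MULT, OP_AND, OP_MIN };

    /* Identity element that leaves the reduction result unchanged when used
       to pad unused lanes of the initial vector.  */
    static int
    neutral_element (reduc_op code)
    {
      switch (code)
        {
        case OP_PLUS: return 0;        /* x + 0 == x        */
        case OP_MULT: return 1;        /* x * 1 == x        */
        case OP_AND:  return -1;       /* x & ~0 == x       */
        case OP_MIN:  return INT_MAX;  /* min (x, MAX) == x */
        }
      return 0;
    }

    int
    main ()
    {
      printf ("%d %d %d %d\n", neutral_element (OP_PLUS),
              neutral_element (OP_MULT), neutral_element (OP_AND),
              neutral_element (OP_MIN));
      return 0;
    }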
*/ ++ if (ok ++ && !VECTOR_MODE_P (TYPE_MODE (vectype_in)) ++ && !vect_worthwhile_without_simd_p (loop_vinfo, code)) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "not worthwhile without SIMD support.\n"); ++ ok = false; ++ } ++ ++ /* lane-reducing operations have to go through vect_transform_reduction. ++ For the other cases try without the single cycle optimization. */ ++ if (!ok) ++ { ++ if (lane_reduc_code_p) ++ return false; ++ else ++ single_defuse_cycle = false; ++ } + } +- else +- epilog_copies = ncopies; ++ STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info) = single_defuse_cycle; + + /* If the reduction stmt is one of the patterns that have lane + reduction embedded we cannot handle the case of ! single_defuse_cycle. */ +- if ((ncopies > 1 +- && ! single_defuse_cycle) +- && (code == DOT_PROD_EXPR +- || code == WIDEN_SUM_EXPR +- || code == SAD_EXPR)) ++ if ((ncopies > 1 && ! single_defuse_cycle) ++ && lane_reduc_code_p) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +@@ -6958,46 +6567,130 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + else + vec_num = 1; + ++ vect_model_reduction_cost (stmt_info, reduc_fn, reduction_type, ncopies, ++ cost_vec); ++ if (dump_enabled_p () ++ && reduction_type == FOLD_LEFT_REDUCTION) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "using an in-order (fold-left) reduction.\n"); ++ STMT_VINFO_TYPE (orig_stmt_of_analysis) = cycle_phi_info_type; ++ /* All but single defuse-cycle optimized, lane-reducing and fold-left ++ reductions go through their own vectorizable_* routines. */ ++ if (!single_defuse_cycle ++ && code != DOT_PROD_EXPR ++ && code != WIDEN_SUM_EXPR ++ && code != SAD_EXPR ++ && reduction_type != FOLD_LEFT_REDUCTION) ++ { ++ stmt_vec_info tem ++ = vect_stmt_to_vectorize (STMT_VINFO_REDUC_DEF (phi_info)); ++ if (slp_node && REDUC_GROUP_FIRST_ELEMENT (tem)) ++ { ++ gcc_assert (!REDUC_GROUP_NEXT_ELEMENT (tem)); ++ tem = REDUC_GROUP_FIRST_ELEMENT (tem); ++ } ++ STMT_VINFO_DEF_TYPE (vect_orig_stmt (tem)) = vect_internal_def; ++ STMT_VINFO_DEF_TYPE (tem) = vect_internal_def; ++ } ++ else if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) ++ { ++ vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); ++ internal_fn cond_fn = get_conditional_internal_fn (code); ++ ++ if (reduction_type != FOLD_LEFT_REDUCTION ++ && !use_mask_by_cond_expr_p (code, cond_fn, vectype_in) ++ && (cond_fn == IFN_LAST ++ || !direct_internal_fn_supported_p (cond_fn, vectype_in, ++ OPTIMIZE_FOR_SPEED))) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "can't use a fully-masked loop because no" ++ " conditional operation is available.\n"); ++ LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; ++ } ++ else ++ vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, ++ vectype_in, NULL); ++ } ++ return true; ++} ++ ++/* Transform the definition stmt STMT_INFO of a reduction PHI backedge ++ value. 
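STMT_VINFO_FORCE_SINGLE_CYCLE records the single def-use cycle decision made above: when the reduction PHI's only use is the reduction statement itself, the ncopies unrolled copies can all feed one accumulator instead of carrying ncopies separate PHIs that must be combined in the epilogue. A scalar model of the two shapes:

    #include <cstdio>

    int
    main ()
    {
      int a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

      /* Without the optimization: two accumulators (two PHIs), combined
         once more in the epilogue.  */
      int acc0 = 0, acc1 = 0;
      for (int i = 0; i < 8; i += 2)
        {
          acc0 += a[i];
          acc1 += a[i + 1];
        }
      int multi = acc0 + acc1;

      /* Single def-use cycle: both copies chain through one accumulator.  */
      int acc = 0;
      for (int i = 0; i < 8; i += 2)
        {
          acc += a[i];
          acc += a[i + 1];
        }

      printf ("%d %d\n", multi, acc);   /* both print 36 */
      return 0;
    }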
*/ ++ ++bool ++vect_transform_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, ++ stmt_vec_info *vec_stmt, slp_tree slp_node) ++{ ++ tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); ++ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); ++ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); ++ int i; ++ int ncopies; ++ int j; ++ int vec_num; ++ ++ stmt_vec_info reduc_info = info_for_reduction (stmt_info); ++ gcc_assert (reduc_info->is_reduc_info); ++ ++ if (nested_in_vect_loop_p (loop, stmt_info)) ++ { ++ loop = loop->inner; ++ gcc_assert (STMT_VINFO_DEF_TYPE (reduc_info) == vect_double_reduction_def); ++ } ++ ++ gassign *stmt = as_a (stmt_info->stmt); ++ enum tree_code code = gimple_assign_rhs_code (stmt); ++ int op_type = TREE_CODE_LENGTH (code); ++ ++ /* Flatten RHS. */ ++ tree ops[3]; ++ switch (get_gimple_rhs_class (code)) ++ { ++ case GIMPLE_TERNARY_RHS: ++ ops[2] = gimple_assign_rhs3 (stmt); ++ /* Fall thru. */ ++ case GIMPLE_BINARY_RHS: ++ ops[0] = gimple_assign_rhs1 (stmt); ++ ops[1] = gimple_assign_rhs2 (stmt); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ /* All uses but the last are expected to be defined in the loop. ++ The last use is the reduction variable. In case of nested cycle this ++ assumption is not true: we use reduc_index to record the index of the ++ reduction variable. */ ++ stmt_vec_info phi_info = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)); ++ gphi *reduc_def_phi = as_a (phi_info->stmt); ++ int reduc_index = STMT_VINFO_REDUC_IDX (stmt_info); ++ tree vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (reduc_info); ++ ++ if (slp_node) ++ { ++ ncopies = 1; ++ vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++ } ++ else ++ { ++ ncopies = vect_get_num_copies (loop_vinfo, vectype_in); ++ vec_num = 1; ++ } ++ + internal_fn cond_fn = get_conditional_internal_fn (code); + vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); +- +- if (!vec_stmt) /* transformation not required. */ +- { +- vect_model_reduction_cost (stmt_info, reduc_fn, ncopies, cost_vec); +- if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) +- { +- if (reduction_type != FOLD_LEFT_REDUCTION +- && (cond_fn == IFN_LAST +- || !direct_internal_fn_supported_p (cond_fn, vectype_in, +- OPTIMIZE_FOR_SPEED))) +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "can't use a fully-masked loop because no" +- " conditional operation is available.\n"); +- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; +- } +- else if (reduc_index == -1) +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "can't use a fully-masked loop for chained" +- " reductions.\n"); +- LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; +- } +- else +- vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, +- vectype_in); +- } +- if (dump_enabled_p () +- && reduction_type == FOLD_LEFT_REDUCTION) +- dump_printf_loc (MSG_NOTE, vect_location, +- "using an in-order (fold-left) reduction.\n"); +- STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; +- return true; +- } ++ bool mask_by_cond_expr = use_mask_by_cond_expr_p (code, cond_fn, vectype_in); + + /* Transform. 
*/ ++ stmt_vec_info new_stmt_info = NULL; ++ stmt_vec_info prev_stmt_info; ++ tree new_temp = NULL_TREE; ++ auto_vec vec_oprnds0; ++ auto_vec vec_oprnds1; ++ auto_vec vec_oprnds2; ++ tree def0; + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, "transform reduction.\n"); +@@ -7008,23 +6701,26 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + + bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); + ++ vect_reduction_type reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); + if (reduction_type == FOLD_LEFT_REDUCTION) +- return vectorize_fold_left_reduction +- (stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi, code, +- reduc_fn, ops, vectype_in, reduc_index, masks); +- +- if (reduction_type == EXTRACT_LAST_REDUCTION) + { +- gcc_assert (!slp_node); +- return vectorizable_condition (stmt_info, gsi, vec_stmt, +- true, NULL, NULL); ++ internal_fn reduc_fn = STMT_VINFO_REDUC_FN (reduc_info); ++ return vectorize_fold_left_reduction ++ (stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi, code, ++ reduc_fn, ops, vectype_in, reduc_index, masks); + } + ++ bool single_defuse_cycle = STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info); ++ gcc_assert (single_defuse_cycle ++ || code == DOT_PROD_EXPR ++ || code == WIDEN_SUM_EXPR ++ || code == SAD_EXPR); ++ + /* Create the destination vector */ +- vec_dest = vect_create_destination_var (scalar_dest, vectype_out); ++ tree scalar_dest = gimple_assign_lhs (stmt); ++ tree vec_dest = vect_create_destination_var (scalar_dest, vectype_out); + + prev_stmt_info = NULL; +- prev_phi_info = NULL; + if (!slp_node) + { + vec_oprnds0.create (1); +@@ -7033,32 +6729,8 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + vec_oprnds2.create (1); + } + +- phis.create (vec_num); +- vect_defs.create (vec_num); +- if (!slp_node) +- vect_defs.quick_push (NULL_TREE); +- +- if (slp_node) +- phis.splice (SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis)); +- else +- phis.quick_push (STMT_VINFO_VEC_STMT (reduc_def_info)); +- + for (j = 0; j < ncopies; j++) + { +- if (code == COND_EXPR) +- { +- gcc_assert (!slp_node); +- vectorizable_condition (stmt_info, gsi, vec_stmt, +- true, NULL, NULL); +- break; +- } +- if (code == LSHIFT_EXPR +- || code == RSHIFT_EXPR) +- { +- vectorizable_shift (stmt_info, gsi, vec_stmt, slp_node, NULL); +- break; +- } +- + /* Handle uses. */ + if (j == 0) + { +@@ -7066,16 +6738,8 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + { + /* Get vec defs for all the operands except the reduction index, + ensuring the ordering of the ops in the vector is kept. */ +- auto_vec slp_ops; + auto_vec, 3> vec_defs; +- +- slp_ops.quick_push (ops[0]); +- slp_ops.quick_push (ops[1]); +- if (op_type == ternary_op) +- slp_ops.quick_push (ops[2]); +- +- vect_get_slp_defs (slp_ops, slp_node, &vec_defs); +- ++ vect_get_slp_defs (slp_node, &vec_defs); + vec_oprnds0.safe_splice (vec_defs[0]); + vec_defs[0].release (); + vec_oprnds1.safe_splice (vec_defs[1]); +@@ -7130,7 +6794,7 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + FOR_EACH_VEC_ELT (vec_oprnds0, i, def0) + { + tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE }; +- if (masked_loop_p) ++ if (masked_loop_p && !mask_by_cond_expr) + { + /* Make sure that the reduction accumulator is vop[0]. 
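The mask_by_cond_expr path used in this transform (use_mask_by_cond_expr_p and build_vect_cond_expr) neutralises the inactive lanes with a VEC_COND_EXPR so that a plain, unmasked vector operation can be used inside a fully-masked loop. A per-lane scalar model, assuming a PLUS reduction whose neutral value is 0:

    #include <cstdio>

    /* Per-lane view: inactive lanes are replaced by the operation's neutral
       value, after which the ordinary (unmasked) operation is correct.  */
    static int
    masked_add_lane (int acc, int x, bool lane_active)
    {
      int neutralised = lane_active ? x : 0;  /* VEC_COND_EXPR per lane */
      return acc + neutralised;               /* plain vector add       */
    }

    int
    main ()
    {
      int acc = 10;
      acc = masked_add_lane (acc, 5, true);
      acc = masked_add_lane (acc, 7, false);  /* inactive: contributes nothing */
      printf ("%d\n", acc);                   /* prints 15 */
      return 0;
    }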
*/ + if (reduc_index == 1) +@@ -7154,6 +6818,14 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + if (op_type == ternary_op) + vop[2] = vec_oprnds2[i]; + ++ if (masked_loop_p && mask_by_cond_expr) ++ { ++ tree mask = vect_get_loop_mask (gsi, masks, ++ vec_num * ncopies, ++ vectype_in, i * ncopies + j); ++ build_vect_cond_expr (code, vop, mask, gsi); ++ } ++ + gassign *new_stmt = gimple_build_assign (vec_dest, code, + vop[0], vop[1], vop[2]); + new_temp = make_ssa_name (vec_dest, new_stmt); +@@ -7163,15 +6835,10 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + } + + if (slp_node) +- { +- SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); +- vect_defs.quick_push (new_temp); +- } +- else +- vect_defs[0] = new_temp; ++ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); + } + +- if (slp_node) ++ if (slp_node || single_defuse_cycle) + continue; + + if (j == 0) +@@ -7182,20 +6849,244 @@ vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + prev_stmt_info = new_stmt_info; + } + +- /* Finalize the reduction-phi (set its arguments) and create the +- epilog reduction code. */ +- if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node) +- vect_defs[0] = gimple_get_lhs ((*vec_stmt)->stmt); ++ if (single_defuse_cycle && !slp_node) ++ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; ++ ++ return true; ++} ++ ++/* Transform phase of a cycle PHI. */ ++ ++bool ++vect_transform_cycle_phi (stmt_vec_info stmt_info, stmt_vec_info *vec_stmt, ++ slp_tree slp_node, slp_instance slp_node_instance) ++{ ++ tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); ++ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); ++ class loop *loop = LOOP_VINFO_LOOP (loop_vinfo); ++ int i; ++ int ncopies; ++ stmt_vec_info prev_phi_info; ++ int j; ++ bool nested_cycle = false; ++ int vec_num; ++ ++ if (nested_in_vect_loop_p (loop, stmt_info)) ++ { ++ loop = loop->inner; ++ nested_cycle = true; ++ } ++ ++ stmt_vec_info reduc_stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); ++ reduc_stmt_info = vect_stmt_to_vectorize (reduc_stmt_info); ++ stmt_vec_info reduc_info = info_for_reduction (stmt_info); ++ gcc_assert (reduc_info->is_reduc_info); ++ ++ if (STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION ++ || STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION) ++ /* Leave the scalar phi in place. */ ++ return true; ++ ++ tree vectype_in = STMT_VINFO_REDUC_VECTYPE_IN (reduc_info); ++ /* For a nested cycle we do not fill the above. */ ++ if (!vectype_in) ++ vectype_in = STMT_VINFO_VECTYPE (stmt_info); ++ gcc_assert (vectype_in); ++ ++ if (slp_node) ++ { ++ /* The size vect_schedule_slp_instance computes is off for us. */ ++ vec_num = vect_get_num_vectors ++ (LOOP_VINFO_VECT_FACTOR (loop_vinfo) ++ * SLP_TREE_SCALAR_STMTS (slp_node).length (), vectype_in); ++ ncopies = 1; ++ } ++ else ++ { ++ vec_num = 1; ++ ncopies = vect_get_num_copies (loop_vinfo, vectype_in); ++ } ++ ++ /* Check whether we should use a single PHI node and accumulate ++ vectors to one before the backedge. */ ++ if (STMT_VINFO_FORCE_SINGLE_CYCLE (reduc_info)) ++ ncopies = 1; ++ ++ /* Create the destination vector */ ++ gphi *phi = as_a (stmt_info->stmt); ++ tree vec_dest = vect_create_destination_var (gimple_phi_result (phi), ++ vectype_out); ++ ++ /* Get the loop-entry arguments. 
*/ ++ tree vec_initial_def; ++ auto_vec vec_initial_defs; ++ if (slp_node) ++ { ++ vec_initial_defs.reserve (vec_num); ++ gcc_assert (slp_node == slp_node_instance->reduc_phis); ++ stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info); ++ tree neutral_op ++ = neutral_op_for_slp_reduction (slp_node, vectype_out, ++ STMT_VINFO_REDUC_CODE (reduc_info), ++ first != NULL); ++ get_initial_defs_for_reduction (slp_node_instance->reduc_phis, ++ &vec_initial_defs, vec_num, ++ first != NULL, neutral_op); ++ } ++ else ++ { ++ /* Get at the scalar def before the loop, that defines the initial ++ value of the reduction variable. */ ++ tree initial_def = PHI_ARG_DEF_FROM_EDGE (phi, ++ loop_preheader_edge (loop)); ++ /* Optimize: if initial_def is for REDUC_MAX smaller than the base ++ and we can't use zero for induc_val, use initial_def. Similarly ++ for REDUC_MIN and initial_def larger than the base. */ ++ if (STMT_VINFO_REDUC_TYPE (reduc_info) == INTEGER_INDUC_COND_REDUCTION) ++ { ++ tree induc_val = STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info); ++ if (TREE_CODE (initial_def) == INTEGER_CST ++ && !integer_zerop (induc_val) ++ && ((STMT_VINFO_REDUC_CODE (reduc_info) == MAX_EXPR ++ && tree_int_cst_lt (initial_def, induc_val)) ++ || (STMT_VINFO_REDUC_CODE (reduc_info) == MIN_EXPR ++ && tree_int_cst_lt (induc_val, initial_def)))) ++ { ++ induc_val = initial_def; ++ /* Communicate we used the initial_def to epilouge ++ generation. */ ++ STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL (reduc_info) = NULL_TREE; ++ } ++ vec_initial_def = build_vector_from_val (vectype_out, induc_val); ++ } ++ else if (nested_cycle) ++ { ++ /* Do not use an adjustment def as that case is not supported ++ correctly if ncopies is not one. */ ++ vec_initial_def = vect_get_vec_def_for_operand (initial_def, ++ reduc_stmt_info); ++ } ++ else ++ { ++ tree adjustment_def = NULL_TREE; ++ tree *adjustment_defp = &adjustment_def; ++ enum tree_code code = STMT_VINFO_REDUC_CODE (reduc_info); ++ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def) ++ adjustment_defp = NULL; ++ vec_initial_def ++ = get_initial_def_for_reduction (reduc_stmt_info, code, ++ initial_def, adjustment_defp); ++ STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT (reduc_info) = adjustment_def; ++ } ++ vec_initial_defs.create (1); ++ vec_initial_defs.quick_push (vec_initial_def); ++ } ++ ++ /* Generate the reduction PHIs upfront. */ ++ prev_phi_info = NULL; ++ for (i = 0; i < vec_num; i++) ++ { ++ tree vec_init_def = vec_initial_defs[i]; ++ for (j = 0; j < ncopies; j++) ++ { ++ /* Create the reduction-phi that defines the reduction ++ operand. */ ++ gphi *new_phi = create_phi_node (vec_dest, loop->header); ++ stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi); ++ ++ /* Set the loop-entry arg of the reduction-phi. */ ++ if (j != 0 && nested_cycle) ++ vec_init_def = vect_get_vec_def_for_stmt_copy (loop_vinfo, ++ vec_init_def); ++ add_phi_arg (new_phi, vec_init_def, loop_preheader_edge (loop), ++ UNKNOWN_LOCATION); ++ ++ /* The loop-latch arg is set in epilogue processing. */ ++ ++ if (slp_node) ++ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi_info); ++ else ++ { ++ if (j == 0) ++ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi_info; ++ else ++ STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi_info; ++ prev_phi_info = new_phi_info; ++ } ++ } ++ } ++ ++ return true; ++} ++ ++/* Vectorizes LC PHIs. 
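STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT, set above, implements the usual trick of starting the vector accumulator from the neutral value and folding the scalar initial value in only once, after the final horizontal reduction. A standalone illustration for a sum:

    #include <cstdio>

    int
    main ()
    {
      int a[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
      int init = 100;                  /* scalar initial value of the PHI */

      int vacc[4] = { 0, 0, 0, 0 };    /* vector accumulator starts neutral */
      for (int i = 0; i < 8; i += 4)
        for (int l = 0; l < 4; l++)
          vacc[l] += a[i + l];

      int sum = vacc[0] + vacc[1] + vacc[2] + vacc[3]; /* horizontal reduction */
      sum += init;                     /* apply the adjustment exactly once */
      printf ("%d\n", sum);            /* prints 136 */
      return 0;
    }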
*/ ++ ++bool ++vectorizable_lc_phi (stmt_vec_info stmt_info, stmt_vec_info *vec_stmt, ++ slp_tree slp_node) ++{ ++ loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); ++ if (!loop_vinfo ++ || !is_a (stmt_info->stmt) ++ || gimple_phi_num_args (stmt_info->stmt) != 1) ++ return false; ++ ++ if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def ++ && STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def) ++ return false; ++ ++ if (!vec_stmt) /* transformation not required. */ ++ { ++ STMT_VINFO_TYPE (stmt_info) = lc_phi_info_type; ++ return true; ++ } + +- vect_create_epilog_for_reduction (vect_defs, stmt_info, reduc_def_phi, +- epilog_copies, reduc_fn, phis, +- double_reduc, slp_node, slp_node_instance, +- cond_reduc_val, cond_reduc_op_code, +- neutral_op); ++ tree vectype = STMT_VINFO_VECTYPE (stmt_info); ++ tree scalar_dest = gimple_phi_result (stmt_info->stmt); ++ basic_block bb = gimple_bb (stmt_info->stmt); ++ edge e = single_pred_edge (bb); ++ tree vec_dest = vect_create_destination_var (scalar_dest, vectype); ++ vec vec_oprnds = vNULL; ++ vect_get_vec_defs (gimple_phi_arg_def (stmt_info->stmt, 0), NULL_TREE, ++ stmt_info, &vec_oprnds, NULL, slp_node); ++ if (slp_node) ++ { ++ unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++ gcc_assert (vec_oprnds.length () == vec_num); ++ for (unsigned i = 0; i < vec_num; i++) ++ { ++ /* Create the vectorized LC PHI node. */ ++ gphi *new_phi = create_phi_node (vec_dest, bb); ++ add_phi_arg (new_phi, vec_oprnds[i], e, UNKNOWN_LOCATION); ++ stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi); ++ SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi_info); ++ } ++ } ++ else ++ { ++ unsigned ncopies = vect_get_num_copies (loop_vinfo, vectype); ++ stmt_vec_info prev_phi_info = NULL; ++ for (unsigned i = 0; i < ncopies; i++) ++ { ++ if (i != 0) ++ vect_get_vec_defs_for_stmt_copy (loop_vinfo, &vec_oprnds, NULL); ++ /* Create the vectorized LC PHI node. */ ++ gphi *new_phi = create_phi_node (vec_dest, bb); ++ add_phi_arg (new_phi, vec_oprnds[0], e, UNKNOWN_LOCATION); ++ stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi); ++ if (i == 0) ++ STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi_info; ++ else ++ STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi_info; ++ prev_phi_info = new_phi_info; ++ } ++ } ++ vec_oprnds.release (); + + return true; + } + ++ + /* Function vect_min_worthwhile_factor. + + For a loop where we could vectorize the operation indicated by CODE, +@@ -7789,8 +7680,8 @@ vectorizable_induction (stmt_vec_info stmt_info, + bool + vectorizable_live_operation (stmt_vec_info stmt_info, + gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, +- slp_tree slp_node, int slp_index, +- stmt_vec_info *vec_stmt, ++ slp_tree slp_node, slp_instance slp_node_instance, ++ int slp_index, stmt_vec_info *vec_stmt, + stmt_vector_for_cost *) + { + loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); +@@ -7807,8 +7698,33 @@ vectorizable_live_operation (stmt_vec_info stmt_info, + + gcc_assert (STMT_VINFO_LIVE_P (stmt_info)); + +- if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def) +- return false; ++ /* If a stmt of a reduction is live, vectorize it via ++ vect_create_epilog_for_reduction. vectorizable_reduction assessed ++ validity so just trigger the transform here. */ ++ if (STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info))) ++ { ++ if (!vec_stmt) ++ return true; ++ if (slp_node) ++ { ++ /* For reduction chains the meta-info is attached to ++ the group leader. 
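For live scalar uses after the loop that are not reductions, vectorizable_live_operation ends up extracting the lane of the last vector value that corresponds to the final scalar iteration; the reduction case added above instead routes through vect_create_epilog_for_reduction. A rough conceptual sketch of the lane extraction; the index arithmetic here is illustrative, not the exact code:

    #include <cstdio>

    int
    main ()
    {
      int v[4] = { 11, 22, 33, 44 };  /* last vector value computed in the loop */
      int vf = 4;                     /* vectorization factor */
      int niters = 12;                /* scalar iteration count */
      int last_lane = (niters - 1) % vf;          /* lane of the final value */
      printf ("live value: %d\n", v[last_lane]);  /* prints 44 */
      return 0;
    }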
*/ ++ if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) ++ stmt_info = REDUC_GROUP_FIRST_ELEMENT (stmt_info); ++ /* For SLP reductions we vectorize the epilogue for ++ all involved stmts together. */ ++ else if (slp_index != 0) ++ return true; ++ } ++ stmt_vec_info reduc_info = info_for_reduction (stmt_info); ++ gcc_assert (reduc_info->is_reduc_info); ++ if (STMT_VINFO_REDUC_TYPE (reduc_info) == FOLD_LEFT_REDUCTION ++ || STMT_VINFO_REDUC_TYPE (reduc_info) == EXTRACT_LAST_REDUCTION) ++ return true; ++ vect_create_epilog_for_reduction (stmt_info, slp_node, ++ slp_node_instance); ++ return true; ++ } + + /* FORNOW. CHECKME. */ + if (nested_in_vect_loop_p (loop, stmt_info)) +@@ -7892,7 +7808,7 @@ vectorizable_live_operation (stmt_vec_info stmt_info, + gcc_assert (ncopies == 1 && !slp_node); + vect_record_loop_mask (loop_vinfo, + &LOOP_VINFO_MASKS (loop_vinfo), +- 1, vectype); ++ 1, vectype, NULL); + } + } + return true; +@@ -8071,31 +7987,34 @@ loop_niters_no_overflow (loop_vec_info loop_vinfo) + return false; + } + +-/* Return a mask type with half the number of elements as TYPE. */ ++/* Return a mask type with half the number of elements as OLD_TYPE, ++ given that it should have mode NEW_MODE. */ + + tree +-vect_halve_mask_nunits (tree type) ++vect_halve_mask_nunits (tree old_type, machine_mode new_mode) + { +- poly_uint64 nunits = exact_div (TYPE_VECTOR_SUBPARTS (type), 2); +- return build_truth_vector_type (nunits, current_vector_size); ++ poly_uint64 nunits = exact_div (TYPE_VECTOR_SUBPARTS (old_type), 2); ++ return build_truth_vector_type_for_mode (nunits, new_mode); + } + +-/* Return a mask type with twice as many elements as TYPE. */ ++/* Return a mask type with twice as many elements as OLD_TYPE, ++ given that it should have mode NEW_MODE. */ + + tree +-vect_double_mask_nunits (tree type) ++vect_double_mask_nunits (tree old_type, machine_mode new_mode) + { +- poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (type) * 2; +- return build_truth_vector_type (nunits, current_vector_size); ++ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (old_type) * 2; ++ return build_truth_vector_type_for_mode (nunits, new_mode); + } + + /* Record that a fully-masked version of LOOP_VINFO would need MASKS to + contain a sequence of NVECTORS masks that each control a vector of type +- VECTYPE. */ ++ VECTYPE. If SCALAR_MASK is nonnull, the fully-masked loop would AND ++ these vector masks with the vector version of SCALAR_MASK. 
*/ + + void + vect_record_loop_mask (loop_vec_info loop_vinfo, vec_loop_masks *masks, +- unsigned int nvectors, tree vectype) ++ unsigned int nvectors, tree vectype, tree scalar_mask) + { + gcc_assert (nvectors != 0); + if (masks->length () < nvectors) +@@ -8106,10 +8025,17 @@ vect_record_loop_mask (loop_vec_info loop_vinfo, vec_loop_masks *masks, + unsigned int nscalars_per_iter + = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype), + LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant (); ++ ++ if (scalar_mask) ++ { ++ scalar_cond_masked_key cond (scalar_mask, nvectors); ++ loop_vinfo->scalar_cond_masked_set.add (cond); ++ } ++ + if (rgm->max_nscalars_per_iter < nscalars_per_iter) + { + rgm->max_nscalars_per_iter = nscalars_per_iter; +- rgm->mask_type = build_same_sized_truth_vector_type (vectype); ++ rgm->mask_type = truth_type_for (vectype); + } + } + +@@ -8154,7 +8080,7 @@ vect_get_loop_mask (gimple_stmt_iterator *gsi, vec_loop_masks *masks, + gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (mask_type), + TYPE_VECTOR_SUBPARTS (vectype))); + gimple_seq seq = NULL; +- mask_type = build_same_sized_truth_vector_type (vectype); ++ mask_type = truth_type_for (vectype); + mask = gimple_build (&seq, VIEW_CONVERT_EXPR, mask_type, mask); + if (seq) + gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT); +@@ -8242,6 +8168,186 @@ vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, + *seen_store = stmt_info; + } + ++/* Helper function to pass to simplify_replace_tree to enable replacing tree's ++ in the hash_map with its corresponding values. */ ++ ++static tree ++find_in_mapping (tree t, void *context) ++{ ++ hash_map* mapping = (hash_map*) context; ++ ++ tree *value = mapping->get (t); ++ return value ? *value : t; ++} ++ ++/* Update EPILOGUE's loop_vec_info. EPILOGUE was constructed as a copy of the ++ original loop that has now been vectorized. ++ ++ The inits of the data_references need to be advanced with the number of ++ iterations of the main loop. This has been computed in vect_do_peeling and ++ is stored in parameter ADVANCE. We first restore the data_references ++ initial offset with the values recored in ORIG_DRS_INIT. ++ ++ Since the loop_vec_info of this EPILOGUE was constructed for the original ++ loop, its stmt_vec_infos all point to the original statements. These need ++ to be updated to point to their corresponding copies as well as the SSA_NAMES ++ in their PATTERN_DEF_SEQs and RELATED_STMTs. ++ ++ The data_reference's connections also need to be updated. Their ++ corresponding dr_vec_info need to be reconnected to the EPILOGUE's ++ stmt_vec_infos, their statements need to point to their corresponding copy, ++ if they are gather loads or scatter stores then their reference needs to be ++ updated to point to its corresponding copy and finally we set ++ 'base_misaligned' to false as we have already peeled for alignment in the ++ prologue of the main loop. */ ++ ++static void ++update_epilogue_loop_vinfo (class loop *epilogue, tree advance, ++ drs_init_vec &orig_drs_init) ++{ ++ loop_vec_info epilogue_vinfo = loop_vec_info_for_loop (epilogue); ++ auto_vec stmt_worklist; ++ hash_map mapping; ++ gimple *orig_stmt, *new_stmt; ++ gimple_stmt_iterator epilogue_gsi; ++ gphi_iterator epilogue_phi_gsi; ++ stmt_vec_info stmt_vinfo = NULL, related_vinfo; ++ basic_block *epilogue_bbs = get_loop_body (epilogue); ++ ++ LOOP_VINFO_BBS (epilogue_vinfo) = epilogue_bbs; ++ ++ /* Restore original data_reference's offset, before the previous loop and its ++ prologue. 
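The vect_record_loop_mask hunk above keys each rgroup by how many scalars per iteration its masks control, computed as exact_div (nvectors * nunits, VF). A tiny worked example of that arithmetic:

    #include <cstdio>

    int
    main ()
    {
      unsigned vf = 8;        /* vectorization factor: scalars per iteration */
      unsigned nunits = 4;    /* lanes per vector of this operation's type   */
      unsigned nvectors = 4;  /* vector statements needed per iteration      */
      unsigned nscalars_per_iter = nvectors * nunits / vf;  /* exact_div -> 2 */
      printf ("this rgroup's masks control %u scalars per iteration\n",
              nscalars_per_iter);
      return 0;
    }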
*/ ++ std::pair *dr_init; ++ unsigned i; ++ for (i = 0; orig_drs_init.iterate (i, &dr_init); i++) ++ DR_OFFSET (dr_init->first) = dr_init->second; ++ ++ /* Advance data_reference's with the number of iterations of the previous ++ loop and its prologue. */ ++ vect_update_inits_of_drs (epilogue_vinfo, advance, PLUS_EXPR); ++ ++ ++ /* The EPILOGUE loop is a copy of the original loop so they share the same ++ gimple UIDs. In this loop we update the loop_vec_info of the EPILOGUE to ++ point to the copied statements. We also create a mapping of all LHS' in ++ the original loop and all the LHS' in the EPILOGUE and create worklists to ++ update teh STMT_VINFO_PATTERN_DEF_SEQs and STMT_VINFO_RELATED_STMTs. */ ++ for (unsigned i = 0; i < epilogue->num_nodes; ++i) ++ { ++ for (epilogue_phi_gsi = gsi_start_phis (epilogue_bbs[i]); ++ !gsi_end_p (epilogue_phi_gsi); gsi_next (&epilogue_phi_gsi)) ++ { ++ new_stmt = epilogue_phi_gsi.phi (); ++ ++ gcc_assert (gimple_uid (new_stmt) > 0); ++ stmt_vinfo ++ = epilogue_vinfo->stmt_vec_infos[gimple_uid (new_stmt) - 1]; ++ ++ orig_stmt = STMT_VINFO_STMT (stmt_vinfo); ++ STMT_VINFO_STMT (stmt_vinfo) = new_stmt; ++ ++ mapping.put (gimple_phi_result (orig_stmt), ++ gimple_phi_result (new_stmt)); ++ /* PHI nodes can not have patterns or related statements. */ ++ gcc_assert (STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo) == NULL ++ && STMT_VINFO_RELATED_STMT (stmt_vinfo) == NULL); ++ } ++ ++ for (epilogue_gsi = gsi_start_bb (epilogue_bbs[i]); ++ !gsi_end_p (epilogue_gsi); gsi_next (&epilogue_gsi)) ++ { ++ new_stmt = gsi_stmt (epilogue_gsi); ++ ++ gcc_assert (gimple_uid (new_stmt) > 0); ++ stmt_vinfo ++ = epilogue_vinfo->stmt_vec_infos[gimple_uid (new_stmt) - 1]; ++ ++ orig_stmt = STMT_VINFO_STMT (stmt_vinfo); ++ STMT_VINFO_STMT (stmt_vinfo) = new_stmt; ++ ++ if (tree old_lhs = gimple_get_lhs (orig_stmt)) ++ mapping.put (old_lhs, gimple_get_lhs (new_stmt)); ++ ++ if (STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo)) ++ { ++ gimple_seq seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_vinfo); ++ for (gimple_stmt_iterator gsi = gsi_start (seq); ++ !gsi_end_p (gsi); gsi_next (&gsi)) ++ stmt_worklist.safe_push (gsi_stmt (gsi)); ++ } ++ ++ related_vinfo = STMT_VINFO_RELATED_STMT (stmt_vinfo); ++ if (related_vinfo != NULL && related_vinfo != stmt_vinfo) ++ { ++ gimple *stmt = STMT_VINFO_STMT (related_vinfo); ++ stmt_worklist.safe_push (stmt); ++ /* Set BB such that the assert in ++ 'get_initial_def_for_reduction' is able to determine that ++ the BB of the related stmt is inside this loop. */ ++ gimple_set_bb (stmt, ++ gimple_bb (new_stmt)); ++ related_vinfo = STMT_VINFO_RELATED_STMT (related_vinfo); ++ gcc_assert (related_vinfo == NULL ++ || related_vinfo == stmt_vinfo); ++ } ++ } ++ } ++ ++ /* The PATTERN_DEF_SEQs and RELATED_STMTs in the epilogue were constructed ++ using the original main loop and thus need to be updated to refer to the ++ cloned variables used in the epilogue. 
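The original-to-copy LHS mapping built above is later applied through the find_in_mapping callback: every operand is looked up in the hash map and left untouched when it was not cloned. A self-contained model of that lookup-with-fallback, with std::string standing in for tree operands:

    #include <cstdio>
    #include <string>
    #include <unordered_map>
    #include <vector>

    /* Look T up in the original->copy map, keeping T when it was not cloned;
       this mirrors the fallback behaviour of the callback above.  */
    static std::string
    find_in_mapping (const std::string &t,
                     const std::unordered_map<std::string, std::string> &m)
    {
      auto it = m.find (t);
      return it != m.end () ? it->second : t;
    }

    int
    main ()
    {
      std::unordered_map<std::string, std::string> mapping
        = { { "x_1", "x_7" }, { "y_2", "y_8" } };
      std::vector<std::string> ops = { "x_1", "y_2", "z_3" }; /* z_3 not cloned */
      for (auto &op : ops)
        op = find_in_mapping (op, mapping);
      for (const auto &op : ops)
        printf ("%s ", op.c_str ());
      printf ("\n");  /* prints: x_7 y_8 z_3 */
      return 0;
    }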
*/ ++ for (unsigned i = 0; i < stmt_worklist.length (); ++i) ++ { ++ gimple *stmt = stmt_worklist[i]; ++ tree *new_op; ++ ++ for (unsigned j = 1; j < gimple_num_ops (stmt); ++j) ++ { ++ tree op = gimple_op (stmt, j); ++ if ((new_op = mapping.get(op))) ++ gimple_set_op (stmt, j, *new_op); ++ else ++ { ++ op = simplify_replace_tree (op, NULL_TREE, NULL_TREE, ++ &find_in_mapping, &mapping); ++ gimple_set_op (stmt, j, op); ++ } ++ } ++ } ++ ++ struct data_reference *dr; ++ vec datarefs = epilogue_vinfo->shared->datarefs; ++ FOR_EACH_VEC_ELT (datarefs, i, dr) ++ { ++ orig_stmt = DR_STMT (dr); ++ gcc_assert (gimple_uid (orig_stmt) > 0); ++ stmt_vinfo = epilogue_vinfo->stmt_vec_infos[gimple_uid (orig_stmt) - 1]; ++ /* Data references for gather loads and scatter stores do not use the ++ updated offset we set using ADVANCE. Instead we have to make sure the ++ reference in the data references point to the corresponding copy of ++ the original in the epilogue. */ ++ if (STMT_VINFO_GATHER_SCATTER_P (stmt_vinfo)) ++ { ++ DR_REF (dr) ++ = simplify_replace_tree (DR_REF (dr), NULL_TREE, NULL_TREE, ++ &find_in_mapping, &mapping); ++ DR_BASE_ADDRESS (dr) ++ = simplify_replace_tree (DR_BASE_ADDRESS (dr), NULL_TREE, NULL_TREE, ++ &find_in_mapping, &mapping); ++ } ++ DR_STMT (dr) = STMT_VINFO_STMT (stmt_vinfo); ++ stmt_vinfo->dr_aux.stmt = stmt_vinfo; ++ /* The vector size of the epilogue is smaller than that of the main loop ++ so the alignment is either the same or lower. This means the dr will ++ thus by definition be aligned. */ ++ STMT_VINFO_DR_INFO (stmt_vinfo)->base_misaligned = false; ++ } ++ ++ epilogue_vinfo->shared->datarefs_copy.release (); ++ epilogue_vinfo->shared->save_datarefs (); ++} ++ + /* Function vect_transform_loop. + + The analysis phase has determined that the loop is vectorizable. +@@ -8279,11 +8385,11 @@ vect_transform_loop (loop_vec_info loop_vinfo) + if (th >= vect_vf_for_cost (loop_vinfo) + && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) + { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_NOTE, vect_location, +- "Profitability threshold is %d loop iterations.\n", +- th); +- check_profitability = true; ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Profitability threshold is %d loop iterations.\n", ++ th); ++ check_profitability = true; + } + + /* Make sure there exists a single-predecessor exit bb. 
Do this before +@@ -8301,18 +8407,8 @@ vect_transform_loop (loop_vec_info loop_vinfo) + + if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) + { +- poly_uint64 versioning_threshold +- = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo); +- if (check_profitability +- && ordered_p (poly_uint64 (th), versioning_threshold)) +- { +- versioning_threshold = ordered_max (poly_uint64 (th), +- versioning_threshold); +- check_profitability = false; +- } + struct loop *sloop +- = vect_loop_versioning (loop_vinfo, th, check_profitability, +- versioning_threshold); ++ = vect_loop_versioning (loop_vinfo); + sloop->force_vectorize = false; + check_profitability = false; + } +@@ -8337,9 +8433,13 @@ vect_transform_loop (loop_vec_info loop_vinfo) + LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters; + tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo)); + bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo); ++ tree advance; ++ drs_init_vec orig_drs_init; ++ + epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, + &step_vector, &niters_vector_mult_vf, th, +- check_profitability, niters_no_overflow); ++ check_profitability, niters_no_overflow, ++ &advance, orig_drs_init); + + if (niters_vector == NULL_TREE) + { +@@ -8413,7 +8513,9 @@ vect_transform_loop (loop_vec_info loop_vinfo) + + if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def + || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def +- || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) ++ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def ++ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle ++ || STMT_VINFO_DEF_TYPE (stmt_info) == vect_internal_def) + && ! PURE_SLP_STMT (stmt_info)) + { + if (dump_enabled_p ()) +@@ -8565,12 +8667,9 @@ vect_transform_loop (loop_vec_info loop_vinfo) + dump_printf (MSG_NOTE, "\n"); + } + else +- { +- dump_printf_loc (MSG_NOTE, vect_location, +- "LOOP EPILOGUE VECTORIZED (VS="); +- dump_dec (MSG_NOTE, current_vector_size); +- dump_printf (MSG_NOTE, ")\n"); +- } ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "LOOP EPILOGUE VECTORIZED (MODE=%s)\n", ++ GET_MODE_NAME (loop_vinfo->vector_mode)); + } + + /* Loops vectorized with a variable factor won't benefit from +@@ -8592,57 +8691,14 @@ vect_transform_loop (loop_vec_info loop_vinfo) + since vectorized loop can have loop-carried dependencies. */ + loop->safelen = 0; + +- /* Don't vectorize epilogue for epilogue. */ +- if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) +- epilogue = NULL; +- +- if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK)) +- epilogue = NULL; +- + if (epilogue) + { +- auto_vector_sizes vector_sizes; +- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes); +- unsigned int next_size = 0; +- +- /* Note LOOP_VINFO_NITERS_KNOWN_P and LOOP_VINFO_INT_NITERS work +- on niters already ajusted for the iterations of the prologue. 
*/ +- if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) +- && known_eq (vf, lowest_vf)) +- { +- unsigned HOST_WIDE_INT eiters +- = (LOOP_VINFO_INT_NITERS (loop_vinfo) +- - LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)); +- eiters +- = eiters % lowest_vf + LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo); +- epilogue->nb_iterations_upper_bound = eiters - 1; +- epilogue->any_upper_bound = true; +- +- unsigned int ratio; +- while (next_size < vector_sizes.length () +- && !(constant_multiple_p (current_vector_size, +- vector_sizes[next_size], &ratio) +- && eiters >= lowest_vf / ratio)) +- next_size += 1; +- } +- else +- while (next_size < vector_sizes.length () +- && maybe_lt (current_vector_size, vector_sizes[next_size])) +- next_size += 1; +- +- if (next_size == vector_sizes.length ()) +- epilogue = NULL; +- } ++ update_epilogue_loop_vinfo (epilogue, advance, orig_drs_init); + +- if (epilogue) +- { ++ epilogue->simduid = loop->simduid; + epilogue->force_vectorize = loop->force_vectorize; + epilogue->safelen = loop->safelen; + epilogue->dont_vectorize = false; +- +- /* We may need to if-convert epilogue to vectorize it. */ +- if (LOOP_VINFO_SCALAR_LOOP (loop_vinfo)) +- tree_if_conversion (epilogue); + } + + return epilogue; +diff --git a/gcc/tree-vect-patterns.c b/gcc/tree-vect-patterns.c +index badf4e7104e..6356ecd692f 100644 +--- a/gcc/tree-vect-patterns.c ++++ b/gcc/tree-vect-patterns.c +@@ -46,6 +46,8 @@ along with GCC; see the file COPYING3. If not see + #include "cgraph.h" + #include "omp-simd-clone.h" + #include "predict.h" ++#include "tree-vector-builder.h" ++#include "vec-perm-indices.h" + + /* Return true if we have a useful VR_RANGE range for VAR, storing it + in *MIN_VALUE and *MAX_VALUE if so. Note the range in the dump files. */ +@@ -185,15 +187,15 @@ vect_get_external_def_edge (vec_info *vinfo, tree var) + is nonnull. */ + + static bool +-vect_supportable_direct_optab_p (tree otype, tree_code code, ++vect_supportable_direct_optab_p (vec_info *vinfo, tree otype, tree_code code, + tree itype, tree *vecotype_out, + tree *vecitype_out = NULL) + { +- tree vecitype = get_vectype_for_scalar_type (itype); ++ tree vecitype = get_vectype_for_scalar_type (vinfo, itype); + if (!vecitype) + return false; + +- tree vecotype = get_vectype_for_scalar_type (otype); ++ tree vecotype = get_vectype_for_scalar_type (vinfo, otype); + if (!vecotype) + return false; + +@@ -632,6 +634,7 @@ static bool + vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs, + gimple *stmt1, tree vectype) + { ++ vec_info *vinfo = stmt2_info->vinfo; + if (is_pattern_stmt_p (stmt2_info)) + { + /* STMT2_INFO is part of a pattern. Get the statement to which +@@ -675,7 +678,7 @@ vect_split_statement (stmt_vec_info stmt2_info, tree new_rhs, + two-statement pattern now. */ + gcc_assert (!STMT_VINFO_RELATED_STMT (stmt2_info)); + tree lhs_type = TREE_TYPE (gimple_get_lhs (stmt2_info->stmt)); +- tree lhs_vectype = get_vectype_for_scalar_type (lhs_type); ++ tree lhs_vectype = get_vectype_for_scalar_type (vinfo, lhs_type); + if (!lhs_vectype) + return false; + +@@ -712,6 +715,8 @@ static tree + vect_convert_input (stmt_vec_info stmt_info, tree type, + vect_unpromoted_value *unprom, tree vectype) + { ++ vec_info *vinfo = stmt_info->vinfo; ++ + /* Check for a no-op conversion. */ + if (types_compatible_p (type, TREE_TYPE (unprom->op))) + return unprom->op; +@@ -749,7 +754,7 @@ vect_convert_input (stmt_vec_info stmt_info, tree type, + unsigned promotion. 
*/ + tree midtype = build_nonstandard_integer_type + (TYPE_PRECISION (type), TYPE_UNSIGNED (unprom->type)); +- tree vec_midtype = get_vectype_for_scalar_type (midtype); ++ tree vec_midtype = get_vectype_for_scalar_type (vinfo, midtype); + if (vec_midtype) + { + input = vect_recog_temp_ssa_var (midtype, NULL); +@@ -830,17 +835,8 @@ vect_convert_output (stmt_vec_info stmt_info, tree type, gimple *pattern_stmt, + /* Return true if STMT_VINFO describes a reduction for which reassociation + is allowed. If STMT_INFO is part of a group, assume that it's part of + a reduction chain and optimistically assume that all statements +- except the last allow reassociation. */ +- +-static bool +-vect_reassociating_reduction_p (stmt_vec_info stmt_vinfo) +-{ +- return (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def +- ? STMT_VINFO_REDUC_TYPE (stmt_vinfo) != FOLD_LEFT_REDUCTION +- : REDUC_GROUP_FIRST_ELEMENT (stmt_vinfo) != NULL); +-} +- +-/* As above, but also require it to have code CODE and to be a reduction ++ except the last allow reassociation. ++ Also require it to have code CODE and to be a reduction + in the outermost loop. When returning true, store the operands in + *OP0_OUT and *OP1_OUT. */ + +@@ -862,11 +858,19 @@ vect_reassociating_reduction_p (stmt_vec_info stmt_info, tree_code code, + if (loop && nested_in_vect_loop_p (loop, stmt_info)) + return false; + +- if (!vect_reassociating_reduction_p (stmt_info)) ++ if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def) ++ { ++ if (needs_fold_left_reduction_p (TREE_TYPE (gimple_assign_lhs (assign)), ++ code)) ++ return false; ++ } ++ else if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) == NULL) + return false; + + *op0_out = gimple_assign_rhs1 (assign); + *op1_out = gimple_assign_rhs2 (assign); ++ if (commutative_tree_code (code) && STMT_VINFO_REDUC_IDX (stmt_info) == 0) ++ std::swap (*op0_out, *op1_out); + return true; + } + +@@ -983,7 +987,7 @@ vect_recog_dot_prod_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + vect_pattern_detected ("vect_recog_dot_prod_pattern", last_stmt); + + tree half_vectype; +- if (!vect_supportable_direct_optab_p (type, DOT_PROD_EXPR, half_type, ++ if (!vect_supportable_direct_optab_p (vinfo, type, DOT_PROD_EXPR, half_type, + type_out, &half_vectype)) + return NULL; + +@@ -1141,7 +1145,7 @@ vect_recog_sad_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + vect_pattern_detected ("vect_recog_sad_pattern", last_stmt); + + tree half_vectype; +- if (!vect_supportable_direct_optab_p (sum_type, SAD_EXPR, half_type, ++ if (!vect_supportable_direct_optab_p (vinfo, sum_type, SAD_EXPR, half_type, + type_out, &half_vectype)) + return NULL; + +@@ -1187,6 +1191,7 @@ vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out, + tree_code orig_code, tree_code wide_code, + bool shift_p, const char *name) + { ++ vec_info *vinfo = last_stmt_info->vinfo; + gimple *last_stmt = last_stmt_info->stmt; + + vect_unpromoted_value unprom[2]; +@@ -1206,8 +1211,8 @@ vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out, + TYPE_UNSIGNED (half_type)); + + /* Check target support */ +- tree vectype = get_vectype_for_scalar_type (half_type); +- tree vecitype = get_vectype_for_scalar_type (itype); ++ tree vectype = get_vectype_for_scalar_type (vinfo, half_type); ++ tree vecitype = get_vectype_for_scalar_type (vinfo, itype); + enum tree_code dummy_code; + int dummy_int; + auto_vec dummy_vec; +@@ -1219,7 +1224,7 @@ vect_recog_widen_op_pattern (stmt_vec_info last_stmt_info, tree *type_out, + &dummy_int, 
&dummy_vec)) + return NULL; + +- *type_out = get_vectype_for_scalar_type (type); ++ *type_out = get_vectype_for_scalar_type (vinfo, type); + if (!*type_out) + return NULL; + +@@ -1271,6 +1276,7 @@ vect_recog_widen_mult_pattern (stmt_vec_info last_stmt_info, tree *type_out) + static gimple * + vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + { ++ vec_info *vinfo = stmt_vinfo->vinfo; + gimple *last_stmt = stmt_vinfo->stmt; + tree base, exp; + gimple *stmt; +@@ -1339,7 +1345,7 @@ vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + if (node->simd_clones == NULL) + return NULL; + } +- *type_out = get_vectype_for_scalar_type (TREE_TYPE (base)); ++ *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base)); + if (!*type_out) + return NULL; + tree def = vect_recog_temp_ssa_var (TREE_TYPE (base), NULL); +@@ -1364,7 +1370,7 @@ vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + || (TREE_CODE (exp) == REAL_CST + && real_equal (&TREE_REAL_CST (exp), &dconst2))) + { +- if (!vect_supportable_direct_optab_p (TREE_TYPE (base), MULT_EXPR, ++ if (!vect_supportable_direct_optab_p (vinfo, TREE_TYPE (base), MULT_EXPR, + TREE_TYPE (base), type_out)) + return NULL; + +@@ -1377,7 +1383,7 @@ vect_recog_pow_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + if (TREE_CODE (exp) == REAL_CST + && real_equal (&TREE_REAL_CST (exp), &dconsthalf)) + { +- *type_out = get_vectype_for_scalar_type (TREE_TYPE (base)); ++ *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (base)); + if (*type_out + && direct_internal_fn_supported_p (IFN_SQRT, *type_out, + OPTIMIZE_FOR_SPEED)) +@@ -1470,8 +1476,8 @@ vect_recog_widen_sum_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + + vect_pattern_detected ("vect_recog_widen_sum_pattern", last_stmt); + +- if (!vect_supportable_direct_optab_p (type, WIDEN_SUM_EXPR, unprom0.type, +- type_out)) ++ if (!vect_supportable_direct_optab_p (vinfo, type, WIDEN_SUM_EXPR, ++ unprom0.type, type_out)) + return NULL; + + var = vect_recog_temp_ssa_var (type, NULL); +@@ -1662,7 +1668,7 @@ vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out) + + vect_pattern_detected ("vect_recog_over_widening_pattern", last_stmt); + +- *type_out = get_vectype_for_scalar_type (type); ++ *type_out = get_vectype_for_scalar_type (vinfo, type); + if (!*type_out) + return NULL; + +@@ -1683,8 +1689,8 @@ vect_recog_over_widening_pattern (stmt_vec_info last_stmt_info, tree *type_out) + wants to rewrite anyway. If targets have a minimum element size + for some optabs, we should pattern-match smaller ops to larger ops + where beneficial. */ +- tree new_vectype = get_vectype_for_scalar_type (new_type); +- tree op_vectype = get_vectype_for_scalar_type (op_type); ++ tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type); ++ tree op_vectype = get_vectype_for_scalar_type (vinfo, op_type); + if (!new_vectype || !op_vectype) + return NULL; + +@@ -1842,7 +1848,7 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out) + TYPE_UNSIGNED (new_type)); + + /* Check for target support. */ +- tree new_vectype = get_vectype_for_scalar_type (new_type); ++ tree new_vectype = get_vectype_for_scalar_type (vinfo, new_type); + if (!new_vectype + || !direct_internal_fn_supported_p (ifn, new_vectype, + OPTIMIZE_FOR_SPEED)) +@@ -1850,7 +1856,7 @@ vect_recog_average_pattern (stmt_vec_info last_stmt_info, tree *type_out) + + /* The IR requires a valid vector type for the cast result, even though + it's likely to be discarded. 
*/ +- *type_out = get_vectype_for_scalar_type (type); ++ *type_out = get_vectype_for_scalar_type (vinfo, type); + if (!*type_out) + return NULL; + +@@ -1936,7 +1942,7 @@ vect_recog_cast_forwprop_pattern (stmt_vec_info last_stmt_info, tree *type_out) + the unnecessary widening and narrowing. */ + vect_pattern_detected ("vect_recog_cast_forwprop_pattern", last_stmt); + +- *type_out = get_vectype_for_scalar_type (lhs_type); ++ *type_out = get_vectype_for_scalar_type (vinfo, lhs_type); + if (!*type_out) + return NULL; + +@@ -1996,24 +2002,107 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + enum vect_def_type dt; + optab optab1, optab2; + edge ext_def = NULL; ++ bool bswap16_p = false; + +- if (!is_gimple_assign (last_stmt)) +- return NULL; ++ if (is_gimple_assign (last_stmt)) ++ { ++ rhs_code = gimple_assign_rhs_code (last_stmt); ++ switch (rhs_code) ++ { ++ case LROTATE_EXPR: ++ case RROTATE_EXPR: ++ break; ++ default: ++ return NULL; ++ } + +- rhs_code = gimple_assign_rhs_code (last_stmt); +- switch (rhs_code) ++ lhs = gimple_assign_lhs (last_stmt); ++ oprnd0 = gimple_assign_rhs1 (last_stmt); ++ type = TREE_TYPE (oprnd0); ++ oprnd1 = gimple_assign_rhs2 (last_stmt); ++ } ++ else if (gimple_call_builtin_p (last_stmt, BUILT_IN_BSWAP16)) + { +- case LROTATE_EXPR: +- case RROTATE_EXPR: +- break; +- default: +- return NULL; ++ /* __builtin_bswap16 (x) is another form of x r>> 8. ++ The vectorizer has bswap support, but only if the argument isn't ++ promoted. */ ++ lhs = gimple_call_lhs (last_stmt); ++ oprnd0 = gimple_call_arg (last_stmt, 0); ++ type = TREE_TYPE (oprnd0); ++ if (TYPE_PRECISION (TREE_TYPE (lhs)) != 16 ++ || TYPE_PRECISION (type) <= 16 ++ || TREE_CODE (oprnd0) != SSA_NAME ++ || BITS_PER_UNIT != 8 ++ || !TYPE_UNSIGNED (TREE_TYPE (lhs))) ++ return NULL; ++ ++ stmt_vec_info def_stmt_info; ++ if (!vect_is_simple_use (oprnd0, vinfo, &dt, &def_stmt_info, &def_stmt)) ++ return NULL; ++ ++ if (dt != vect_internal_def) ++ return NULL; ++ ++ if (gimple_assign_cast_p (def_stmt)) ++ { ++ def = gimple_assign_rhs1 (def_stmt); ++ if (INTEGRAL_TYPE_P (TREE_TYPE (def)) ++ && TYPE_PRECISION (TREE_TYPE (def)) == 16) ++ oprnd0 = def; ++ } ++ ++ type = TREE_TYPE (lhs); ++ vectype = get_vectype_for_scalar_type (vinfo, type); ++ if (vectype == NULL_TREE) ++ return NULL; ++ ++ if (tree char_vectype = get_same_sized_vectype (char_type_node, vectype)) ++ { ++ /* The encoding uses one stepped pattern for each byte in the ++ 16-bit word. */ ++ vec_perm_builder elts (TYPE_VECTOR_SUBPARTS (char_vectype), 2, 3); ++ for (unsigned i = 0; i < 3; ++i) ++ for (unsigned j = 0; j < 2; ++j) ++ elts.quick_push ((i + 1) * 2 - j - 1); ++ ++ vec_perm_indices indices (elts, 1, ++ TYPE_VECTOR_SUBPARTS (char_vectype)); ++ if (can_vec_perm_const_p (TYPE_MODE (char_vectype), indices)) ++ { ++ /* vectorizable_bswap can handle the __builtin_bswap16 if we ++ undo the argument promotion. */ ++ if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0))) ++ { ++ def = vect_recog_temp_ssa_var (type, NULL); ++ def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0); ++ append_pattern_def_seq (stmt_vinfo, def_stmt); ++ oprnd0 = def; ++ } ++ ++ /* Pattern detected. */ ++ vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt); ++ ++ *type_out = vectype; ++ ++ /* Pattern supported. Create a stmt to be used to replace the ++ pattern, with the unpromoted argument. 
*/ ++ var = vect_recog_temp_ssa_var (type, NULL); ++ pattern_stmt = gimple_build_call (gimple_call_fndecl (last_stmt), ++ 1, oprnd0); ++ gimple_call_set_lhs (pattern_stmt, var); ++ gimple_call_set_fntype (as_a (pattern_stmt), ++ gimple_call_fntype (last_stmt)); ++ return pattern_stmt; ++ } ++ } ++ ++ oprnd1 = build_int_cst (integer_type_node, 8); ++ rhs_code = LROTATE_EXPR; ++ bswap16_p = true; + } ++ else ++ return NULL; + +- lhs = gimple_assign_lhs (last_stmt); +- oprnd0 = gimple_assign_rhs1 (last_stmt); +- type = TREE_TYPE (oprnd0); +- oprnd1 = gimple_assign_rhs2 (last_stmt); + if (TREE_CODE (oprnd0) != SSA_NAME + || TYPE_PRECISION (TREE_TYPE (lhs)) != TYPE_PRECISION (type) + || !INTEGRAL_TYPE_P (type) +@@ -2029,7 +2118,7 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + && dt != vect_external_def) + return NULL; + +- vectype = get_vectype_for_scalar_type (type); ++ vectype = get_vectype_for_scalar_type (vinfo, type); + if (vectype == NULL_TREE) + return NULL; + +@@ -2038,14 +2127,39 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + optab1 = optab_for_tree_code (rhs_code, vectype, optab_vector); + if (optab1 + && optab_handler (optab1, TYPE_MODE (vectype)) != CODE_FOR_nothing) +- return NULL; ++ { ++ use_rotate: ++ if (bswap16_p) ++ { ++ if (!useless_type_conversion_p (type, TREE_TYPE (oprnd0))) ++ { ++ def = vect_recog_temp_ssa_var (type, NULL); ++ def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0); ++ append_pattern_def_seq (stmt_vinfo, def_stmt); ++ oprnd0 = def; ++ } ++ ++ /* Pattern detected. */ ++ vect_pattern_detected ("vect_recog_rotate_pattern", last_stmt); ++ ++ *type_out = vectype; ++ ++ /* Pattern supported. Create a stmt to be used to replace the ++ pattern. */ ++ var = vect_recog_temp_ssa_var (type, NULL); ++ pattern_stmt = gimple_build_assign (var, LROTATE_EXPR, oprnd0, ++ oprnd1); ++ return pattern_stmt; ++ } ++ return NULL; ++ } + + if (is_a (vinfo) || dt != vect_internal_def) + { + optab2 = optab_for_tree_code (rhs_code, vectype, optab_scalar); + if (optab2 + && optab_handler (optab2, TYPE_MODE (vectype)) != CODE_FOR_nothing) +- return NULL; ++ goto use_rotate; + } + + /* If vector/vector or vector/scalar shifts aren't supported by the target, +@@ -2070,6 +2184,14 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + + *type_out = vectype; + ++ if (bswap16_p && !useless_type_conversion_p (type, TREE_TYPE (oprnd0))) ++ { ++ def = vect_recog_temp_ssa_var (type, NULL); ++ def_stmt = gimple_build_assign (def, NOP_EXPR, oprnd0); ++ append_pattern_def_seq (stmt_vinfo, def_stmt); ++ oprnd0 = def; ++ } ++ + if (dt == vect_external_def && TREE_CODE (oprnd1) == SSA_NAME) + ext_def = vect_get_external_def_edge (vinfo, oprnd1); + +@@ -2106,7 +2228,7 @@ vect_recog_rotate_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + } + else + { +- tree vecstype = get_vectype_for_scalar_type (stype); ++ tree vecstype = get_vectype_for_scalar_type (vinfo, stype); + + if (vecstype == NULL_TREE) + return NULL; +@@ -2235,7 +2357,7 @@ vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo, + if (!def_vinfo) + return NULL; + +- *type_out = get_vectype_for_scalar_type (TREE_TYPE (oprnd0)); ++ *type_out = get_vectype_for_scalar_type (vinfo, TREE_TYPE (oprnd0)); + if (*type_out == NULL_TREE) + return NULL; + +@@ -2258,7 +2380,8 @@ vect_recog_vector_vector_shift_pattern (stmt_vec_info stmt_vinfo, + TYPE_PRECISION (TREE_TYPE (oprnd1))); + def = vect_recog_temp_ssa_var (TREE_TYPE (rhs1), NULL); + def_stmt = 
gimple_build_assign (def, BIT_AND_EXPR, rhs1, mask); +- tree vecstype = get_vectype_for_scalar_type (TREE_TYPE (rhs1)); ++ tree vecstype = get_vectype_for_scalar_type (vinfo, ++ TREE_TYPE (rhs1)); + append_pattern_def_seq (stmt_vinfo, def_stmt, vecstype); + } + } +@@ -2423,6 +2546,7 @@ static gimple * + vect_synth_mult_by_constant (tree op, tree val, + stmt_vec_info stmt_vinfo) + { ++ vec_info *vinfo = stmt_vinfo->vinfo; + tree itype = TREE_TYPE (op); + machine_mode mode = TYPE_MODE (itype); + struct algorithm alg; +@@ -2441,7 +2565,7 @@ vect_synth_mult_by_constant (tree op, tree val, + + /* Targets that don't support vector shifts but support vector additions + can synthesize shifts that way. */ +- bool synth_shift_p = !vect_supportable_shift (LSHIFT_EXPR, multtype); ++ bool synth_shift_p = !vect_supportable_shift (vinfo, LSHIFT_EXPR, multtype); + + HOST_WIDE_INT hwval = tree_to_shwi (val); + /* Use MAX_COST here as we don't want to limit the sequence on rtx costs. +@@ -2452,7 +2576,7 @@ vect_synth_mult_by_constant (tree op, tree val, + if (!possible) + return NULL; + +- tree vectype = get_vectype_for_scalar_type (multtype); ++ tree vectype = get_vectype_for_scalar_type (vinfo, multtype); + + if (!vectype + || !target_supports_mult_synth_alg (&alg, variant, +@@ -2598,6 +2722,7 @@ vect_synth_mult_by_constant (tree op, tree val, + static gimple * + vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + { ++ vec_info *vinfo = stmt_vinfo->vinfo; + gimple *last_stmt = stmt_vinfo->stmt; + tree oprnd0, oprnd1, vectype, itype; + gimple *pattern_stmt; +@@ -2618,7 +2743,7 @@ vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + || !type_has_mode_precision_p (itype)) + return NULL; + +- vectype = get_vectype_for_scalar_type (itype); ++ vectype = get_vectype_for_scalar_type (vinfo, itype); + if (vectype == NULL_TREE) + return NULL; + +@@ -2686,6 +2811,7 @@ vect_recog_mult_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + static gimple * + vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + { ++ vec_info *vinfo = stmt_vinfo->vinfo; + gimple *last_stmt = stmt_vinfo->stmt; + tree oprnd0, oprnd1, vectype, itype, cond; + gimple *pattern_stmt, *def_stmt; +@@ -2718,7 +2844,7 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + return NULL; + + scalar_int_mode itype_mode = SCALAR_INT_TYPE_MODE (itype); +- vectype = get_vectype_for_scalar_type (itype); ++ vectype = get_vectype_for_scalar_type (vinfo, itype); + if (vectype == NULL_TREE) + return NULL; + +@@ -2785,7 +2911,7 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + { + tree utype + = build_nonstandard_integer_type (prec, 1); +- tree vecutype = get_vectype_for_scalar_type (utype); ++ tree vecutype = get_vectype_for_scalar_type (vinfo, utype); + tree shift + = build_int_cst (utype, GET_MODE_BITSIZE (itype_mode) + - tree_log2 (oprnd1)); +@@ -3104,6 +3230,7 @@ vect_recog_divmod_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + static gimple * + vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + { ++ vec_info *vinfo = stmt_vinfo->vinfo; + gimple *last_stmt = stmt_vinfo->stmt; + tree cond_expr, then_clause, else_clause; + tree type, vectype, comp_vectype, itype = NULL_TREE, vecitype; +@@ -3126,7 +3253,7 @@ vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + return NULL; + + comp_scalar_type = TREE_TYPE (TREE_OPERAND (cond_expr, 0)); +- comp_vectype = get_vectype_for_scalar_type (comp_scalar_type); ++ 
comp_vectype = get_vectype_for_scalar_type (vinfo, comp_scalar_type); + if (comp_vectype == NULL_TREE) + return NULL; + +@@ -3174,7 +3301,7 @@ vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + if (GET_MODE_BITSIZE (type_mode) == cmp_mode_size) + return NULL; + +- vectype = get_vectype_for_scalar_type (type); ++ vectype = get_vectype_for_scalar_type (vinfo, type); + if (vectype == NULL_TREE) + return NULL; + +@@ -3189,7 +3316,7 @@ vect_recog_mixed_size_cond_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + || GET_MODE_BITSIZE (SCALAR_TYPE_MODE (itype)) != cmp_mode_size) + return NULL; + +- vecitype = get_vectype_for_scalar_type (itype); ++ vecitype = get_vectype_for_scalar_type (vinfo, itype); + if (vecitype == NULL_TREE) + return NULL; + +@@ -3283,11 +3410,12 @@ check_bool_pattern (tree var, vec_info *vinfo, hash_set &stmts) + if (stmt_could_throw_p (cfun, def_stmt)) + return false; + +- comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1)); ++ comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1)); + if (comp_vectype == NULL_TREE) + return false; + +- tree mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1)); ++ tree mask_type = get_mask_type_for_scalar_type (vinfo, ++ TREE_TYPE (rhs1)); + if (mask_type + && expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code)) + return false; +@@ -3297,7 +3425,7 @@ check_bool_pattern (tree var, vec_info *vinfo, hash_set &stmts) + scalar_mode mode = SCALAR_TYPE_MODE (TREE_TYPE (rhs1)); + tree itype + = build_nonstandard_integer_type (GET_MODE_BITSIZE (mode), 1); +- vecitype = get_vectype_for_scalar_type (itype); ++ vecitype = get_vectype_for_scalar_type (vinfo, itype); + if (vecitype == NULL_TREE) + return false; + } +@@ -3326,10 +3454,11 @@ check_bool_pattern (tree var, vec_info *vinfo, hash_set &stmts) + static tree + adjust_bool_pattern_cast (tree type, tree var, stmt_vec_info stmt_info) + { ++ vec_info *vinfo = stmt_info->vinfo; + gimple *cast_stmt = gimple_build_assign (vect_recog_temp_ssa_var (type, NULL), + NOP_EXPR, var); + append_pattern_def_seq (stmt_info, cast_stmt, +- get_vectype_for_scalar_type (type)); ++ get_vectype_for_scalar_type (vinfo, type)); + return gimple_assign_lhs (cast_stmt); + } + +@@ -3343,6 +3472,7 @@ static void + adjust_bool_pattern (tree var, tree out_type, + stmt_vec_info stmt_info, hash_map &defs) + { ++ vec_info *vinfo = stmt_info->vinfo; + gimple *stmt = SSA_NAME_DEF_STMT (var); + enum tree_code rhs_code, def_rhs_code; + tree itype, cond_expr, rhs1, rhs2, irhs1, irhs2; +@@ -3504,7 +3634,7 @@ adjust_bool_pattern (tree var, tree out_type, + + gimple_set_location (pattern_stmt, loc); + append_pattern_def_seq (stmt_info, pattern_stmt, +- get_vectype_for_scalar_type (itype)); ++ get_vectype_for_scalar_type (vinfo, itype)); + defs.put (var, gimple_assign_lhs (pattern_stmt)); + } + +@@ -3607,14 +3737,14 @@ search_type_for_mask_1 (tree var, vec_info *vinfo, + break; + } + +- comp_vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1)); ++ comp_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1)); + if (comp_vectype == NULL_TREE) + { + res = NULL_TREE; + break; + } + +- mask_type = get_mask_type_for_scalar_type (TREE_TYPE (rhs1)); ++ mask_type = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (rhs1)); + if (!mask_type + || !expand_vec_cmp_expr_p (comp_vectype, mask_type, rhs_code)) + { +@@ -3722,7 +3852,7 @@ vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + if (! 
INTEGRAL_TYPE_P (TREE_TYPE (lhs)) + || TYPE_PRECISION (TREE_TYPE (lhs)) == 1) + return NULL; +- vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs)); ++ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs)); + if (vectype == NULL_TREE) + return NULL; + +@@ -3759,7 +3889,7 @@ vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + + if (!useless_type_conversion_p (type, TREE_TYPE (lhs))) + { +- tree new_vectype = get_vectype_for_scalar_type (type); ++ tree new_vectype = get_vectype_for_scalar_type (vinfo, type); + append_pattern_def_seq (stmt_vinfo, pattern_stmt, new_vectype); + + lhs = vect_recog_temp_ssa_var (TREE_TYPE (lhs), NULL); +@@ -3775,7 +3905,7 @@ vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + else if (rhs_code == COND_EXPR + && TREE_CODE (var) == SSA_NAME) + { +- vectype = get_vectype_for_scalar_type (TREE_TYPE (lhs)); ++ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs)); + if (vectype == NULL_TREE) + return NULL; + +@@ -3789,7 +3919,7 @@ vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + tree type + = build_nonstandard_integer_type (prec, + TYPE_UNSIGNED (TREE_TYPE (var))); +- if (get_vectype_for_scalar_type (type) == NULL_TREE) ++ if (get_vectype_for_scalar_type (vinfo, type) == NULL_TREE) + return NULL; + + if (!check_bool_pattern (var, vinfo, bool_stmts)) +@@ -3833,7 +3963,7 @@ vect_recog_bool_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + + cst0 = build_int_cst (type, 0); + cst1 = build_int_cst (type, 1); +- new_vectype = get_vectype_for_scalar_type (type); ++ new_vectype = get_vectype_for_scalar_type (vinfo, type); + + rhs = vect_recog_temp_ssa_var (type, NULL); + pattern_stmt = gimple_build_assign (rhs, COND_EXPR, var, cst1, cst0); +@@ -3874,7 +4004,7 @@ build_mask_conversion (tree mask, tree vectype, stmt_vec_info stmt_vinfo) + gimple *stmt; + tree masktype, tmp; + +- masktype = build_same_sized_truth_vector_type (vectype); ++ masktype = truth_type_for (vectype); + tmp = vect_recog_temp_ssa_var (TREE_TYPE (masktype), NULL); + stmt = gimple_build_assign (tmp, CONVERT_EXPR, mask); + append_pattern_def_seq (stmt_vinfo, stmt, masktype); +@@ -3934,19 +4064,19 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + { + int rhs_index = internal_fn_stored_value_index (ifn); + tree rhs = gimple_call_arg (last_stmt, rhs_index); +- vectype1 = get_vectype_for_scalar_type (TREE_TYPE (rhs)); ++ vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs)); + } + else + { + lhs = gimple_call_lhs (last_stmt); +- vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs)); ++ vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs)); + } + + tree mask_arg = gimple_call_arg (last_stmt, mask_argno); + tree mask_arg_type = search_type_for_mask (mask_arg, vinfo); + if (!mask_arg_type) + return NULL; +- vectype2 = get_mask_type_for_scalar_type (mask_arg_type); ++ vectype2 = get_mask_type_for_scalar_type (vinfo, mask_arg_type); + + if (!vectype1 || !vectype2 + || known_eq (TYPE_VECTOR_SUBPARTS (vectype1), +@@ -3992,7 +4122,7 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + /* Check for cond expression requiring mask conversion. 
*/ + if (rhs_code == COND_EXPR) + { +- vectype1 = get_vectype_for_scalar_type (TREE_TYPE (lhs)); ++ vectype1 = get_vectype_for_scalar_type (vinfo, TREE_TYPE (lhs)); + + if (TREE_CODE (rhs1) == SSA_NAME) + { +@@ -4023,7 +4153,7 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + else + return NULL; + +- vectype2 = get_mask_type_for_scalar_type (rhs1_type); ++ vectype2 = get_mask_type_for_scalar_type (vinfo, rhs1_type); + + if (!vectype1 || !vectype2) + return NULL; +@@ -4058,7 +4188,8 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + tree wide_scalar_type = build_nonstandard_integer_type + (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype1))), + TYPE_UNSIGNED (rhs1_type)); +- tree vectype3 = get_vectype_for_scalar_type (wide_scalar_type); ++ tree vectype3 = get_vectype_for_scalar_type (vinfo, ++ wide_scalar_type); + if (expand_vec_cond_expr_p (vectype1, vectype3, TREE_CODE (rhs1))) + return NULL; + } +@@ -4113,14 +4244,14 @@ vect_recog_mask_conversion_pattern (stmt_vec_info stmt_vinfo, tree *type_out) + + if (TYPE_PRECISION (rhs1_type) < TYPE_PRECISION (rhs2_type)) + { +- vectype1 = get_mask_type_for_scalar_type (rhs1_type); ++ vectype1 = get_mask_type_for_scalar_type (vinfo, rhs1_type); + if (!vectype1) + return NULL; + rhs2 = build_mask_conversion (rhs2, vectype1, stmt_vinfo); + } + else + { +- vectype1 = get_mask_type_for_scalar_type (rhs2_type); ++ vectype1 = get_mask_type_for_scalar_type (vinfo, rhs2_type); + if (!vectype1) + return NULL; + rhs1 = build_mask_conversion (rhs1, vectype1, stmt_vinfo); +@@ -4191,7 +4322,7 @@ vect_convert_mask_for_vectype (tree mask, tree vectype, + tree mask_type = search_type_for_mask (mask, vinfo); + if (mask_type) + { +- tree mask_vectype = get_mask_type_for_scalar_type (mask_type); ++ tree mask_vectype = get_mask_type_for_scalar_type (vinfo, mask_type); + if (mask_vectype + && maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), + TYPE_VECTOR_SUBPARTS (mask_vectype))) +@@ -4214,10 +4345,11 @@ vect_add_conversion_to_pattern (tree type, tree value, stmt_vec_info stmt_info) + if (useless_type_conversion_p (type, TREE_TYPE (value))) + return value; + ++ vec_info *vinfo = stmt_info->vinfo; + tree new_value = vect_recog_temp_ssa_var (type, NULL); + gassign *conversion = gimple_build_assign (new_value, CONVERT_EXPR, value); + append_pattern_def_seq (stmt_info, conversion, +- get_vectype_for_scalar_type (type)); ++ get_vectype_for_scalar_type (vinfo, type)); + return new_value; + } + +@@ -4253,7 +4385,8 @@ vect_recog_gather_scatter_pattern (stmt_vec_info stmt_info, tree *type_out) + return NULL; + + /* Convert the mask to the right form. 
*/ +- tree gs_vectype = get_vectype_for_scalar_type (gs_info.element_type); ++ tree gs_vectype = get_vectype_for_scalar_type (loop_vinfo, ++ gs_info.element_type); + if (mask) + mask = vect_convert_mask_for_vectype (mask, gs_vectype, stmt_info, + loop_vinfo); +@@ -4731,6 +4864,7 @@ static inline void + vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt, + tree pattern_vectype) + { ++ stmt_vec_info orig_stmt_info_saved = orig_stmt_info; + gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (orig_stmt_info); + + gimple *orig_pattern_stmt = NULL; +@@ -4765,6 +4899,9 @@ vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt, + for (gimple_stmt_iterator si = gsi_start (def_seq); + !gsi_end_p (si); gsi_next (&si)) + { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "extra pattern stmt: %G", gsi_stmt (si)); + stmt_vec_info pattern_stmt_info + = vect_init_pattern_stmt (gsi_stmt (si), + orig_stmt_info, pattern_vectype); +@@ -4790,6 +4927,60 @@ vect_mark_pattern_stmts (stmt_vec_info orig_stmt_info, gimple *pattern_stmt, + } + else + vect_set_pattern_stmt (pattern_stmt, orig_stmt_info, pattern_vectype); ++ ++ /* Transfer reduction path info to the pattern. */ ++ if (STMT_VINFO_REDUC_IDX (orig_stmt_info_saved) != -1) ++ { ++ vec_info *vinfo = orig_stmt_info_saved->vinfo; ++ tree lookfor = gimple_op (orig_stmt_info_saved->stmt, ++ 1 + STMT_VINFO_REDUC_IDX (orig_stmt_info)); ++ /* Search the pattern def sequence and the main pattern stmt. Note ++ we may have inserted all into a containing pattern def sequence ++ so the following is a bit awkward. */ ++ gimple_stmt_iterator si; ++ gimple *s; ++ if (def_seq) ++ { ++ si = gsi_start (def_seq); ++ s = gsi_stmt (si); ++ gsi_next (&si); ++ } ++ else ++ { ++ si = gsi_none (); ++ s = pattern_stmt; ++ } ++ do ++ { ++ bool found = false; ++ for (unsigned i = 1; i < gimple_num_ops (s); ++i) ++ if (gimple_op (s, i) == lookfor) ++ { ++ STMT_VINFO_REDUC_IDX (vinfo->lookup_stmt (s)) = i - 1; ++ lookfor = gimple_get_lhs (s); ++ found = true; ++ break; ++ } ++ if (s == pattern_stmt) ++ { ++ if (!found && dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "failed to update reduction index.\n"); ++ break; ++ } ++ if (gsi_end_p (si)) ++ s = pattern_stmt; ++ else ++ { ++ s = gsi_stmt (si); ++ if (s == pattern_stmt) ++ /* Found the end inside a bigger pattern def seq. */ ++ si = gsi_none (); ++ else ++ gsi_next (&si); ++ } ++ } while (1); ++ } + } + + /* Function vect_pattern_recog_1 +diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c +index 2abf480270c..0bef35782b5 100644 +--- a/gcc/tree-vect-slp.c ++++ b/gcc/tree-vect-slp.c +@@ -79,6 +79,7 @@ vect_free_slp_tree (slp_tree node, bool final_p) + + SLP_TREE_CHILDREN (node).release (); + SLP_TREE_SCALAR_STMTS (node).release (); ++ SLP_TREE_SCALAR_OPS (node).release (); + SLP_TREE_VEC_STMTS (node).release (); + SLP_TREE_LOAD_PERMUTATION (node).release (); + +@@ -122,6 +123,7 @@ vect_create_new_slp_node (vec scalar_stmts) + + node = XNEW (struct _slp_tree); + SLP_TREE_SCALAR_STMTS (node) = scalar_stmts; ++ SLP_TREE_SCALAR_OPS (node) = vNULL; + SLP_TREE_VEC_STMTS (node).create (0); + SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; + SLP_TREE_CHILDREN (node).create (nops); +@@ -138,6 +140,28 @@ vect_create_new_slp_node (vec scalar_stmts) + return node; + } + ++/* Create an SLP node for OPS. 
*/ ++ ++static slp_tree ++vect_create_new_slp_node (vec ops) ++{ ++ slp_tree node; ++ ++ node = XNEW (struct _slp_tree); ++ SLP_TREE_SCALAR_STMTS (node) = vNULL; ++ SLP_TREE_SCALAR_OPS (node) = ops; ++ SLP_TREE_VEC_STMTS (node).create (0); ++ SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; ++ SLP_TREE_CHILDREN (node) = vNULL; ++ SLP_TREE_LOAD_PERMUTATION (node) = vNULL; ++ SLP_TREE_TWO_OPERATORS (node) = false; ++ SLP_TREE_DEF_TYPE (node) = vect_external_def; ++ node->refcnt = 1; ++ node->max_nunits = 1; ++ ++ return node; ++} ++ + + /* This structure is used in creation of an SLP tree. Each instance + corresponds to the same operand in a group of scalar stmts in an SLP +@@ -146,6 +170,8 @@ typedef struct _slp_oprnd_info + { + /* Def-stmts for the operands. */ + vec def_stmts; ++ /* Operands. */ ++ vec ops; + /* Information about the first statement, its vector def-type, type, the + operand itself in case it's constant, and an indication if it's a pattern + stmt. */ +@@ -169,6 +195,7 @@ vect_create_oprnd_info (int nops, int group_size) + { + oprnd_info = XNEW (struct _slp_oprnd_info); + oprnd_info->def_stmts.create (group_size); ++ oprnd_info->ops.create (group_size); + oprnd_info->first_dt = vect_uninitialized_def; + oprnd_info->first_op_type = NULL_TREE; + oprnd_info->any_pattern = false; +@@ -190,6 +217,7 @@ vect_free_oprnd_info (vec &oprnds_info) + FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info) + { + oprnd_info->def_stmts.release (); ++ oprnd_info->ops.release (); + XDELETE (oprnd_info); + } + +@@ -197,6 +225,19 @@ vect_free_oprnd_info (vec &oprnds_info) + } + + ++/* Return true if STMTS contains a pattern statement. */ ++ ++static bool ++vect_contains_pattern_stmt_p (vec stmts) ++{ ++ stmt_vec_info stmt_info; ++ unsigned int i; ++ FOR_EACH_VEC_ELT (stmts, i, stmt_info) ++ if (is_pattern_stmt_p (stmt_info)) ++ return true; ++ return false; ++} ++ + /* Find the place of the data-ref in STMT_INFO in the interleaving chain + that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part + of the chain. */ +@@ -231,7 +272,8 @@ vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info, + (if nonnull). */ + + bool +-can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode, ++can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count, ++ machine_mode elt_mode, + unsigned int *nvectors_out, + tree *vector_type_out, + tree *permutes) +@@ -243,7 +285,7 @@ can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode, + { + scalar_int_mode int_mode; + poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT; +- if (multiple_p (current_vector_size, elt_bytes, &nelts) ++ if (multiple_p (GET_MODE_SIZE (vinfo->vector_mode), elt_bytes, &nelts) + && int_mode_for_size (elt_bits, 0).exists (&int_mode)) + { + tree int_type = build_nonstandard_integer_type +@@ -322,6 +364,14 @@ vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap, + { + internal_fn ifn = gimple_call_internal_fn (stmt); + commutative_op = first_commutative_argument (ifn); ++ ++ /* Masked load, only look at mask. */ ++ if (ifn == IFN_MASK_LOAD) ++ { ++ number_of_oprnds = 1; ++ /* Mask operand index. */ ++ first_op_idx = 5; ++ } + } + } + else if (gassign *stmt = dyn_cast (stmt_info->stmt)) +@@ -380,6 +430,13 @@ again: + + if (first) + { ++ /* For the swapping logic below force vect_reduction_def ++ for the reduction op in a SLP reduction group. 
*/ ++ if (!STMT_VINFO_DATA_REF (stmt_info) ++ && REDUC_GROUP_FIRST_ELEMENT (stmt_info) ++ && (int)i == STMT_VINFO_REDUC_IDX (stmt_info) ++ && def_stmt_info) ++ dt = vect_reduction_def; + oprnd_info->first_dt = dt; + oprnd_info->first_op_type = TREE_TYPE (oprnd); + } +@@ -389,20 +446,35 @@ again: + the def-stmt/s of the first stmt. Allow different definition + types for reduction chains: the first stmt must be a + vect_reduction_def (a phi node), and the rest +- vect_internal_def. */ ++ end in the reduction chain. */ + tree type = TREE_TYPE (oprnd); + if ((oprnd_info->first_dt != dt + && !(oprnd_info->first_dt == vect_reduction_def +- && dt == vect_internal_def) ++ && !STMT_VINFO_DATA_REF (stmt_info) ++ && REDUC_GROUP_FIRST_ELEMENT (stmt_info) ++ && def_stmt_info ++ && !STMT_VINFO_DATA_REF (def_stmt_info) ++ && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info) ++ == REDUC_GROUP_FIRST_ELEMENT (stmt_info))) + && !((oprnd_info->first_dt == vect_external_def + || oprnd_info->first_dt == vect_constant_def) + && (dt == vect_external_def + || dt == vect_constant_def))) +- || !types_compatible_p (oprnd_info->first_op_type, type)) ++ || !types_compatible_p (oprnd_info->first_op_type, type) ++ || (!STMT_VINFO_DATA_REF (stmt_info) ++ && REDUC_GROUP_FIRST_ELEMENT (stmt_info) ++ && ((!def_stmt_info ++ || STMT_VINFO_DATA_REF (def_stmt_info) ++ || (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info) ++ != REDUC_GROUP_FIRST_ELEMENT (stmt_info))) ++ != (oprnd_info->first_dt != vect_reduction_def)))) + { + /* Try swapping operands if we got a mismatch. */ + if (i == commutative_op && !swapped) + { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "trying swapped operands\n"); + swapped = true; + goto again; + } +@@ -415,9 +487,9 @@ again: + } + if ((dt == vect_constant_def + || dt == vect_external_def) +- && !current_vector_size.is_constant () ++ && !GET_MODE_SIZE (vinfo->vector_mode).is_constant () + && (TREE_CODE (type) == BOOLEAN_TYPE +- || !can_duplicate_and_interleave_p (stmts.length (), ++ || !can_duplicate_and_interleave_p (vinfo, stmts.length (), + TYPE_MODE (type)))) + { + if (dump_enabled_p ()) +@@ -431,14 +503,37 @@ again: + /* Check the types of the definitions. */ + switch (dt) + { +- case vect_constant_def: + case vect_external_def: ++ /* Make sure to demote the overall operand to external. */ ++ oprnd_info->first_dt = vect_external_def; ++ /* Fallthru. */ ++ case vect_constant_def: ++ oprnd_info->def_stmts.quick_push (NULL); ++ oprnd_info->ops.quick_push (oprnd); + break; + ++ case vect_internal_def: + case vect_reduction_def: ++ if (oprnd_info->first_dt == vect_reduction_def ++ && !STMT_VINFO_DATA_REF (stmt_info) ++ && REDUC_GROUP_FIRST_ELEMENT (stmt_info) ++ && !STMT_VINFO_DATA_REF (def_stmt_info) ++ && (REDUC_GROUP_FIRST_ELEMENT (def_stmt_info) ++ == REDUC_GROUP_FIRST_ELEMENT (stmt_info))) ++ { ++ /* For a SLP reduction chain we want to duplicate the ++ reduction to each of the chain members. That gets ++ us a sane SLP graph (still the stmts are not 100% ++ correct wrt the initial values). */ ++ gcc_assert (!first); ++ oprnd_info->def_stmts.quick_push (oprnd_info->def_stmts[0]); ++ oprnd_info->ops.quick_push (oprnd_info->ops[0]); ++ break; ++ } ++ /* Fallthru. 
*/ + case vect_induction_def: +- case vect_internal_def: + oprnd_info->def_stmts.quick_push (def_stmt_info); ++ oprnd_info->ops.quick_push (oprnd); + break; + + default: +@@ -468,6 +563,8 @@ again: + + if (first_op_cond) + { ++ /* To get rid of this swapping we have to move the stmt code ++ to the SLP tree as well (and gather it here per stmt). */ + gassign *stmt = as_a (stmt_info->stmt); + tree cond = gimple_assign_rhs1 (stmt); + enum tree_code code = TREE_CODE (cond); +@@ -492,10 +589,8 @@ again: + } + else + { +- unsigned int op = commutative_op + first_op_idx; +- swap_ssa_operands (stmt_info->stmt, +- gimple_op_ptr (stmt_info->stmt, op), +- gimple_op_ptr (stmt_info->stmt, op + 1)); ++ /* Commutative ops need not reflect swapping, ops are in ++ the SLP tree. */ + } + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, +@@ -620,7 +715,7 @@ vect_two_operations_perm_ok_p (vec stmts, + is false then this indicates the comparison could not be + carried out or the stmts will never be vectorized by SLP. + +- Note COND_EXPR is possibly ismorphic to another one after swapping its ++ Note COND_EXPR is possibly isomorphic to another one after swapping its + operands. Set SWAP[i] to 1 if stmt I is COND_EXPR and isomorphic to + the first stmt by swapping the two operands of comparison; set SWAP[i] + to 2 if stmt I is isormorphic to the first stmt by inverting the code +@@ -1030,7 +1125,6 @@ vect_build_slp_tree_2 (vec_info *vinfo, + vec stmts, unsigned int group_size, + poly_uint64 *max_nunits, + bool *matches, unsigned *npermutes, unsigned *tree_size, +- unsigned max_tree_size, + scalar_stmts_to_slp_tree_map_t *bst_map); + + static slp_tree +@@ -1038,7 +1132,6 @@ vect_build_slp_tree (vec_info *vinfo, + vec stmts, unsigned int group_size, + poly_uint64 *max_nunits, + bool *matches, unsigned *npermutes, unsigned *tree_size, +- unsigned max_tree_size, + scalar_stmts_to_slp_tree_map_t *bst_map) + { + if (slp_tree *leader = bst_map->get (stmts)) +@@ -1056,8 +1149,7 @@ vect_build_slp_tree (vec_info *vinfo, + poly_uint64 this_max_nunits = 1; + slp_tree res = vect_build_slp_tree_2 (vinfo, stmts, group_size, + &this_max_nunits, +- matches, npermutes, tree_size, +- max_tree_size, bst_map); ++ matches, npermutes, tree_size, bst_map); + if (res) + { + res->max_nunits = this_max_nunits; +@@ -1081,7 +1173,6 @@ vect_build_slp_tree_2 (vec_info *vinfo, + vec stmts, unsigned int group_size, + poly_uint64 *max_nunits, + bool *matches, unsigned *npermutes, unsigned *tree_size, +- unsigned max_tree_size, + scalar_stmts_to_slp_tree_map_t *bst_map) + { + unsigned nops, i, this_tree_size = 0; +@@ -1109,7 +1200,7 @@ vect_build_slp_tree_2 (vec_info *vinfo, + if (gphi *stmt = dyn_cast (stmt_info->stmt)) + { + tree scalar_type = TREE_TYPE (PHI_RESULT (stmt)); +- tree vectype = get_vectype_for_scalar_type (scalar_type); ++ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type); + if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits)) + return NULL; + +@@ -1129,18 +1220,12 @@ vect_build_slp_tree_2 (vec_info *vinfo, + /* Else def types have to match. */ + stmt_vec_info other_info; + FOR_EACH_VEC_ELT (stmts, i, other_info) +- { +- /* But for reduction chains only check on the first stmt. 
*/ +- if (!STMT_VINFO_DATA_REF (other_info) +- && REDUC_GROUP_FIRST_ELEMENT (other_info) +- && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info) +- continue; +- if (STMT_VINFO_DEF_TYPE (other_info) != def_type) +- return NULL; +- } ++ if (STMT_VINFO_DEF_TYPE (other_info) != def_type) ++ return NULL; + } + else + return NULL; ++ (*tree_size)++; + node = vect_create_new_slp_node (stmts); + return node; + } +@@ -1152,13 +1237,23 @@ vect_build_slp_tree_2 (vec_info *vinfo, + &this_max_nunits, matches, &two_operators)) + return NULL; + +- /* If the SLP node is a load, terminate the recursion. */ ++ /* If the SLP node is a load, terminate the recursion unless masked. */ + if (STMT_VINFO_GROUPED_ACCESS (stmt_info) + && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) + { +- *max_nunits = this_max_nunits; +- node = vect_create_new_slp_node (stmts); +- return node; ++ if (gcall *stmt = dyn_cast (stmt_info->stmt)) ++ { ++ /* Masked load. */ ++ gcc_assert (gimple_call_internal_p (stmt, IFN_MASK_LOAD)); ++ nops = 1; ++ } ++ else ++ { ++ *max_nunits = this_max_nunits; ++ (*tree_size)++; ++ node = vect_create_new_slp_node (stmts); ++ return node; ++ } + } + + /* Get at the operands, verifying they are compatible. */ +@@ -1184,9 +1279,6 @@ vect_build_slp_tree_2 (vec_info *vinfo, + + stmt_info = stmts[0]; + +- if (tree_size) +- max_tree_size -= *tree_size; +- + /* Create SLP_TREE nodes for the definition node/s. */ + FOR_EACH_VEC_ELT (oprnds_info, i, oprnd_info) + { +@@ -1194,32 +1286,34 @@ vect_build_slp_tree_2 (vec_info *vinfo, + unsigned old_tree_size = this_tree_size; + unsigned int j; + ++ if (oprnd_info->first_dt == vect_uninitialized_def) ++ { ++ /* COND_EXPR have one too many eventually if the condition ++ is a SSA name. */ ++ gcc_assert (i == 3 && nops == 4); ++ continue; ++ } ++ + if (oprnd_info->first_dt != vect_internal_def + && oprnd_info->first_dt != vect_reduction_def + && oprnd_info->first_dt != vect_induction_def) +- continue; +- +- if (++this_tree_size > max_tree_size) + { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, +- vect_location, +- "Build SLP failed: SLP tree too large\n"); +- FOR_EACH_VEC_ELT (children, j, child) +- vect_free_slp_tree (child, false); +- vect_free_oprnd_info (oprnds_info); +- return NULL; ++ slp_tree invnode = vect_create_new_slp_node (oprnd_info->ops); ++ SLP_TREE_DEF_TYPE (invnode) = oprnd_info->first_dt; ++ oprnd_info->ops = vNULL; ++ children.safe_push (invnode); ++ continue; + } + + if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts, + group_size, &this_max_nunits, + matches, npermutes, +- &this_tree_size, +- max_tree_size, bst_map)) != NULL) ++ &this_tree_size, bst_map)) != NULL) + { + /* If we have all children of child built up from scalars then just + throw that away and build it up this node from scalars. */ +- if (!SLP_TREE_CHILDREN (child).is_empty () ++ if (is_a (vinfo) ++ && !SLP_TREE_CHILDREN (child).is_empty () + /* ??? Rejecting patterns this way doesn't work. We'd have to + do extra work to cancel the pattern so the uses see the + scalar version. 
*/ +@@ -1244,6 +1338,9 @@ vect_build_slp_tree_2 (vec_info *vinfo, + "scalars instead\n"); + oprnd_info->def_stmts = vNULL; + SLP_TREE_DEF_TYPE (child) = vect_external_def; ++ SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops; ++ oprnd_info->ops = vNULL; ++ ++this_tree_size; + children.safe_push (child); + continue; + } +@@ -1273,9 +1370,12 @@ vect_build_slp_tree_2 (vec_info *vinfo, + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Building vector operands from scalars\n"); ++ this_tree_size++; + child = vect_create_new_slp_node (oprnd_info->def_stmts); + SLP_TREE_DEF_TYPE (child) = vect_external_def; ++ SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops; + children.safe_push (child); ++ oprnd_info->ops = vNULL; + oprnd_info->def_stmts = vNULL; + continue; + } +@@ -1355,6 +1455,8 @@ vect_build_slp_tree_2 (vec_info *vinfo, + { + std::swap (oprnds_info[0]->def_stmts[j], + oprnds_info[1]->def_stmts[j]); ++ std::swap (oprnds_info[0]->ops[j], ++ oprnds_info[1]->ops[j]); + if (dump_enabled_p ()) + dump_printf (MSG_NOTE, "%d ", j); + } +@@ -1365,37 +1467,12 @@ vect_build_slp_tree_2 (vec_info *vinfo, + if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts, + group_size, &this_max_nunits, + tem, npermutes, +- &this_tree_size, +- max_tree_size, bst_map)) != NULL) ++ &this_tree_size, bst_map)) != NULL) + { +- /* ... so if successful we can apply the operand swapping +- to the GIMPLE IL. This is necessary because for example +- vect_get_slp_defs uses operand indexes and thus expects +- canonical operand order. This is also necessary even +- if we end up building the operand from scalars as +- we'll continue to process swapped operand two. */ +- for (j = 0; j < group_size; ++j) +- gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false); +- for (j = 0; j < group_size; ++j) +- if (matches[j] == !swap_not_matching) +- { +- gassign *stmt = as_a (stmts[j]->stmt); +- /* Avoid swapping operands twice. */ +- if (gimple_plf (stmt, GF_PLF_1)) +- continue; +- swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt), +- gimple_assign_rhs2_ptr (stmt)); +- gimple_set_plf (stmt, GF_PLF_1, true); +- } +- /* Verify we swap all duplicates or none. */ +- if (flag_checking) +- for (j = 0; j < group_size; ++j) +- gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1) +- == (matches[j] == !swap_not_matching)); +- + /* If we have all children of child built up from scalars then + just throw that away and build it up this node from scalars. */ +- if (!SLP_TREE_CHILDREN (child).is_empty () ++ if (is_a (vinfo) ++ && !SLP_TREE_CHILDREN (child).is_empty () + /* ??? Rejecting patterns this way doesn't work. We'd have + to do extra work to cancel the pattern so the uses see the + scalar version. 
*/ +@@ -1421,6 +1498,9 @@ vect_build_slp_tree_2 (vec_info *vinfo, + "scalars instead\n"); + oprnd_info->def_stmts = vNULL; + SLP_TREE_DEF_TYPE (child) = vect_external_def; ++ SLP_TREE_SCALAR_OPS (child) = oprnd_info->ops; ++ oprnd_info->ops = vNULL; ++ ++this_tree_size; + children.safe_push (child); + continue; + } +@@ -1444,8 +1524,7 @@ fail: + + vect_free_oprnd_info (oprnds_info); + +- if (tree_size) +- *tree_size += this_tree_size; ++ *tree_size += this_tree_size + 1; + *max_nunits = this_max_nunits; + + node = vect_create_new_slp_node (stmts); +@@ -1460,9 +1539,10 @@ static void + vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc, + slp_tree node, hash_set &visited) + { +- int i; ++ unsigned i; + stmt_vec_info stmt_info; + slp_tree child; ++ tree op; + + if (visited.add (node)) + return; +@@ -1470,11 +1550,23 @@ vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc, + dump_metadata_t metadata (dump_kind, loc.get_impl_location ()); + dump_user_location_t user_loc = loc.get_user_location (); + dump_printf_loc (metadata, user_loc, "node%s %p (max_nunits=%u)\n", +- SLP_TREE_DEF_TYPE (node) != vect_internal_def +- ? " (external)" : "", node, ++ SLP_TREE_DEF_TYPE (node) == vect_external_def ++ ? " (external)" ++ : (SLP_TREE_DEF_TYPE (node) == vect_constant_def ++ ? " (constant)" ++ : ""), node, + estimated_poly_value (node->max_nunits)); +- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) +- dump_printf_loc (metadata, user_loc, "\tstmt %d %G", i, stmt_info->stmt); ++ if (SLP_TREE_SCALAR_STMTS (node).exists ()) ++ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) ++ dump_printf_loc (metadata, user_loc, "\tstmt %u %G", i, stmt_info->stmt); ++ else ++ { ++ dump_printf_loc (metadata, user_loc, "\t{ "); ++ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) ++ dump_printf (metadata, "%T%s ", op, ++ i < SLP_TREE_SCALAR_OPS (node).length () - 1 ? 
"," : ""); ++ dump_printf (metadata, "}\n"); ++ } + if (SLP_TREE_CHILDREN (node).is_empty ()) + return; + dump_printf_loc (metadata, user_loc, "\tchildren"); +@@ -1563,8 +1655,6 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size, + vec permutation, + hash_set &visited) + { +- stmt_vec_info stmt_info; +- vec tmp_stmts; + unsigned int i; + slp_tree child; + +@@ -1574,15 +1664,30 @@ vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size, + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) + vect_slp_rearrange_stmts (child, group_size, permutation, visited); + +- gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ()); +- tmp_stmts.create (group_size); +- tmp_stmts.quick_grow_cleared (group_size); +- +- FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) +- tmp_stmts[permutation[i]] = stmt_info; +- +- SLP_TREE_SCALAR_STMTS (node).release (); +- SLP_TREE_SCALAR_STMTS (node) = tmp_stmts; ++ if (SLP_TREE_SCALAR_STMTS (node).exists ()) ++ { ++ gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ()); ++ vec tmp_stmts; ++ tmp_stmts.create (group_size); ++ tmp_stmts.quick_grow (group_size); ++ stmt_vec_info stmt_info; ++ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) ++ tmp_stmts[permutation[i]] = stmt_info; ++ SLP_TREE_SCALAR_STMTS (node).release (); ++ SLP_TREE_SCALAR_STMTS (node) = tmp_stmts; ++ } ++ if (SLP_TREE_SCALAR_OPS (node).exists ()) ++ { ++ gcc_assert (group_size == SLP_TREE_SCALAR_OPS (node).length ()); ++ vec tmp_ops; ++ tmp_ops.create (group_size); ++ tmp_ops.quick_grow (group_size); ++ tree op; ++ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op) ++ tmp_ops[permutation[i]] = op; ++ SLP_TREE_SCALAR_OPS (node).release (); ++ SLP_TREE_SCALAR_OPS (node) = tmp_ops; ++ } + } + + +@@ -1668,9 +1773,10 @@ vect_gather_slp_loads (slp_instance inst, slp_tree node, + + if (SLP_TREE_CHILDREN (node).length () == 0) + { ++ if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) ++ return; + stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; +- if (SLP_TREE_DEF_TYPE (node) == vect_internal_def +- && STMT_VINFO_GROUPED_ACCESS (stmt_info) ++ if (STMT_VINFO_GROUPED_ACCESS (stmt_info) + && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) + SLP_INSTANCE_LOADS (inst).safe_push (node); + } +@@ -1913,7 +2019,7 @@ vect_analyze_slp_instance (vec_info *vinfo, + if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) + { + scalar_type = TREE_TYPE (DR_REF (dr)); +- vectype = get_vectype_for_scalar_type (scalar_type); ++ vectype = get_vectype_for_scalar_type (vinfo, scalar_type); + group_size = DR_GROUP_SIZE (stmt_info); + } + else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) +@@ -1964,7 +2070,8 @@ vect_analyze_slp_instance (vec_info *vinfo, + /* Mark the first element of the reduction chain as reduction to properly + transform the node. In the reduction analysis phase only the last + element of the chain is marked as reduction. 
*/ +- STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def; ++ STMT_VINFO_DEF_TYPE (stmt_info) ++ = STMT_VINFO_DEF_TYPE (scalar_stmts.last ()); + STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) + = STMT_VINFO_REDUC_DEF (vect_orig_stmt (scalar_stmts.last ())); + } +@@ -1982,9 +2089,10 @@ vect_analyze_slp_instance (vec_info *vinfo, + scalar_stmts_to_slp_tree_map_t *bst_map + = new scalar_stmts_to_slp_tree_map_t (); + poly_uint64 max_nunits = nunits; ++ unsigned tree_size = 0; + node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, + &max_nunits, matches, &npermutes, +- NULL, max_tree_size, bst_map); ++ &tree_size, bst_map); + /* The map keeps a reference on SLP nodes built, release that. */ + for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin (); + it != bst_map->end (); ++it) +@@ -1993,6 +2101,34 @@ vect_analyze_slp_instance (vec_info *vinfo, + delete bst_map; + if (node != NULL) + { ++ /* If this is a reduction chain with a conversion in front ++ amend the SLP tree with a node for that. */ ++ if (!dr ++ && REDUC_GROUP_FIRST_ELEMENT (stmt_info) ++ && STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def) ++ { ++ /* Get at the conversion stmt - we know it's the single use ++ of the last stmt of the reduction chain. */ ++ gimple *tem = vect_orig_stmt (scalar_stmts[group_size - 1])->stmt; ++ use_operand_p use_p; ++ gimple *use_stmt; ++ bool r = single_imm_use (gimple_assign_lhs (tem), &use_p, &use_stmt); ++ gcc_assert (r); ++ next_info = vinfo->lookup_stmt (use_stmt); ++ next_info = vect_stmt_to_vectorize (next_info); ++ scalar_stmts = vNULL; ++ scalar_stmts.create (group_size); ++ for (unsigned i = 0; i < group_size; ++i) ++ scalar_stmts.quick_push (next_info); ++ slp_tree conv = vect_create_new_slp_node (scalar_stmts); ++ SLP_TREE_CHILDREN (conv).quick_push (node); ++ node = conv; ++ /* We also have to fake this conversion stmt as SLP reduction group ++ so we don't have to mess with too much code elsewhere. */ ++ REDUC_GROUP_FIRST_ELEMENT (next_info) = next_info; ++ REDUC_GROUP_NEXT_ELEMENT (next_info) = NULL; ++ } ++ + /* Calculate the unrolling factor based on the smallest type. */ + poly_uint64 unrolling_factor + = calculate_unrolling_factor (max_nunits, group_size); +@@ -2025,6 +2161,10 @@ vect_analyze_slp_instance (vec_info *vinfo, + SLP_INSTANCE_UNROLLING_FACTOR (new_instance) = unrolling_factor; + SLP_INSTANCE_LOADS (new_instance) = vNULL; + vect_gather_slp_loads (new_instance, node); ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "SLP size %u vs. limit %u.\n", ++ tree_size, max_tree_size); + + /* Compute the load permutation. */ + slp_tree load_node; +@@ -2231,8 +2371,11 @@ vect_make_slp_decision (loop_vec_info loop_vinfo) + FOR_EACH_VEC_ELT (slp_instances, i, instance) + { + /* FORNOW: SLP if you can. */ +- /* All unroll factors have the form current_vector_size * X for some +- rational X, so they must have a common multiple. */ ++ /* All unroll factors have the form: ++ ++ GET_MODE_SIZE (vinfo->vector_mode) * X ++ ++ for some rational X, so they must have a common multiple. 
*/ + unrolling_factor + = force_common_multiple (unrolling_factor, + SLP_INSTANCE_UNROLLING_FACTOR (instance)); +@@ -2327,7 +2470,8 @@ vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype, + + if (!only_edge) + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) +- if (SLP_TREE_DEF_TYPE (child) != vect_external_def) ++ if (SLP_TREE_DEF_TYPE (child) != vect_external_def ++ && SLP_TREE_DEF_TYPE (child) != vect_constant_def) + vect_detect_hybrid_slp_stmts (child, i, stype, visited); + } + +@@ -2514,8 +2658,15 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node, + VF divided by the number of elements in a vector. */ + if (!STMT_VINFO_GROUPED_ACCESS (stmt_info) + && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) +- SLP_TREE_NUMBER_OF_VEC_STMTS (node) +- = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]); ++ { ++ for (unsigned i = 0; i < SLP_TREE_CHILDREN (node).length (); ++i) ++ if (SLP_TREE_DEF_TYPE (SLP_TREE_CHILDREN (node)[i]) == vect_internal_def) ++ { ++ SLP_TREE_NUMBER_OF_VEC_STMTS (node) ++ = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[i]); ++ break; ++ } ++ } + else + { + poly_uint64 vf; +@@ -2533,6 +2684,39 @@ vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node, + return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec); + } + ++/* Try to build NODE from scalars, returning true on success. ++ NODE_INSTANCE is the SLP instance that contains NODE. */ ++ ++static bool ++vect_slp_convert_to_external (vec_info *vinfo, slp_tree node, ++ slp_instance node_instance) ++{ ++ stmt_vec_info stmt_info; ++ unsigned int i; ++ ++ if (!is_a (vinfo) ++ || node == SLP_INSTANCE_TREE (node_instance) ++ || vect_contains_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (node))) ++ return false; ++ ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "Building vector operands from scalars instead\n"); ++ ++ /* Don't remove and free the child nodes here, since they could be ++ referenced by other structures. The analysis and scheduling phases ++ (need to) ignore child nodes of anything that isn't vect_internal_def. */ ++ unsigned int group_size = SLP_TREE_SCALAR_STMTS (node).length (); ++ SLP_TREE_DEF_TYPE (node) = vect_external_def; ++ SLP_TREE_SCALAR_OPS (node).safe_grow (group_size); ++ FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) ++ { ++ tree lhs = gimple_get_lhs (vect_orig_stmt (stmt_info)->stmt); ++ SLP_TREE_SCALAR_OPS (node)[i] = lhs; ++ } ++ return true; ++} ++ + /* Analyze statements contained in SLP tree NODE after recursively analyzing + the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE. + +@@ -2559,6 +2743,13 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, + { + SLP_TREE_NUMBER_OF_VEC_STMTS (node) + = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader); ++ /* Cope with cases in which we made a late decision to build the ++ node from scalars. 
*/ ++ if (SLP_TREE_DEF_TYPE (*leader) == vect_external_def ++ && vect_slp_convert_to_external (vinfo, node, node_instance)) ++ ; ++ else ++ gcc_assert (SLP_TREE_DEF_TYPE (node) == SLP_TREE_DEF_TYPE (*leader)); + return true; + } + +@@ -2579,25 +2770,31 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, + auto_vec dt; + dt.safe_grow (SLP_TREE_CHILDREN (node).length ()); + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) +- dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]); ++ if (SLP_TREE_SCALAR_STMTS (child).length () != 0) ++ dt[j] = STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]); + + /* Push SLP node def-type to stmt operands. */ + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) +- if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) ++ if (SLP_TREE_DEF_TYPE (child) != vect_internal_def ++ && SLP_TREE_SCALAR_STMTS (child).length () != 0) + STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) + = SLP_TREE_DEF_TYPE (child); + + /* Check everything worked out. */ + bool res = true; + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) +- if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) +- { +- if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) +- != SLP_TREE_DEF_TYPE (child)) +- res = false; +- } +- else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) != dt[j]) +- res = false; ++ if (SLP_TREE_SCALAR_STMTS (child).length () != 0) ++ { ++ if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) ++ { ++ if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) ++ != SLP_TREE_DEF_TYPE (child)) ++ res = false; ++ } ++ else if (STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) ++ != dt[j]) ++ res = false; ++ } + if (!res && dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: same operand with different " +@@ -2609,7 +2806,13 @@ vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, + + /* Restore def-types. */ + FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) +- STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j]; ++ if (SLP_TREE_SCALAR_STMTS (child).length () != 0) ++ STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) = dt[j]; ++ ++ /* If this node can't be vectorized, try pruning the tree here rather ++ than felling the whole thing. */ ++ if (!res && vect_slp_convert_to_external (vinfo, node, node_instance)) ++ res = true; + + return res; + } +@@ -2818,19 +3021,17 @@ vect_bb_vectorization_profitable_p (bb_vec_info bb_vinfo) + return true; + } + +-/* Check if the basic block can be vectorized. Returns a bb_vec_info +- if so and sets fatal to true if failure is independent of +- current_vector_size. */ ++/* Check if the region described by BB_VINFO can be vectorized, returning ++ true if so. When returning false, set FATAL to true if the same failure ++ would prevent vectorization at other vector sizes, false if it is still ++ worth trying other sizes. N_STMTS is the number of statements in the ++ region. 
*/ + +-static bb_vec_info +-vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin, +- gimple_stmt_iterator region_end, +- vec datarefs, int n_stmts, +- bool &fatal, vec_info_shared *shared) ++static bool ++vect_slp_analyze_bb_1 (bb_vec_info bb_vinfo, int n_stmts, bool &fatal) + { + DUMP_VECT_SCOPE ("vect_slp_analyze_bb"); + +- bb_vec_info bb_vinfo; + slp_instance instance; + int i; + poly_uint64 min_vf = 2; +@@ -2838,34 +3039,15 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin, + /* The first group of checks is independent of the vector size. */ + fatal = true; + +- if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "not vectorized: too many instructions in " +- "basic block.\n"); +- free_data_refs (datarefs); +- return NULL; +- } +- +- bb_vinfo = new _bb_vec_info (region_begin, region_end, shared); +- if (!bb_vinfo) +- return NULL; +- +- BB_VINFO_DATAREFS (bb_vinfo) = datarefs; +- bb_vinfo->shared->save_datarefs (); +- + /* Analyze the data references. */ + +- if (!vect_analyze_data_refs (bb_vinfo, &min_vf)) ++ if (!vect_analyze_data_refs (bb_vinfo, &min_vf, NULL)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: unhandled data-ref in basic " + "block.\n"); +- +- delete bb_vinfo; +- return NULL; ++ return false; + } + + if (BB_VINFO_DATAREFS (bb_vinfo).length () < 2) +@@ -2874,9 +3056,7 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin, + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: not enough data-refs in " + "basic block.\n"); +- +- delete bb_vinfo; +- return NULL; ++ return false; + } + + if (!vect_analyze_data_ref_accesses (bb_vinfo)) +@@ -2885,9 +3065,7 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin, + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: unhandled data access in " + "basic block.\n"); +- +- delete bb_vinfo; +- return NULL; ++ return false; + } + + /* If there are no grouped stores in the region there is no need +@@ -2899,9 +3077,7 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin, + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: no grouped stores in " + "basic block.\n"); +- +- delete bb_vinfo; +- return NULL; ++ return false; + } + + /* While the rest of the analysis below depends on it in some way. */ +@@ -2921,9 +3097,7 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin, + "not vectorized: failed to find SLP opportunities " + "in basic block.\n"); + } +- +- delete bb_vinfo; +- return NULL; ++ return false; + } + + vect_record_base_alignments (bb_vinfo); +@@ -2954,19 +3128,14 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin, + i++; + } + if (! BB_VINFO_SLP_INSTANCES (bb_vinfo).length ()) +- { +- delete bb_vinfo; +- return NULL; +- } ++ return false; + + if (!vect_slp_analyze_operations (bb_vinfo)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: bad operation in basic block.\n"); +- +- delete bb_vinfo; +- return NULL; ++ return false; + } + + /* Cost model: check if the vectorization is worthwhile. 
*/ +@@ -2977,80 +3146,61 @@ vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin, + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, + "not vectorized: vectorization is not " + "profitable.\n"); +- +- delete bb_vinfo; +- return NULL; ++ return false; + } + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "Basic block will be vectorized using SLP\n"); +- +- return bb_vinfo; ++ return true; + } + ++/* Subroutine of vect_slp_bb. Try to vectorize the statements between ++ REGION_BEGIN (inclusive) and REGION_END (exclusive), returning true ++ on success. The region has N_STMTS statements and has the datarefs ++ given by DATAREFS. */ + +-/* Main entry for the BB vectorizer. Analyze and transform BB, returns +- true if anything in the basic-block was vectorized. */ +- +-bool +-vect_slp_bb (basic_block bb) ++static bool ++vect_slp_bb_region (gimple_stmt_iterator region_begin, ++ gimple_stmt_iterator region_end, ++ vec datarefs, ++ unsigned int n_stmts) + { + bb_vec_info bb_vinfo; +- gimple_stmt_iterator gsi; +- bool any_vectorized = false; +- auto_vector_sizes vector_sizes; ++ auto_vector_modes vector_modes; + + /* Autodetect first vector size we try. */ +- current_vector_size = 0; +- targetm.vectorize.autovectorize_vector_sizes (&vector_sizes); +- unsigned int next_size = 0; ++ machine_mode next_vector_mode = VOIDmode; ++ targetm.vectorize.autovectorize_vector_modes (&vector_modes, false); ++ unsigned int mode_i = 0; + +- gsi = gsi_start_bb (bb); ++ vec_info_shared shared; + +- poly_uint64 autodetected_vector_size = 0; ++ machine_mode autodetected_vector_mode = VOIDmode; + while (1) + { +- if (gsi_end_p (gsi)) +- break; +- +- gimple_stmt_iterator region_begin = gsi; +- vec datarefs = vNULL; +- int insns = 0; +- +- for (; !gsi_end_p (gsi); gsi_next (&gsi)) +- { +- gimple *stmt = gsi_stmt (gsi); +- if (is_gimple_debug (stmt)) +- continue; +- insns++; +- +- if (gimple_location (stmt) != UNKNOWN_LOCATION) +- vect_location = stmt; +- +- if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs)) +- break; +- } +- +- /* Skip leading unhandled stmts. 
*/ +- if (gsi_stmt (region_begin) == gsi_stmt (gsi)) +- { +- gsi_next (&gsi); +- continue; +- } +- +- gimple_stmt_iterator region_end = gsi; +- + bool vectorized = false; + bool fatal = false; +- vec_info_shared shared; +- bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end, +- datarefs, insns, fatal, &shared); +- if (bb_vinfo ++ bb_vinfo = new _bb_vec_info (region_begin, region_end, &shared); ++ ++ bool first_time_p = shared.datarefs.is_empty (); ++ BB_VINFO_DATAREFS (bb_vinfo) = datarefs; ++ if (first_time_p) ++ bb_vinfo->shared->save_datarefs (); ++ else ++ bb_vinfo->shared->check_datarefs (); ++ bb_vinfo->vector_mode = next_vector_mode; ++ ++ if (vect_slp_analyze_bb_1 (bb_vinfo, n_stmts, fatal) + && dbg_cnt (vect_slp)) + { + if (dump_enabled_p ()) +- dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n"); ++ { ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "***** Analysis succeeded with vector mode" ++ " %s\n", GET_MODE_NAME (bb_vinfo->vector_mode)); ++ dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n"); ++ } + + bb_vinfo->shared->check_datarefs (); + vect_schedule_slp (bb_vinfo); +@@ -3058,7 +3208,7 @@ vect_slp_bb (basic_block bb) + unsigned HOST_WIDE_INT bytes; + if (dump_enabled_p ()) + { +- if (current_vector_size.is_constant (&bytes)) ++ if (GET_MODE_SIZE (bb_vinfo->vector_mode).is_constant (&bytes)) + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, + "basic block part vectorized using %wu byte " + "vectors\n", bytes); +@@ -3070,50 +3220,120 @@ vect_slp_bb (basic_block bb) + + vectorized = true; + } +- delete bb_vinfo; ++ else ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "***** Analysis failed with vector mode %s\n", ++ GET_MODE_NAME (bb_vinfo->vector_mode)); ++ } + +- any_vectorized |= vectorized; ++ if (mode_i == 0) ++ autodetected_vector_mode = bb_vinfo->vector_mode; + +- if (next_size == 0) +- autodetected_vector_size = current_vector_size; ++ if (!fatal) ++ while (mode_i < vector_modes.length () ++ && vect_chooses_same_modes_p (bb_vinfo, vector_modes[mode_i])) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "***** The result for vector mode %s would" ++ " be the same\n", ++ GET_MODE_NAME (vector_modes[mode_i])); ++ mode_i += 1; ++ } + +- if (next_size < vector_sizes.length () +- && known_eq (vector_sizes[next_size], autodetected_vector_size)) +- next_size += 1; ++ delete bb_vinfo; ++ ++ if (mode_i < vector_modes.length () ++ && VECTOR_MODE_P (autodetected_vector_mode) ++ && (related_vector_mode (vector_modes[mode_i], ++ GET_MODE_INNER (autodetected_vector_mode)) ++ == autodetected_vector_mode) ++ && (related_vector_mode (autodetected_vector_mode, ++ GET_MODE_INNER (vector_modes[mode_i])) ++ == vector_modes[mode_i])) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "***** Skipping vector mode %s, which would" ++ " repeat the analysis for %s\n", ++ GET_MODE_NAME (vector_modes[mode_i]), ++ GET_MODE_NAME (autodetected_vector_mode)); ++ mode_i += 1; ++ } + + if (vectorized +- || next_size == vector_sizes.length () +- || known_eq (current_vector_size, 0U) ++ || mode_i == vector_modes.length () ++ || autodetected_vector_mode == VOIDmode + /* If vect_slp_analyze_bb_1 signaled that analysis for all + vector sizes will fail do not bother iterating. */ + || fatal) ++ return vectorized; ++ ++ /* Try the next biggest vector size. 
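A standalone sketch of the retry loop this hunk builds (the mode names and the analyse/same_result helpers are made up for the example; in the patch they roughly correspond to autovectorize_vector_modes, vect_slp_analyze_bb_1 and vect_chooses_same_modes_p): candidate vector modes are tried in turn, candidates that would only repeat an analysis already done are skipped, and the loop stops on success, on a fatal failure, or when the candidates run out.

#include <cstdio>
#include <string>
#include <vector>

/* Illustrative toy only, not GCC code.  */
static bool
analyse (const std::string &mode, bool &fatal)
{
  fatal = false;
  return mode == "V8HI";       /* pretend only this mode succeeds */
}

static bool
same_result (const std::string &a, const std::string &b)
{
  return a == b;               /* stand-in for "would choose the same modes" */
}

int
main ()
{
  std::vector<std::string> modes = { "V16QI", "V8HI", "V4SI" };
  std::string next_mode = modes[0];   /* stands in for the autodetected mode */
  unsigned mode_i = 0;
  while (true)
    {
      bool fatal = false;
      bool ok = analyse (next_mode, fatal);
      if (!fatal)
	while (mode_i < modes.size () && same_result (next_mode, modes[mode_i]))
	  ++mode_i;            /* that candidate would repeat this analysis */
      if (ok || fatal || mode_i == modes.size ())
	{
	  std::printf ("%s (mode %s)\n",
		       ok ? "vectorized" : "not vectorized", next_mode.c_str ());
	  return 0;
	}
      next_mode = modes[mode_i++];    /* re-try with the next candidate mode */
    }
}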
*/ ++ next_vector_mode = vector_modes[mode_i++]; ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "***** Re-trying analysis with vector mode %s\n", ++ GET_MODE_NAME (next_vector_mode)); ++ } ++} ++ ++/* Main entry for the BB vectorizer. Analyze and transform BB, returns ++ true if anything in the basic-block was vectorized. */ ++ ++bool ++vect_slp_bb (basic_block bb) ++{ ++ gimple_stmt_iterator gsi; ++ bool any_vectorized = false; ++ ++ gsi = gsi_start_bb (bb); ++ while (!gsi_end_p (gsi)) ++ { ++ gimple_stmt_iterator region_begin = gsi; ++ vec datarefs = vNULL; ++ int insns = 0; ++ ++ for (; !gsi_end_p (gsi); gsi_next (&gsi)) + { +- if (gsi_end_p (region_end)) ++ gimple *stmt = gsi_stmt (gsi); ++ if (is_gimple_debug (stmt)) ++ continue; ++ insns++; ++ ++ if (gimple_location (stmt) != UNKNOWN_LOCATION) ++ vect_location = stmt; ++ ++ if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs)) + break; ++ } + +- /* Skip the unhandled stmt. */ ++ /* Skip leading unhandled stmts. */ ++ if (gsi_stmt (region_begin) == gsi_stmt (gsi)) ++ { + gsi_next (&gsi); +- +- /* And reset vector sizes. */ +- current_vector_size = 0; +- next_size = 0; ++ continue; + } +- else ++ ++ gimple_stmt_iterator region_end = gsi; ++ ++ if (insns > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) + { +- /* Try the next biggest vector size. */ +- current_vector_size = vector_sizes[next_size++]; + if (dump_enabled_p ()) +- { +- dump_printf_loc (MSG_NOTE, vect_location, +- "***** Re-trying analysis with " +- "vector size "); +- dump_dec (MSG_NOTE, current_vector_size); +- dump_printf (MSG_NOTE, "\n"); +- } +- +- /* Start over. */ +- gsi = region_begin; ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "not vectorized: too many instructions in " ++ "basic block.\n"); + } ++ else if (vect_slp_bb_region (region_begin, region_end, datarefs, insns)) ++ any_vectorized = true; ++ ++ if (gsi_end_p (region_end)) ++ break; ++ ++ /* Skip the unhandled stmt. */ ++ gsi_next (&gsi); + } + + return any_vectorized; +@@ -3184,8 +3404,9 @@ vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo) + to cut down on the number of interleaves. */ + + void +-duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec elts, +- unsigned int nresults, vec &results) ++duplicate_and_interleave (vec_info *vinfo, gimple_seq *seq, tree vector_type, ++ vec elts, unsigned int nresults, ++ vec &results) + { + unsigned int nelts = elts.length (); + tree element_type = TREE_TYPE (vector_type); +@@ -3194,7 +3415,7 @@ duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec elts, + unsigned int nvectors = 1; + tree new_vector_type; + tree permutes[2]; +- if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type), ++ if (!can_duplicate_and_interleave_p (vinfo, nelts, TYPE_MODE (element_type), + &nvectors, &new_vector_type, + permutes)) + gcc_unreachable (); +@@ -3276,52 +3497,45 @@ duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec elts, + + /* For constant and loop invariant defs of SLP_NODE this function returns + (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts. +- OP_NUM determines if we gather defs for operand 0 or operand 1 of the RHS of +- scalar stmts. NUMBER_OF_VECTORS is the number of vector defs to create. +- REDUC_INDEX is the index of the reduction operand in the statements, unless +- it is -1. */ ++ OP_NODE determines the node for the operand containing the scalar ++ operands. 
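As a rough model of how the constant/invariant operand vectors get filled (the values and sizes below are invented for illustration): the group of scalar operands is repeated number_of_copies = nunits * number_of_vectors / group_size times, which is greater than one when the SLP group has to be unrolled to fill whole vectors.

#include <cstdio>
#include <vector>

/* Illustrative toy only, not GCC code.  */
int
main ()
{
  std::vector<int> scalar_ops = { 10, 20 };   /* the node's scalar operands */
  unsigned nunits = 4;                        /* lanes per vector */
  unsigned number_of_vectors = 2;             /* vector defs requested */
  unsigned group_size = scalar_ops.size ();
  unsigned number_of_copies = nunits * number_of_vectors / group_size;

  std::printf ("number_of_copies = %u\n", number_of_copies);
  std::vector<std::vector<int>> vectors (number_of_vectors);
  unsigned lane = 0;
  for (unsigned c = 0; c < number_of_copies; ++c)
    for (unsigned i = 0; i < group_size; ++i)
      {
	vectors[lane / nunits].push_back (scalar_ops[i]);
	++lane;
      }
  for (unsigned v = 0; v < number_of_vectors; ++v)
    {
      std::printf ("vector %u: { ", v);
      for (int x : vectors[v])
	std::printf ("%d ", x);
      std::printf ("}\n");
    }
  return 0;
}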
*/ + + static void +-vect_get_constant_vectors (tree op, slp_tree slp_node, +- vec *vec_oprnds, +- unsigned int op_num, unsigned int number_of_vectors) ++vect_get_constant_vectors (slp_tree op_node, slp_tree slp_node, ++ vec *vec_oprnds) + { +- vec stmts = SLP_TREE_SCALAR_STMTS (slp_node); +- stmt_vec_info stmt_vinfo = stmts[0]; +- gimple *stmt = stmt_vinfo->stmt; ++ stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (slp_node)[0]; ++ vec_info *vinfo = stmt_vinfo->vinfo; + unsigned HOST_WIDE_INT nunits; + tree vec_cst; + unsigned j, number_of_places_left_in_vector; + tree vector_type; + tree vop; +- int group_size = stmts.length (); ++ int group_size = op_node->ops.length (); + unsigned int vec_num, i; + unsigned number_of_copies = 1; +- vec voprnds; +- voprnds.create (number_of_vectors); +- bool constant_p, is_store; ++ bool constant_p; + tree neutral_op = NULL; +- enum tree_code code = gimple_expr_code (stmt); + gimple_seq ctor_seq = NULL; + auto_vec permute_results; + ++ /* ??? SLP analysis should compute the vector type for the ++ constant / invariant and store it in the SLP node. */ ++ tree op = op_node->ops[0]; + /* Check if vector type is a boolean vector. */ ++ tree stmt_vectype = STMT_VINFO_VECTYPE (stmt_vinfo); + if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op)) + && vect_mask_constant_operand_p (stmt_vinfo)) +- vector_type +- = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo)); +- else +- vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); +- +- if (STMT_VINFO_DATA_REF (stmt_vinfo)) +- { +- is_store = true; +- op = gimple_assign_rhs1 (stmt); +- } ++ vector_type = truth_type_for (stmt_vectype); + else +- is_store = false; ++ vector_type = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op)); + +- gcc_assert (op); ++ unsigned int number_of_vectors ++ = vect_get_num_vectors (SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) ++ * TYPE_VECTOR_SUBPARTS (stmt_vectype), ++ vector_type); ++ vec_oprnds->create (number_of_vectors); ++ auto_vec voprnds (number_of_vectors); + + /* NUMBER_OF_COPIES is the number of times we need to use the same values in + created vectors. It is greater than 1 if unrolling is performed. +@@ -3353,56 +3567,8 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, + bool place_after_defs = false; + for (j = 0; j < number_of_copies; j++) + { +- for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--) ++ for (i = group_size - 1; op_node->ops.iterate (i, &op); i--) + { +- stmt = stmt_vinfo->stmt; +- if (is_store) +- op = gimple_assign_rhs1 (stmt); +- else +- { +- switch (code) +- { +- case COND_EXPR: +- { +- tree cond = gimple_assign_rhs1 (stmt); +- if (TREE_CODE (cond) == SSA_NAME) +- op = gimple_op (stmt, op_num + 1); +- else if (op_num == 0 || op_num == 1) +- op = TREE_OPERAND (cond, op_num); +- else +- { +- if (op_num == 2) +- op = gimple_assign_rhs2 (stmt); +- else +- op = gimple_assign_rhs3 (stmt); +- } +- } +- break; +- +- case CALL_EXPR: +- op = gimple_call_arg (stmt, op_num); +- break; +- +- case LSHIFT_EXPR: +- case RSHIFT_EXPR: +- case LROTATE_EXPR: +- case RROTATE_EXPR: +- op = gimple_op (stmt, op_num + 1); +- /* Unlike the other binary operators, shifts/rotates have +- the shift count being int, instead of the same type as +- the lhs, so make sure the scalar is the right type if +- we are dealing with vectors of +- long long/long/short/char. 
*/ +- if (op_num == 1 && TREE_CODE (op) == INTEGER_CST) +- op = fold_convert (TREE_TYPE (vector_type), op); +- break; +- +- default: +- op = gimple_op (stmt, op_num + 1); +- break; +- } +- } +- + /* Create 'vect_ = {op0,op1,...,opn}'. */ + number_of_places_left_in_vector--; + tree orig_op = op; +@@ -3472,9 +3638,9 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, + vec_cst = gimple_build_vector (&ctor_seq, &elts); + else + { +- if (vec_oprnds->is_empty ()) +- duplicate_and_interleave (&ctor_seq, vector_type, elts, +- number_of_vectors, ++ if (permute_results.is_empty ()) ++ duplicate_and_interleave (vinfo, &ctor_seq, vector_type, ++ elts, number_of_vectors, + permute_results); + vec_cst = permute_results[number_of_vectors - j - 1]; + } +@@ -3516,8 +3682,6 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, + vec_oprnds->quick_push (vop); + } + +- voprnds.release (); +- + /* In case that VF is greater than the unrolling factor needed for the SLP + group of stmts, NUMBER_OF_VECTORS to be created is greater than + NUMBER_OF_SCALARS/NUNITS or NUNITS/NUMBER_OF_SCALARS, and hence we have +@@ -3548,25 +3712,17 @@ vect_get_constant_vectors (tree op, slp_tree slp_node, + static void + vect_get_slp_vect_defs (slp_tree slp_node, vec *vec_oprnds) + { +- tree vec_oprnd; + stmt_vec_info vec_def_stmt_info; + unsigned int i; + + gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ()); + + FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info) +- { +- gcc_assert (vec_def_stmt_info); +- if (gphi *vec_def_phi = dyn_cast (vec_def_stmt_info->stmt)) +- vec_oprnd = gimple_phi_result (vec_def_phi); +- else +- vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt); +- vec_oprnds->quick_push (vec_oprnd); +- } ++ vec_oprnds->quick_push (gimple_get_lhs (vec_def_stmt_info->stmt)); + } + + +-/* Get vectorized definitions for SLP_NODE. ++/* Get N vectorized definitions for SLP_NODE. + If the scalar definitions are loop invariants or constants, collect them and + call vect_get_constant_vectors() to create vector stmts. + Otherwise, the def-stmts must be already vectorized and the vectorized stmts +@@ -3574,91 +3730,26 @@ vect_get_slp_vect_defs (slp_tree slp_node, vec *vec_oprnds) + vect_get_slp_vect_defs () to retrieve them. */ + + void +-vect_get_slp_defs (vec ops, slp_tree slp_node, +- vec > *vec_oprnds) ++vect_get_slp_defs (slp_tree slp_node, vec > *vec_oprnds, unsigned n) + { +- int number_of_vects = 0, i; +- unsigned int child_index = 0; +- HOST_WIDE_INT lhs_size_unit, rhs_size_unit; +- slp_tree child = NULL; +- vec vec_defs; +- tree oprnd; +- bool vectorized_defs; ++ if (n == -1U) ++ n = SLP_TREE_CHILDREN (slp_node).length (); + +- stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; +- FOR_EACH_VEC_ELT (ops, i, oprnd) ++ for (unsigned i = 0; i < n; ++i) + { +- /* For each operand we check if it has vectorized definitions in a child +- node or we need to create them (for invariants and constants). We +- check if the LHS of the first stmt of the next child matches OPRND. +- If it does, we found the correct child. Otherwise, we call +- vect_get_constant_vectors (), and not advance CHILD_INDEX in order +- to check this child node for the next operand. */ +- vectorized_defs = false; +- if (SLP_TREE_CHILDREN (slp_node).length () > child_index) +- { +- child = SLP_TREE_CHILDREN (slp_node)[child_index]; +- +- /* We have to check both pattern and original def, if available. 
*/ +- if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) +- { +- stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0]; +- stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info); +- tree first_def_op; +- +- if (gphi *first_def = dyn_cast (first_def_info->stmt)) +- first_def_op = gimple_phi_result (first_def); +- else +- first_def_op = gimple_get_lhs (first_def_info->stmt); +- if (operand_equal_p (oprnd, first_def_op, 0) +- || (related +- && operand_equal_p (oprnd, +- gimple_get_lhs (related->stmt), 0))) +- { +- /* The number of vector defs is determined by the number of +- vector statements in the node from which we get those +- statements. */ +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child); +- vectorized_defs = true; +- child_index++; +- } +- } +- else +- child_index++; +- } +- +- if (!vectorized_defs) +- { +- if (i == 0) +- { +- number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); +- /* Number of vector stmts was calculated according to LHS in +- vect_schedule_slp_instance (), fix it by replacing LHS with +- RHS, if necessary. See vect_get_smallest_scalar_type () for +- details. */ +- vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit, +- &rhs_size_unit); +- if (rhs_size_unit != lhs_size_unit) +- { +- number_of_vects *= rhs_size_unit; +- number_of_vects /= lhs_size_unit; +- } +- } +- } ++ slp_tree child = SLP_TREE_CHILDREN (slp_node)[i]; + +- /* Allocate memory for vectorized defs. */ +- vec_defs = vNULL; +- vec_defs.create (number_of_vects); ++ vec vec_defs = vNULL; + +- /* For reduction defs we call vect_get_constant_vectors (), since we are +- looking for initial loop invariant values. */ +- if (vectorized_defs) +- /* The defs are already vectorized. */ +- vect_get_slp_vect_defs (child, &vec_defs); ++ /* For each operand we check if it has vectorized definitions in a child ++ node or we need to create them (for invariants and constants). */ ++ if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) ++ { ++ vec_defs.create (SLP_TREE_NUMBER_OF_VEC_STMTS (child)); ++ vect_get_slp_vect_defs (child, &vec_defs); ++ } + else +- /* Build vectors from scalar defs. */ +- vect_get_constant_vectors (oprnd, slp_node, &vec_defs, i, +- number_of_vects); ++ vect_get_constant_vectors (child, slp_node, &vec_defs); + + vec_oprnds->quick_push (vec_defs); + } +@@ -3939,17 +4030,6 @@ vect_schedule_slp_instance (slp_tree node, slp_instance instance, + stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node); + si = gsi_for_stmt (last_stmt_info->stmt); + +- /* Mark the first element of the reduction chain as reduction to properly +- transform the node. In the analysis phase only the last element of the +- chain is marked as reduction. */ +- if (!STMT_VINFO_GROUPED_ACCESS (stmt_info) +- && REDUC_GROUP_FIRST_ELEMENT (stmt_info) +- && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info) +- { +- STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def; +- STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; +- } +- + /* Handle two-operation SLP nodes by vectorizing the group with + both operations and then performing a merge. 
*/ + if (SLP_TREE_TWO_OPERATORS (node)) +diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c +index 74abfbfe56e..5d6da3d9708 100644 +--- a/gcc/tree-vect-stmts.c ++++ b/gcc/tree-vect-stmts.c +@@ -329,13 +329,13 @@ vect_stmt_relevant_p (stmt_vec_info stmt_info, loop_vec_info loop_vinfo, + basic_block bb = gimple_bb (USE_STMT (use_p)); + if (!flow_bb_inside_loop_p (loop, bb)) + { ++ if (is_gimple_debug (USE_STMT (use_p))) ++ continue; ++ + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, + "vec_stmt_relevant_p: used out of loop.\n"); + +- if (is_gimple_debug (USE_STMT (use_p))) +- continue; +- + /* We expect all such uses to be in the loop exit phis + (because of loop closed form) */ + gcc_assert (gimple_code (USE_STMT (use_p)) == GIMPLE_PHI); +@@ -456,7 +456,6 @@ process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo, + bool force) + { + stmt_vec_info dstmt_vinfo; +- basic_block bb, def_bb; + enum vect_def_type dt; + + /* case 1: we are only interested in uses that need to be vectorized. Uses +@@ -472,28 +471,8 @@ process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo, + if (!dstmt_vinfo) + return opt_result::success (); + +- def_bb = gimple_bb (dstmt_vinfo->stmt); +- +- /* case 2: A reduction phi (STMT) defined by a reduction stmt (DSTMT_VINFO). +- DSTMT_VINFO must have already been processed, because this should be the +- only way that STMT, which is a reduction-phi, was put in the worklist, +- as there should be no other uses for DSTMT_VINFO in the loop. So we just +- check that everything is as expected, and we are done. */ +- bb = gimple_bb (stmt_vinfo->stmt); +- if (gimple_code (stmt_vinfo->stmt) == GIMPLE_PHI +- && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def +- && gimple_code (dstmt_vinfo->stmt) != GIMPLE_PHI +- && STMT_VINFO_DEF_TYPE (dstmt_vinfo) == vect_reduction_def +- && bb->loop_father == def_bb->loop_father) +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_NOTE, vect_location, +- "reduc-stmt defining reduc-phi in the same nest.\n"); +- gcc_assert (STMT_VINFO_RELEVANT (dstmt_vinfo) < vect_used_by_reduction); +- gcc_assert (STMT_VINFO_LIVE_P (dstmt_vinfo) +- || STMT_VINFO_RELEVANT (dstmt_vinfo) > vect_unused_in_scope); +- return opt_result::success (); +- } ++ basic_block def_bb = gimple_bb (dstmt_vinfo->stmt); ++ basic_block bb = gimple_bb (stmt_vinfo->stmt); + + /* case 3a: outer-loop stmt defining an inner-loop stmt: + outer-loop-header-bb: +@@ -607,7 +586,7 @@ process_use (stmt_vec_info stmt_vinfo, tree use, loop_vec_info loop_vinfo, + This pass detects such stmts. 
*/ + + opt_result +-vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) ++vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo, bool *fatal) + { + struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); + basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); +@@ -777,7 +756,11 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo) + = process_use (stmt_vinfo, gs_info.offset, loop_vinfo, relevant, + &worklist, true); + if (!res) +- return res; ++ { ++ if (fatal) ++ *fatal = false; ++ return res; ++ } + } + } /* while worklist */ + +@@ -791,6 +774,7 @@ vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info, + unsigned opno, enum vect_def_type dt, + stmt_vector_for_cost *cost_vec) + { ++ vec_info *vinfo = stmt_info->vinfo; + gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]->stmt; + tree op = gimple_op (stmt, opno); + unsigned prologue_cost = 0; +@@ -798,7 +782,7 @@ vect_prologue_cost_for_slp_op (slp_tree node, stmt_vec_info stmt_info, + /* Without looking at the actual initializer a vector of + constants can be implemented as load from the constant pool. + When all elements are the same we can use a splat. */ +- tree vectype = get_vectype_for_scalar_type (TREE_TYPE (op)); ++ tree vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op)); + unsigned group_size = SLP_TREE_SCALAR_STMTS (node).length (); + unsigned num_vects_to_check; + unsigned HOST_WIDE_INT const_nunits; +@@ -1603,9 +1587,9 @@ vect_get_vec_def_for_operand (tree op, stmt_vec_info stmt_vinfo, tree vectype) + vector_type = vectype; + else if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op)) + && VECTOR_BOOLEAN_TYPE_P (stmt_vectype)) +- vector_type = build_same_sized_truth_vector_type (stmt_vectype); ++ vector_type = truth_type_for (stmt_vectype); + else +- vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); ++ vector_type = get_vectype_for_scalar_type (loop_vinfo, TREE_TYPE (op)); + + gcc_assert (vector_type); + return vect_init_vector (stmt_vinfo, op, vector_type, NULL); +@@ -1720,16 +1704,8 @@ vect_get_vec_defs (tree op0, tree op1, stmt_vec_info stmt_info, + { + if (slp_node) + { +- int nops = (op1 == NULL_TREE) ? 1 : 2; +- auto_vec ops (nops); +- auto_vec > vec_defs (nops); +- +- ops.quick_push (op0); +- if (op1) +- ops.quick_push (op1); +- +- vect_get_slp_defs (ops, slp_node, &vec_defs); +- ++ auto_vec > vec_defs (SLP_TREE_CHILDREN (slp_node).length ()); ++ vect_get_slp_defs (slp_node, &vec_defs, op1 ? 2 : 1); + *vec_oprnds0 = vec_defs[0]; + if (op1) + *vec_oprnds1 = vec_defs[1]; +@@ -1874,7 +1850,8 @@ static tree permute_vec_elements (tree, tree, tree, stmt_vec_info, + says how the load or store is going to be implemented and GROUP_SIZE + is the number of load or store statements in the containing group. + If the access is a gather load or scatter store, GS_INFO describes +- its arguments. ++ its arguments. If the load or store is conditional, SCALAR_MASK is the ++ condition under which it occurs. + + Clear LOOP_VINFO_CAN_FULLY_MASK_P if a fully-masked loop is not + supported, otherwise record the required mask types. */ +@@ -1883,7 +1860,7 @@ static void + check_load_store_masking (loop_vec_info loop_vinfo, tree vectype, + vec_load_store_type vls_type, int group_size, + vect_memory_access_type memory_access_type, +- gather_scatter_info *gs_info) ++ gather_scatter_info *gs_info, tree scalar_mask) + { + /* Invariant loads need no special support. 
*/ + if (memory_access_type == VMAT_INVARIANT) +@@ -1907,7 +1884,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype, + return; + } + unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype); +- vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype); ++ vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask); + return; + } + +@@ -1931,7 +1908,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype, + return; + } + unsigned int ncopies = vect_get_num_copies (loop_vinfo, vectype); +- vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype); ++ vect_record_loop_mask (loop_vinfo, masks, ncopies, vectype, scalar_mask); + return; + } + +@@ -1949,9 +1926,8 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype, + } + + machine_mode mask_mode; +- if (!(targetm.vectorize.get_mask_mode +- (GET_MODE_NUNITS (vecmode), +- GET_MODE_SIZE (vecmode)).exists (&mask_mode)) ++ if (!VECTOR_MODE_P (vecmode) ++ || !targetm.vectorize.get_mask_mode (vecmode).exists (&mask_mode) + || !can_vec_mask_load_store_p (vecmode, mask_mode, is_load)) + { + if (dump_enabled_p ()) +@@ -1969,7 +1945,7 @@ check_load_store_masking (loop_vec_info loop_vinfo, tree vectype, + poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); + unsigned int nvectors; + if (can_div_away_from_zero_p (group_size * vf, nunits, &nvectors)) +- vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype); ++ vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype, scalar_mask); + else + gcc_unreachable (); + } +@@ -2311,6 +2287,29 @@ get_group_load_store_type (stmt_vec_info stmt_info, tree vectype, bool slp, + && gap < (vect_known_alignment_in_bytes (first_dr_info) + / vect_get_scalar_dr_size (first_dr_info))) + overrun_p = false; ++ ++ /* If the gap splits the vector in half and the target ++ can do half-vector operations avoid the epilogue peeling ++ by simply loading half of the vector only. Usually ++ the construction with an upper zero half will be elided. 
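A standalone illustration of the half-vector trick described in the comment above (array contents and sizes are invented): with a group of four elements of which the last two are a gap, loading a full vector for the final group could read past the end of the data, so only the low half is loaded and the upper half is filled with zeros, avoiding an epilogue peeled just for the gap.

#include <array>
#include <cstdio>
#include <cstring>

/* Illustrative toy only, not GCC code.  */
int
main ()
{
  /* Two useful elements per group, four lanes per vector: gap of two.
     The slots after the last group do not exist in memory.  */
  int data[] = { 1, 2, /* gap of group 0 */ 0, 0, 3, 4 };
  std::array<int, 4> vec{};                          /* upper half stays zero */
  std::memcpy (vec.data (), data + 4, 2 * sizeof (int));  /* half-vector load */
  std::printf ("{ %d, %d, %d, %d }\n", vec[0], vec[1], vec[2], vec[3]);
  return 0;
}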
*/ ++ dr_alignment_support alignment_support_scheme; ++ scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype)); ++ machine_mode vmode; ++ if (overrun_p ++ && !masked_p ++ && (((alignment_support_scheme ++ = vect_supportable_dr_alignment (first_dr_info, false))) ++ == dr_aligned ++ || alignment_support_scheme == dr_unaligned_supported) ++ && known_eq (nunits, (group_size - gap) * 2) ++ && known_eq (nunits, group_size) ++ && related_vector_mode (TYPE_MODE (vectype), elmode, ++ group_size - gap).exists (&vmode) ++ && (convert_optab_handler (vec_init_optab, ++ TYPE_MODE (vectype), vmode) ++ != CODE_FOR_nothing)) ++ overrun_p = false; ++ + if (overrun_p && !can_overrun_p) + { + if (dump_enabled_p ()) +@@ -2536,6 +2535,7 @@ vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask, + vect_def_type *mask_dt_out, + tree *mask_vectype_out) + { ++ vec_info *vinfo = stmt_info->vinfo; + if (!VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (mask))) + { + if (dump_enabled_p ()) +@@ -2564,7 +2564,7 @@ vect_check_load_store_mask (stmt_vec_info stmt_info, tree mask, + + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + if (!mask_vectype) +- mask_vectype = get_mask_type_for_scalar_type (TREE_TYPE (vectype)); ++ mask_vectype = get_mask_type_for_scalar_type (vinfo, TREE_TYPE (vectype)); + + if (!mask_vectype || !VECTOR_BOOLEAN_TYPE_P (mask_vectype)) + { +@@ -2728,7 +2728,7 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info, + || TREE_CODE (masktype) == INTEGER_TYPE + || types_compatible_p (srctype, masktype))); + if (mask && TREE_CODE (masktype) == INTEGER_TYPE) +- masktype = build_same_sized_truth_vector_type (srctype); ++ masktype = truth_type_for (srctype); + + tree mask_halftype = masktype; + tree perm_mask = NULL_TREE; +@@ -2774,8 +2774,7 @@ vect_build_gather_load_calls (stmt_vec_info stmt_info, + mask_perm_mask = vect_gen_perm_mask_checked (masktype, indices); + } + else if (mask) +- mask_halftype +- = build_same_sized_truth_vector_type (gs_info->offset_vectype); ++ mask_halftype = truth_type_for (gs_info->offset_vectype); + } + else + gcc_unreachable (); +@@ -2952,6 +2951,7 @@ vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info, + gather_scatter_info *gs_info, + tree *dataref_ptr, tree *vec_offset) + { ++ vec_info *vinfo = stmt_info->vinfo; + gimple_seq stmts = NULL; + *dataref_ptr = force_gimple_operand (gs_info->base, &stmts, true, NULL_TREE); + if (stmts != NULL) +@@ -2962,7 +2962,7 @@ vect_get_gather_scatter_ops (struct loop *loop, stmt_vec_info stmt_info, + gcc_assert (!new_bb); + } + tree offset_type = TREE_TYPE (gs_info->offset); +- tree offset_vectype = get_vectype_for_scalar_type (offset_type); ++ tree offset_vectype = get_vectype_for_scalar_type (vinfo, offset_type); + *vec_offset = vect_get_vec_def_for_operand (gs_info->offset, stmt_info, + offset_vectype); + } +@@ -2997,7 +2997,7 @@ vect_get_strided_load_store_ops (stmt_vec_info stmt_info, + /* The offset given in GS_INFO can have pointer type, so use the element + type of the vector instead. */ + tree offset_type = TREE_TYPE (gs_info->offset); +- tree offset_vectype = get_vectype_for_scalar_type (offset_type); ++ tree offset_vectype = get_vectype_for_scalar_type (loop_vinfo, offset_type); + offset_type = TREE_TYPE (offset_vectype); + + /* Calculate X = DR_STEP / SCALE and convert it to the appropriate type. 
*/ +@@ -3161,8 +3161,7 @@ simple_integer_narrowing (tree vectype_out, tree vectype_in, + int multi_step_cvt = 0; + auto_vec interm_types; + if (!supportable_narrowing_operation (NOP_EXPR, vectype_out, vectype_in, +- &code, &multi_step_cvt, +- &interm_types) ++ &code, &multi_step_cvt, &interm_types) + || multi_step_cvt) + return false; + +@@ -3295,10 +3294,10 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + return false; + } + } +- /* If all arguments are external or constant defs use a vector type with +- the same size as the output vector type. */ ++ /* If all arguments are external or constant defs, infer the vector type ++ from the scalar type. */ + if (!vectype_in) +- vectype_in = get_same_sized_vectype (rhs_type, vectype_out); ++ vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type); + if (vec_stmt) + gcc_assert (vectype_in); + if (!vectype_in) +@@ -3309,6 +3308,19 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + + return false; + } ++ /* FORNOW: we don't yet support mixtures of vector sizes for calls, ++ just mixtures of nunits. E.g. DI->SI versions of __builtin_ctz* ++ are traditionally vectorized as two VnDI->VnDI IFN_CTZs followed ++ by a pack of the two vectors into an SI vector. We would need ++ separate code to handle direct VnDI->VnSI IFN_CTZs. */ ++ if (TYPE_SIZE (vectype_in) != TYPE_SIZE (vectype_out)) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "mismatched vector sizes %T and %T\n", ++ vectype_in, vectype_out); ++ return false; ++ } + + /* FORNOW */ + nunits_in = TYPE_VECTOR_SUBPARTS (vectype_in); +@@ -3415,7 +3427,9 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + unsigned int nvectors = (slp_node + ? SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node) + : ncopies); +- vect_record_loop_mask (loop_vinfo, masks, nvectors, vectype_out); ++ tree scalar_mask = gimple_call_arg (stmt_info->stmt, mask_opno); ++ vect_record_loop_mask (loop_vinfo, masks, nvectors, ++ vectype_out, scalar_mask); + } + return true; + } +@@ -3446,9 +3460,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + auto_vec > vec_defs (nargs); + vec vec_oprnds0; + +- for (i = 0; i < nargs; i++) +- vargs[i] = gimple_call_arg (stmt, i); +- vect_get_slp_defs (vargs, slp_node, &vec_defs); ++ vect_get_slp_defs (slp_node, &vec_defs); + vec_oprnds0 = vec_defs[0]; + + /* Arguments are ready. Create the new vector stmt. 
*/ +@@ -3470,8 +3482,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + = gimple_build_call_internal_vec (ifn, vargs); + gimple_call_set_lhs (call, half_res); + gimple_call_set_nothrow (call, true); +- new_stmt_info +- = vect_finish_stmt_generation (stmt_info, call, gsi); ++ vect_finish_stmt_generation (stmt_info, call, gsi); + if ((i & 1) == 0) + { + prev_res = half_res; +@@ -3523,8 +3534,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + if (mask_opno >= 0 && !vectypes[mask_opno]) + { + gcc_assert (modifier != WIDEN); +- vectypes[mask_opno] +- = build_same_sized_truth_vector_type (vectype_in); ++ vectypes[mask_opno] = truth_type_for (vectype_in); + } + + for (i = 0; i < nargs; i++) +@@ -3570,8 +3580,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + gcall *call = gimple_build_call_internal_vec (ifn, vargs); + gimple_call_set_lhs (call, half_res); + gimple_call_set_nothrow (call, true); +- new_stmt_info +- = vect_finish_stmt_generation (stmt_info, call, gsi); ++ vect_finish_stmt_generation (stmt_info, call, gsi); + if ((j & 1) == 0) + { + prev_res = half_res; +@@ -3622,9 +3631,7 @@ vectorizable_call (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + auto_vec > vec_defs (nargs); + vec vec_oprnds0; + +- for (i = 0; i < nargs; i++) +- vargs.quick_push (gimple_call_arg (stmt, i)); +- vect_get_slp_defs (vargs, slp_node, &vec_defs); ++ vect_get_slp_defs (slp_node, &vec_defs); + vec_oprnds0 = vec_defs[0]; + + /* Arguments are ready. Create the new vector stmt. */ +@@ -4087,9 +4094,8 @@ vectorizable_simd_clone_call (stmt_vec_info stmt_info, + || arginfo[i].dt == vect_external_def) + && bestn->simdclone->args[i].arg_type == SIMD_CLONE_ARG_TYPE_VECTOR) + { +- arginfo[i].vectype +- = get_vectype_for_scalar_type (TREE_TYPE (gimple_call_arg (stmt, +- i))); ++ tree arg_type = TREE_TYPE (gimple_call_arg (stmt, i)); ++ arginfo[i].vectype = get_vectype_for_scalar_type (vinfo, arg_type); + if (arginfo[i].vectype == NULL + || (simd_clone_subparts (arginfo[i].vectype) + > bestn->simdclone->simdlen)) +@@ -4802,10 +4808,10 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + } + } + +- /* If op0 is an external or constant defs use a vector type of +- the same size as the output vector type. */ ++ /* If op0 is an external or constant def, infer the vector type ++ from the scalar type. */ + if (!vectype_in) +- vectype_in = get_same_sized_vectype (rhs_type, vectype_out); ++ vectype_in = get_vectype_for_scalar_type (vinfo, rhs_type); + if (vec_stmt) + gcc_assert (vectype_in); + if (!vectype_in) +@@ -4863,7 +4869,9 @@ vectorizable_conversion (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + switch (modifier) + { + case NONE: +- if (code != FIX_TRUNC_EXPR && code != FLOAT_EXPR) ++ if (code != FIX_TRUNC_EXPR ++ && code != FLOAT_EXPR ++ && !CONVERT_EXPR_CODE_P (code)) + return false; + if (supportable_convert_operation (code, vectype_out, vectype_in, + &decl1, &code1)) +@@ -5452,7 +5460,7 @@ vectorizable_assignment (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + either as shift by a scalar or by a vector. 
*/ + + bool +-vect_supportable_shift (enum tree_code code, tree scalar_type) ++vect_supportable_shift (vec_info *vinfo, enum tree_code code, tree scalar_type) + { + + machine_mode vec_mode; +@@ -5460,7 +5468,7 @@ vect_supportable_shift (enum tree_code code, tree scalar_type) + int icode; + tree vectype; + +- vectype = get_vectype_for_scalar_type (scalar_type); ++ vectype = get_vectype_for_scalar_type (vinfo, scalar_type); + if (!vectype) + return false; + +@@ -5491,7 +5499,7 @@ vect_supportable_shift (enum tree_code code, tree scalar_type) + stmt to replace it, put it in VEC_STMT, and insert it at GSI. + Return true if STMT_INFO is vectorizable in this way. */ + +-bool ++static bool + vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + stmt_vec_info *vec_stmt, slp_tree slp_node, + stmt_vector_for_cost *cost_vec) +@@ -5524,6 +5532,7 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + bool scalar_shift_arg = true; + bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); + vec_info *vinfo = stmt_info->vinfo; ++ bool incompatible_op1_vectype_p = false; + + if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) + return false; +@@ -5565,10 +5574,10 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + "use not simple.\n"); + return false; + } +- /* If op0 is an external or constant def use a vector type with +- the same size as the output vector type. */ ++ /* If op0 is an external or constant def, infer the vector type ++ from the scalar type. */ + if (!vectype) +- vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out); ++ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0)); + if (vec_stmt) + gcc_assert (vectype); + if (!vectype) +@@ -5666,9 +5675,16 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + "vector/vector shift/rotate found.\n"); + + if (!op1_vectype) +- op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out); +- if (op1_vectype == NULL_TREE +- || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype)) ++ op1_vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op1)); ++ incompatible_op1_vectype_p ++ = (op1_vectype == NULL_TREE ++ || maybe_ne (TYPE_VECTOR_SUBPARTS (op1_vectype), ++ TYPE_VECTOR_SUBPARTS (vectype)) ++ || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype)); ++ if (incompatible_op1_vectype_p ++ && (!slp_node ++ || SLP_TREE_DEF_TYPE ++ (SLP_TREE_CHILDREN (slp_node)[1]) != vect_constant_def)) + { + if (dump_enabled_p ()) + dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +@@ -5707,7 +5723,10 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + so make sure the scalar is the right type if we are + dealing with vectors of long long/long/short/char. */ + if (dt[1] == vect_constant_def) +- op1 = fold_convert (TREE_TYPE (vectype), op1); ++ { ++ if (!slp_node) ++ op1 = fold_convert (TREE_TYPE (vectype), op1); ++ } + else if (!useless_type_conversion_p (TREE_TYPE (vectype), + TREE_TYPE (op1))) + { +@@ -5818,6 +5837,21 @@ vectorizable_shift (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + } + } + } ++ else if (slp_node && incompatible_op1_vectype_p) ++ { ++ /* Convert the scalar constant shift amounts in-place. 
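For context, a tiny self-contained example of why that conversion matters (types and values invented for illustration): in the scalar source the shift amount of x << n has type int, but a per-lane shift of 64-bit elements wants the counts in the element type, so constant amounts are converted up front.

#include <cstdint>
#include <cstdio>

/* Illustrative toy only, not GCC code.  */
int
main ()
{
  int64_t lanes[4] = { 1, 2, 3, 4 };
  int narrow_counts[4] = { 1, 2, 3, 4 };   /* counts as written in the source */
  int64_t counts[4];
  for (int i = 0; i < 4; ++i)
    counts[i] = narrow_counts[i];          /* the up-front conversion */
  for (int i = 0; i < 4; ++i)
    lanes[i] <<= counts[i];                /* one shift amount per lane */
  for (int i = 0; i < 4; ++i)
    std::printf ("%lld ", (long long) lanes[i]);
  std::printf ("\n");
  return 0;
}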
*/ ++ slp_tree shift = SLP_TREE_CHILDREN (slp_node)[1]; ++ gcc_assert (SLP_TREE_DEF_TYPE (shift) == vect_constant_def); ++ for (unsigned i = 0; ++ i < SLP_TREE_SCALAR_OPS (shift).length (); ++i) ++ { ++ SLP_TREE_SCALAR_OPS (shift)[i] ++ = fold_convert (TREE_TYPE (vectype), ++ SLP_TREE_SCALAR_OPS (shift)[i]); ++ gcc_assert ((TREE_CODE (SLP_TREE_SCALAR_OPS (shift)[i]) ++ == INTEGER_CST)); ++ } ++ } + + /* vec_oprnd1 is available if operand 1 should be of a scalar-type + (a special case for certain kind of vector shifts); otherwise, +@@ -5894,7 +5928,7 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + poly_uint64 nunits_in; + poly_uint64 nunits_out; + tree vectype_out; +- int ncopies; ++ int ncopies, vec_num; + int j, i; + vec vec_oprnds0 = vNULL; + vec vec_oprnds1 = vNULL; +@@ -5964,8 +5998,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + "use not simple.\n"); + return false; + } +- /* If op0 is an external or constant def use a vector type with +- the same size as the output vector type. */ ++ /* If op0 is an external or constant def, infer the vector type ++ from the scalar type. */ + if (!vectype) + { + /* For boolean type we cannot determine vectype by +@@ -5985,7 +6019,7 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + vectype = vectype_out; + } + else +- vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out); ++ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (op0)); + } + if (vec_stmt) + gcc_assert (vectype); +@@ -6031,9 +6065,15 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + vectorized stmts for each SLP node. Hence, NCOPIES is always 1 in + case of SLP. */ + if (slp_node) +- ncopies = 1; ++ { ++ ncopies = 1; ++ vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); ++ } + else +- ncopies = vect_get_num_copies (loop_vinfo, vectype); ++ { ++ ncopies = vect_get_num_copies (loop_vinfo, vectype); ++ vec_num = 1; ++ } + + gcc_assert (ncopies >= 1); + +@@ -6086,8 +6126,34 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + return false; + } + ++ int reduc_idx = STMT_VINFO_REDUC_IDX (stmt_info); ++ vec_loop_masks *masks = (loop_vinfo ? &LOOP_VINFO_MASKS (loop_vinfo) : NULL); ++ internal_fn cond_fn = get_conditional_internal_fn (code); ++ + if (!vec_stmt) /* transformation not required. */ + { ++ /* If this operation is part of a reduction, a fully-masked loop ++ should only change the active lanes of the reduction chain, ++ keeping the inactive lanes as-is. 
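A scalar model of the conditional operation such a masked reduction step uses (the arrays, the addition and the cond_add name are illustrative; the real code emits the target's conditional internal function): active lanes perform the operation, inactive lanes simply pass the reduction input through unchanged.

#include <cstdio>

/* Illustrative toy only, not GCC code.  */
static void
cond_add (const bool *mask, const int *reduc_in, const int *other,
	  int *out, int n)
{
  for (int i = 0; i < n; ++i)
    out[i] = mask[i] ? reduc_in[i] + other[i]   /* active lane: do the op */
		     : reduc_in[i];             /* inactive lane: keep chain value */
}

int
main ()
{
  bool mask[4] = { true, true, false, false };  /* e.g. lanes past the trip count */
  int reduc_in[4] = { 5, 5, 5, 5 };
  int other[4] = { 1, 2, 3, 4 };
  int out[4];
  cond_add (mask, reduc_in, other, out, 4);
  for (int i = 0; i < 4; ++i)
    std::printf ("%d ", out[i]);                /* prints 6 7 5 5 */
  std::printf ("\n");
  return 0;
}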
*/ ++ if (loop_vinfo ++ && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) ++ && reduc_idx >= 0) ++ { ++ if (cond_fn == IFN_LAST ++ || !direct_internal_fn_supported_p (cond_fn, vectype, ++ OPTIMIZE_FOR_SPEED)) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "can't use a fully-masked loop because no" ++ " conditional operation is available.\n"); ++ LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; ++ } ++ else ++ vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, ++ vectype, NULL); ++ } ++ + STMT_VINFO_TYPE (stmt_info) = op_vec_info_type; + DUMP_VECT_SCOPE ("vectorizable_operation"); + vect_model_simple_cost (stmt_info, ncopies, dt, ndts, slp_node, cost_vec); +@@ -6100,6 +6166,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + dump_printf_loc (MSG_NOTE, vect_location, + "transform binary/unary operation.\n"); + ++ bool masked_loop_p = loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); ++ + /* POINTER_DIFF_EXPR has pointer arguments which are vectorized as + vectors with unsigned elements, but the result is signed. So, we + need to compute the MINUS_EXPR into vectype temporary and +@@ -6180,12 +6248,8 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + { + if (slp_node) + { +- auto_vec ops(3); +- ops.quick_push (op0); +- ops.quick_push (op1); +- ops.quick_push (op2); + auto_vec > vec_defs(3); +- vect_get_slp_defs (ops, slp_node, &vec_defs); ++ vect_get_slp_defs (slp_node, &vec_defs); + vec_oprnds0 = vec_defs[0]; + vec_oprnds1 = vec_defs[1]; + vec_oprnds2 = vec_defs[2]; +@@ -6221,22 +6285,41 @@ vectorizable_operation (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + ? vec_oprnds1[i] : NULL_TREE); + vop2 = ((op_type == ternary_op) + ? vec_oprnds2[i] : NULL_TREE); +- gassign *new_stmt = gimple_build_assign (vec_dest, code, +- vop0, vop1, vop2); +- new_temp = make_ssa_name (vec_dest, new_stmt); +- gimple_assign_set_lhs (new_stmt, new_temp); +- new_stmt_info +- = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); +- if (vec_cvt_dest) ++ if (masked_loop_p && reduc_idx >= 0) + { +- new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp); +- gassign *new_stmt +- = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR, +- new_temp); +- new_temp = make_ssa_name (vec_cvt_dest, new_stmt); ++ /* Perform the operation on active elements only and take ++ inactive elements from the reduction chain input. */ ++ gcc_assert (!vop2); ++ vop2 = reduc_idx == 1 ? 
vop1 : vop0; ++ tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies, ++ vectype, i * ncopies + j); ++ gcall *call = gimple_build_call_internal (cond_fn, 4, mask, ++ vop0, vop1, vop2); ++ new_temp = make_ssa_name (vec_dest, call); ++ gimple_call_set_lhs (call, new_temp); ++ gimple_call_set_nothrow (call, true); ++ new_stmt_info ++ = vect_finish_stmt_generation (stmt_info, call, gsi); ++ } ++ else ++ { ++ gassign *new_stmt = gimple_build_assign (vec_dest, code, ++ vop0, vop1, vop2); ++ new_temp = make_ssa_name (vec_dest, new_stmt); + gimple_assign_set_lhs (new_stmt, new_temp); + new_stmt_info + = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); ++ if (vec_cvt_dest) ++ { ++ new_temp = build1 (VIEW_CONVERT_EXPR, vectype_out, new_temp); ++ gassign *new_stmt ++ = gimple_build_assign (vec_cvt_dest, VIEW_CONVERT_EXPR, ++ new_temp); ++ new_temp = make_ssa_name (vec_cvt_dest, new_stmt); ++ gimple_assign_set_lhs (new_stmt, new_temp); ++ new_stmt_info ++ = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); ++ } + } + if (slp_node) + SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); +@@ -6517,7 +6600,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + if (loop_vinfo + && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) + check_load_store_masking (loop_vinfo, vectype, vls_type, group_size, +- memory_access_type, &gs_info); ++ memory_access_type, &gs_info, mask); + + STMT_VINFO_TYPE (stmt_info) = store_vec_info_type; + vect_model_store_cost (stmt_info, ncopies, rhs_dt, memory_access_type, +@@ -6580,8 +6663,7 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + ncopies *= 2; + + if (mask) +- mask_halfvectype +- = build_same_sized_truth_vector_type (gs_info.offset_vectype); ++ mask_halfvectype = truth_type_for (gs_info.offset_vectype); + } + else + gcc_unreachable (); +@@ -6840,9 +6922,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + of vector elts directly. */ + scalar_mode elmode = SCALAR_TYPE_MODE (elem_type); + machine_mode vmode; +- if (!mode_for_vector (elmode, group_size).exists (&vmode) +- || !VECTOR_MODE_P (vmode) +- || !targetm.vector_mode_supported_p (vmode) ++ if (!related_vector_mode (TYPE_MODE (vectype), elmode, ++ group_size).exists (&vmode) + || (convert_optab_handler (vec_extract_optab, + TYPE_MODE (vectype), vmode) + == CODE_FOR_nothing)) +@@ -6859,9 +6940,8 @@ vectorizable_store (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + element extracts from the original vector type and + element size stores. 
*/ + if (int_mode_for_size (lsize, 0).exists (&elmode) +- && mode_for_vector (elmode, lnunits).exists (&vmode) +- && VECTOR_MODE_P (vmode) +- && targetm.vector_mode_supported_p (vmode) ++ && related_vector_mode (TYPE_MODE (vectype), elmode, ++ lnunits).exists (&vmode) + && (convert_optab_handler (vec_extract_optab, + vmode, elmode) + != CODE_FOR_nothing)) +@@ -7624,14 +7704,6 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + if (!scalar_dest) + return false; + +- if (slp_node != NULL) +- { +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, +- "SLP of masked loads not supported.\n"); +- return false; +- } +- + int mask_index = internal_fn_mask_index (ifn); + if (mask_index >= 0) + { +@@ -7714,6 +7786,15 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + first_stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); + group_size = DR_GROUP_SIZE (first_stmt_info); + ++ /* Refuse non-SLP vectorization of SLP-only groups. */ ++ if (!slp && STMT_VINFO_SLP_VECT_ONLY (first_stmt_info)) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "cannot vectorize load in non-SLP mode.\n"); ++ return false; ++ } ++ + if (slp && SLP_TREE_LOAD_PERMUTATION (slp_node).exists ()) + slp_perm = true; + +@@ -7767,7 +7848,7 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + if (loop_vinfo + && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) + check_load_store_masking (loop_vinfo, vectype, VLS_LOAD, group_size, +- memory_access_type, &gs_info); ++ memory_access_type, &gs_info, mask); + + STMT_VINFO_TYPE (stmt_info) = load_vec_info_type; + vect_model_load_cost (stmt_info, ncopies, memory_access_type, +@@ -7947,9 +8028,8 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + vector elts directly. */ + scalar_mode elmode = SCALAR_TYPE_MODE (TREE_TYPE (vectype)); + machine_mode vmode; +- if (mode_for_vector (elmode, group_size).exists (&vmode) +- && VECTOR_MODE_P (vmode) +- && targetm.vector_mode_supported_p (vmode) ++ if (related_vector_mode (TYPE_MODE (vectype), elmode, ++ group_size).exists (&vmode) + && (convert_optab_handler (vec_init_optab, + TYPE_MODE (vectype), vmode) + != CODE_FOR_nothing)) +@@ -7973,9 +8053,8 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + /* If we can't construct such a vector fall back to + element loads of the original vector type. */ + if (int_mode_for_size (lsize, 0).exists (&elmode) +- && mode_for_vector (elmode, lnunits).exists (&vmode) +- && VECTOR_MODE_P (vmode) +- && targetm.vector_mode_supported_p (vmode) ++ && related_vector_mode (TYPE_MODE (vectype), elmode, ++ lnunits).exists (&vmode) + && (convert_optab_handler (vec_init_optab, vmode, elmode) + != CODE_FOR_nothing)) + { +@@ -8413,8 +8492,17 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + simd_lane_access_p, + byte_offset, bump); + if (mask) +- vec_mask = vect_get_vec_def_for_operand (mask, stmt_info, +- mask_vectype); ++ { ++ if (slp_node) ++ { ++ auto_vec > vec_defs (1); ++ vect_get_slp_defs (slp_node, &vec_defs); ++ vec_mask = vec_defs[0][0]; ++ } ++ else ++ vec_mask = vect_get_vec_def_for_operand (mask, stmt_info, ++ mask_vectype); ++ } + } + else + { +@@ -8564,8 +8652,25 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + } + else + { ++ tree ltype = vectype; ++ /* If there's no peeling for gaps but we have a gap ++ with slp loads then load the lower half of the ++ vector only. 
See get_group_load_store_type for ++ when we apply this optimization. */ ++ if (slp ++ && loop_vinfo ++ && !LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ++ && DR_GROUP_GAP (first_stmt_info) != 0 ++ && known_eq (nunits, ++ (group_size ++ - DR_GROUP_GAP (first_stmt_info)) * 2) ++ && known_eq (nunits, group_size)) ++ ltype = build_vector_type (TREE_TYPE (vectype), ++ (group_size ++ - DR_GROUP_GAP ++ (first_stmt_info))); + data_ref +- = fold_build2 (MEM_REF, vectype, dataref_ptr, ++ = fold_build2 (MEM_REF, ltype, dataref_ptr, + dataref_offset + ? dataref_offset + : build_int_cst (ref_type, 0)); +@@ -8579,6 +8684,23 @@ vectorizable_load (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + TREE_TYPE (data_ref) + = build_aligned_type (TREE_TYPE (data_ref), + TYPE_ALIGN (elem_type)); ++ if (ltype != vectype) ++ { ++ vect_copy_ref_info (data_ref, DR_REF (first_dr_info->dr)); ++ tree tem = make_ssa_name (ltype); ++ new_stmt = gimple_build_assign (tem, data_ref); ++ vect_finish_stmt_generation (stmt_info, new_stmt, gsi); ++ data_ref = NULL; ++ vec *v; ++ vec_alloc (v, 2); ++ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, tem); ++ CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, ++ build_zero_cst (ltype)); ++ new_stmt ++ = gimple_build_assign (vec_dest, ++ build_constructor ++ (vectype, v)); ++ } + } + break; + } +@@ -8864,7 +8986,7 @@ vect_is_simple_cond (tree cond, vec_info *vinfo, + scalar_type = build_nonstandard_integer_type + (tree_to_uhwi (TYPE_SIZE (TREE_TYPE (vectype))), + TYPE_UNSIGNED (scalar_type)); +- *comp_vectype = get_vectype_for_scalar_type (scalar_type); ++ *comp_vectype = get_vectype_for_scalar_type (vinfo, scalar_type); + } + + return true; +@@ -8881,9 +9003,9 @@ vect_is_simple_cond (tree cond, vec_info *vinfo, + + Return true if STMT_INFO is vectorizable in this way. */ + +-bool ++static bool + vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, +- stmt_vec_info *vec_stmt, bool for_reduction, ++ stmt_vec_info *vec_stmt, + slp_tree slp_node, stmt_vector_for_cost *cost_vec) + { + vec_info *vinfo = stmt_info->vinfo; +@@ -8913,22 +9035,39 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + tree vec_cmp_type; + bool masked = false; + +- if (for_reduction && STMT_SLP_TYPE (stmt_info)) ++ if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) ++ return false; ++ ++ /* Is vectorizable conditional operation? */ ++ gassign *stmt = dyn_cast (stmt_info->stmt); ++ if (!stmt) ++ return false; ++ ++ code = gimple_assign_rhs_code (stmt); ++ if (code != COND_EXPR) + return false; + +- vect_reduction_type reduction_type +- = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info); +- if (reduction_type == TREE_CODE_REDUCTION) ++ stmt_vec_info reduc_info = NULL; ++ int reduc_index = -1; ++ vect_reduction_type reduction_type = TREE_CODE_REDUCTION; ++ bool for_reduction ++ = STMT_VINFO_REDUC_DEF (vect_orig_stmt (stmt_info)) != NULL; ++ if (for_reduction) + { +- if (!STMT_VINFO_RELEVANT_P (stmt_info) && !bb_vinfo) ++ if (STMT_SLP_TYPE (stmt_info)) + return false; +- +- if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def +- && !(STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle +- && for_reduction)) ++ reduc_info = info_for_reduction (stmt_info); ++ reduction_type = STMT_VINFO_REDUC_TYPE (reduc_info); ++ reduc_index = STMT_VINFO_REDUC_IDX (stmt_info); ++ gcc_assert (reduction_type != EXTRACT_LAST_REDUCTION ++ || reduc_index != -1); ++ } ++ else ++ { ++ if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_internal_def) + return false; + +- /* FORNOW: not yet supported. 
*/ ++ /* FORNOW: only supported as part of a reduction. */ + if (STMT_VINFO_LIVE_P (stmt_info)) + { + if (dump_enabled_p ()) +@@ -8938,16 +9077,6 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + } + } + +- /* Is vectorizable conditional operation? */ +- gassign *stmt = dyn_cast (stmt_info->stmt); +- if (!stmt) +- return false; +- +- code = gimple_assign_rhs_code (stmt); +- +- if (code != COND_EXPR) +- return false; +- + tree vectype = STMT_VINFO_VECTYPE (stmt_info); + tree vectype1 = NULL_TREE, vectype2 = NULL_TREE; + +@@ -8981,7 +9110,7 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + return false; + + masked = !COMPARISON_CLASS_P (cond_expr); +- vec_cmp_type = build_same_sized_truth_vector_type (comp_vectype); ++ vec_cmp_type = truth_type_for (comp_vectype); + + if (vec_cmp_type == NULL_TREE) + return false; +@@ -8993,6 +9122,29 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + cond_expr1 = TREE_OPERAND (cond_expr, 1); + } + ++ /* For conditional reductions, the "then" value needs to be the candidate ++ value calculated by this iteration while the "else" value needs to be ++ the result carried over from previous iterations. If the COND_EXPR ++ is the other way around, we need to swap it. */ ++ bool must_invert_cmp_result = false; ++ if (reduction_type == EXTRACT_LAST_REDUCTION && reduc_index == 1) ++ { ++ if (masked) ++ must_invert_cmp_result = true; ++ else ++ { ++ bool honor_nans = HONOR_NANS (TREE_TYPE (cond_expr0)); ++ tree_code new_code = invert_tree_comparison (cond_code, honor_nans); ++ if (new_code == ERROR_MARK) ++ must_invert_cmp_result = true; ++ else ++ cond_code = new_code; ++ } ++ /* Make sure we don't accidentally use the old condition. */ ++ cond_expr = NULL_TREE; ++ std::swap (then_clause, else_clause); ++ } ++ + if (!masked && VECTOR_BOOLEAN_TYPE_P (comp_vectype)) + { + /* Boolean values may have another representation in vectors +@@ -9053,6 +9205,16 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + return false; + } + } ++ if (loop_vinfo ++ && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) ++ && reduction_type == EXTRACT_LAST_REDUCTION) ++ { ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, ++ "can't yet use a fully-masked loop for" ++ " EXTRACT_LAST_REDUCTION.\n"); ++ LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; ++ } + if (expand_vec_cond_expr_p (vectype, comp_vectype, + cond_code)) + { +@@ -9082,24 +9244,42 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + /* Handle cond expr. */ + for (j = 0; j < ncopies; j++) + { ++ tree loop_mask = NULL_TREE; ++ bool swap_cond_operands = false; ++ ++ /* See whether another part of the vectorized code applies a loop ++ mask to the condition, or to its inverse. 
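[Editorial illustration, not part of the patch] The arm swap a few hunks above needs the logical inverse of the comparison, and once NaNs must be honoured the textually inverted comparison (e.g. >= for <) is not that inverse, which is why the code falls back to inverting the mask with BIT_NOT_EXPR. A minimal standalone C check of that corner case, with made-up values:

#include <assert.h>
#include <math.h>
#include <stdio.h>

int main (void)
{
  double a = NAN, b = 1.0;
  double then_v = 2.0, else_v = 3.0;

  /* Original form:  a < b ? then_v : else_v  */
  double orig = (a < b) ? then_v : else_v;

  /* Swapping the arms requires the logical inverse of the condition...  */
  double swapped_ok  = !(a < b) ? else_v : then_v;
  /* ...but ">=" is NOT that inverse once NaNs are involved.  */
  double swapped_bad = (a >= b) ? else_v : then_v;

  assert (orig == swapped_ok);
  assert (orig != swapped_bad);   /* demonstrates the NaN pitfall */
  printf ("orig=%g swapped_ok=%g swapped_bad=%g\n",
          orig, swapped_ok, swapped_bad);
  return 0;
}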
*/ ++ ++ if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) ++ { ++ scalar_cond_masked_key cond (cond_expr, ncopies); ++ if (loop_vinfo->scalar_cond_masked_set.contains (cond)) ++ { ++ vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); ++ loop_mask = vect_get_loop_mask (gsi, masks, ncopies, vectype, j); ++ } ++ else ++ { ++ bool honor_nans = HONOR_NANS (TREE_TYPE (cond.op0)); ++ cond.code = invert_tree_comparison (cond.code, honor_nans); ++ if (loop_vinfo->scalar_cond_masked_set.contains (cond)) ++ { ++ vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); ++ loop_mask = vect_get_loop_mask (gsi, masks, ncopies, ++ vectype, j); ++ cond_code = cond.code; ++ swap_cond_operands = true; ++ } ++ } ++ } ++ + stmt_vec_info new_stmt_info = NULL; + if (j == 0) + { + if (slp_node) + { +- auto_vec ops; + auto_vec, 4> vec_defs; +- +- if (masked) +- ops.safe_push (cond_expr); +- else +- { +- ops.safe_push (cond_expr0); +- ops.safe_push (cond_expr1); +- } +- ops.safe_push (then_clause); +- ops.safe_push (else_clause); +- vect_get_slp_defs (ops, slp_node, &vec_defs); ++ vect_get_slp_defs (slp_node, &vec_defs); + vec_oprnds3 = vec_defs.pop (); + vec_oprnds2 = vec_defs.pop (); + if (!masked) +@@ -9159,6 +9339,9 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + vec_then_clause = vec_oprnds2[i]; + vec_else_clause = vec_oprnds3[i]; + ++ if (swap_cond_operands) ++ std::swap (vec_then_clause, vec_else_clause); ++ + if (masked) + vec_compare = vec_cond_lhs; + else +@@ -9197,6 +9380,50 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + } + } + } ++ ++ /* If we decided to apply a loop mask to the result of the vector ++ comparison, AND the comparison with the mask now. Later passes ++ should then be able to reuse the AND results between mulitple ++ vector statements. ++ ++ For example: ++ for (int i = 0; i < 100; ++i) ++ x[i] = y[i] ? z[i] : 10; ++ ++ results in following optimized GIMPLE: ++ ++ mask__35.8_43 = vect__4.7_41 != { 0, ... }; ++ vec_mask_and_46 = loop_mask_40 & mask__35.8_43; ++ _19 = &MEM[base: z_12(D), index: ivtmp_56, step: 4, offset: 0B]; ++ vect_iftmp.11_47 = .MASK_LOAD (_19, 4B, vec_mask_and_46); ++ vect_iftmp.12_52 = VEC_COND_EXPR ; ++ ++ instead of using a masked and unmasked forms of ++ vec != { 0, ... } (masked in the MASK_LOAD, ++ unmasked in the VEC_COND_EXPR). 
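[Editorial illustration, not part of the patch] A standalone sketch of why ANDing the loop mask into the comparison is safe (scalar emulation with a made-up lane count): lanes disabled by the loop mask never contribute to the final result, and active lanes see the same condition either way, which is what lets later passes share one vec_mask_and_NN between the MASK_LOAD and the VEC_COND_EXPR as the dump fragment in the comment shows.

#include <assert.h>
#include <stdio.h>

#define LANES 4

int main (void)
{
  int y[LANES] = { 5, 0, 7, 0 };
  int z[LANES] = { 1, 2, 3, 4 };
  /* Loop mask: only the first three lanes correspond to real iterations.  */
  int loop_mask[LANES] = { 1, 1, 1, 0 };

  for (int i = 0; i < LANES; i++)
    {
      int cond        = (y[i] != 0);
      int cond_masked = cond & loop_mask[i];

      int unmasked_sel = cond        ? z[i] : 10;
      int masked_sel   = cond_masked ? z[i] : 10;

      /* Active lanes are unaffected by the extra AND.  */
      if (loop_mask[i])
        assert (unmasked_sel == masked_sel);
      printf ("lane %d: unmasked=%d masked=%d\n", i, unmasked_sel, masked_sel);
    }
  return 0;
}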
*/ ++ ++ if (loop_mask) ++ { ++ if (COMPARISON_CLASS_P (vec_compare)) ++ { ++ tree tmp = make_ssa_name (vec_cmp_type); ++ tree op0 = TREE_OPERAND (vec_compare, 0); ++ tree op1 = TREE_OPERAND (vec_compare, 1); ++ gassign *g = gimple_build_assign (tmp, ++ TREE_CODE (vec_compare), ++ op0, op1); ++ vect_finish_stmt_generation (stmt_info, g, gsi); ++ vec_compare = tmp; ++ } ++ ++ tree tmp2 = make_ssa_name (vec_cmp_type); ++ gassign *g = gimple_build_assign (tmp2, BIT_AND_EXPR, ++ vec_compare, loop_mask); ++ vect_finish_stmt_generation (stmt_info, g, gsi); ++ vec_compare = tmp2; ++ } ++ + if (reduction_type == EXTRACT_LAST_REDUCTION) + { + if (!is_gimple_val (vec_compare)) +@@ -9207,6 +9434,15 @@ vectorizable_condition (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + vect_finish_stmt_generation (stmt_info, new_stmt, gsi); + vec_compare = vec_compare_name; + } ++ if (must_invert_cmp_result) ++ { ++ tree vec_compare_name = make_ssa_name (vec_cmp_type); ++ gassign *new_stmt = gimple_build_assign (vec_compare_name, ++ BIT_NOT_EXPR, ++ vec_compare); ++ vect_finish_stmt_generation (stmt_info, new_stmt, gsi); ++ vec_compare = vec_compare_name; ++ } + gcall *new_stmt = gimple_build_call_internal + (IFN_FOLD_EXTRACT_LAST, 3, else_clause, vec_compare, + vec_then_clause); +@@ -9345,7 +9581,7 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + /* Invariant comparison. */ + if (!vectype) + { +- vectype = get_vectype_for_scalar_type (TREE_TYPE (rhs1)); ++ vectype = get_vectype_for_scalar_type (vinfo, TREE_TYPE (rhs1)); + if (maybe_ne (TYPE_VECTOR_SUBPARTS (vectype), nunits)) + return false; + } +@@ -9446,12 +9682,8 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + { + if (slp_node) + { +- auto_vec ops; + auto_vec, 2> vec_defs; +- +- ops.safe_push (rhs1); +- ops.safe_push (rhs2); +- vect_get_slp_defs (ops, slp_node, &vec_defs); ++ vect_get_slp_defs (slp_node, &vec_defs); + vec_oprnds1 = vec_defs.pop (); + vec_oprnds0 = vec_defs.pop (); + if (swap_p) +@@ -9544,7 +9776,8 @@ vectorizable_comparison (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + + static bool + can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, +- slp_tree slp_node, stmt_vec_info *vec_stmt, ++ slp_tree slp_node, slp_instance slp_node_instance, ++ stmt_vec_info *vec_stmt, + stmt_vector_for_cost *cost_vec) + { + if (slp_node) +@@ -9554,13 +9787,15 @@ can_vectorize_live_stmts (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (slp_node), i, slp_stmt_info) + { + if (STMT_VINFO_LIVE_P (slp_stmt_info) +- && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, i, ++ && !vectorizable_live_operation (slp_stmt_info, gsi, slp_node, ++ slp_node_instance, i, + vec_stmt, cost_vec)) + return false; + } + } + else if (STMT_VINFO_LIVE_P (stmt_info) +- && !vectorizable_live_operation (stmt_info, gsi, slp_node, -1, ++ && !vectorizable_live_operation (stmt_info, gsi, slp_node, ++ slp_node_instance, -1, + vec_stmt, cost_vec)) + return false; + +@@ -9736,14 +9971,13 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize, + || vectorizable_load (stmt_info, NULL, NULL, node, node_instance, + cost_vec) + || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec) +- || vectorizable_reduction (stmt_info, NULL, NULL, node, +- node_instance, cost_vec) ++ || vectorizable_reduction (stmt_info, node, node_instance, cost_vec) + || vectorizable_induction (stmt_info, NULL, NULL, node, cost_vec) + || 
vectorizable_shift (stmt_info, NULL, NULL, node, cost_vec) +- || vectorizable_condition (stmt_info, NULL, NULL, false, node, +- cost_vec) ++ || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec) + || vectorizable_comparison (stmt_info, NULL, NULL, node, +- cost_vec)); ++ cost_vec) ++ || vectorizable_lc_phi (stmt_info, NULL, node)); + else + { + if (bb_vinfo) +@@ -9759,8 +9993,7 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize, + || vectorizable_load (stmt_info, NULL, NULL, node, node_instance, + cost_vec) + || vectorizable_store (stmt_info, NULL, NULL, node, cost_vec) +- || vectorizable_condition (stmt_info, NULL, NULL, false, node, +- cost_vec) ++ || vectorizable_condition (stmt_info, NULL, NULL, node, cost_vec) + || vectorizable_comparison (stmt_info, NULL, NULL, node, + cost_vec)); + } +@@ -9775,7 +10008,9 @@ vect_analyze_stmt (stmt_vec_info stmt_info, bool *need_to_vectorize, + need extra handling, except for vectorizable reductions. */ + if (!bb_vinfo + && STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type +- && !can_vectorize_live_stmts (stmt_info, NULL, node, NULL, cost_vec)) ++ && STMT_VINFO_TYPE (stmt_info) != lc_phi_info_type ++ && !can_vectorize_live_stmts (stmt_info, NULL, node, node_instance, ++ NULL, cost_vec)) + return opt_result::failure_at (stmt_info->stmt, + "not vectorized:" + " live stmt not supported: %G", +@@ -9864,8 +10099,7 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + break; + + case condition_vec_info_type: +- done = vectorizable_condition (stmt_info, gsi, &vec_stmt, false, +- slp_node, NULL); ++ done = vectorizable_condition (stmt_info, gsi, &vec_stmt, slp_node, NULL); + gcc_assert (done); + break; + +@@ -9887,8 +10121,18 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + break; + + case reduc_vec_info_type: +- done = vectorizable_reduction (stmt_info, gsi, &vec_stmt, slp_node, +- slp_node_instance, NULL); ++ done = vect_transform_reduction (stmt_info, gsi, &vec_stmt, slp_node); ++ gcc_assert (done); ++ break; ++ ++ case cycle_phi_info_type: ++ done = vect_transform_cycle_phi (stmt_info, &vec_stmt, slp_node, ++ slp_node_instance); ++ gcc_assert (done); ++ break; ++ ++ case lc_phi_info_type: ++ done = vectorizable_lc_phi (stmt_info, &vec_stmt, slp_node); + gcc_assert (done); + break; + +@@ -9944,19 +10188,66 @@ vect_transform_stmt (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, + } + } + +- /* Handle stmts whose DEF is used outside the loop-nest that is +- being vectorized. */ +- if (STMT_VINFO_TYPE (stmt_info) != reduc_vec_info_type) +- { +- done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, &vec_stmt, +- NULL); +- gcc_assert (done); +- } +- + if (vec_stmt) + STMT_VINFO_VEC_STMT (stmt_info) = vec_stmt; + +- return is_store; ++ if (STMT_VINFO_TYPE (stmt_info) == store_vec_info_type) ++ return is_store; ++ ++ /* If this stmt defines a value used on a backedge, update the ++ vectorized PHIs. 
*/ ++ stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); ++ stmt_vec_info reduc_info; ++ if (STMT_VINFO_REDUC_DEF (orig_stmt_info) ++ && vect_stmt_to_vectorize (orig_stmt_info) == stmt_info ++ && (reduc_info = info_for_reduction (orig_stmt_info)) ++ && STMT_VINFO_REDUC_TYPE (reduc_info) != FOLD_LEFT_REDUCTION ++ && STMT_VINFO_REDUC_TYPE (reduc_info) != EXTRACT_LAST_REDUCTION) ++ { ++ gphi *phi; ++ if (!slp_node ++ && (phi = dyn_cast ++ (STMT_VINFO_REDUC_DEF (orig_stmt_info)->stmt)) ++ && dominated_by_p (CDI_DOMINATORS, ++ gimple_bb (orig_stmt_info->stmt), gimple_bb (phi))) ++ { ++ edge e = loop_latch_edge (gimple_bb (phi)->loop_father); ++ stmt_vec_info phi_info ++ = STMT_VINFO_VEC_STMT (STMT_VINFO_REDUC_DEF (orig_stmt_info)); ++ stmt_vec_info vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); ++ do ++ { ++ add_phi_arg (as_a (phi_info->stmt), ++ gimple_get_lhs (vec_stmt->stmt), e, ++ gimple_phi_arg_location (phi, e->dest_idx)); ++ phi_info = STMT_VINFO_RELATED_STMT (phi_info); ++ vec_stmt = STMT_VINFO_RELATED_STMT (vec_stmt); ++ } ++ while (phi_info); ++ gcc_assert (!vec_stmt); ++ } ++ else if (slp_node ++ && slp_node != slp_node_instance->reduc_phis) ++ { ++ slp_tree phi_node = slp_node_instance->reduc_phis; ++ gphi *phi = as_a (SLP_TREE_SCALAR_STMTS (phi_node)[0]->stmt); ++ edge e = loop_latch_edge (gimple_bb (phi)->loop_father); ++ gcc_assert (SLP_TREE_VEC_STMTS (phi_node).length () ++ == SLP_TREE_VEC_STMTS (slp_node).length ()); ++ for (unsigned i = 0; i < SLP_TREE_VEC_STMTS (phi_node).length (); ++i) ++ add_phi_arg (as_a (SLP_TREE_VEC_STMTS (phi_node)[i]->stmt), ++ gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[i]->stmt), ++ e, gimple_phi_arg_location (phi, e->dest_idx)); ++ } ++ } ++ ++ /* Handle stmts whose DEF is used outside the loop-nest that is ++ being vectorized. */ ++ done = can_vectorize_live_stmts (stmt_info, gsi, slp_node, ++ slp_node_instance, &vec_stmt, NULL); ++ gcc_assert (done); ++ ++ return false; + } + + +@@ -9979,18 +10270,28 @@ vect_remove_stores (stmt_vec_info first_stmt_info) + } + } + +-/* Function get_vectype_for_scalar_type_and_size. ++/* If NUNITS is nonzero, return a vector type that contains NUNITS ++ elements of type SCALAR_TYPE, or null if the target doesn't support ++ such a type. + +- Returns the vector type corresponding to SCALAR_TYPE and SIZE as supported +- by the target. */ ++ If NUNITS is zero, return a vector type that contains elements of ++ type SCALAR_TYPE, choosing whichever vector size the target prefers. ++ ++ If PREVAILING_MODE is VOIDmode, we have not yet chosen a vector mode ++ for this vectorization region and want to "autodetect" the best choice. ++ Otherwise, PREVAILING_MODE is a previously-chosen vector TYPE_MODE ++ and we want the new type to be interoperable with it. PREVAILING_MODE ++ in this case can be a scalar integer mode or a vector mode; when it ++ is a vector mode, the function acts like a tree-level version of ++ related_vector_mode. 
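[Editorial illustration, not part of the patch] A rough GNU C analogy for the related-mode idea described in the comment above, using the vector_size extension; the type names and the 16-byte prevailing size are assumptions for the example. On a typical fixed-width SIMD target, leaving NUNITS at zero keeps the prevailing byte size, while an explicit NUNITS keeps the lane count instead.

/* Compile with GCC; relies on the vector_size extension.  */
#include <stdio.h>

typedef int   v4si __attribute__ ((vector_size (16))); /* prevailing: 16 bytes  */
typedef short v8hi __attribute__ ((vector_size (16))); /* same size, more lanes */
typedef short v4hi __attribute__ ((vector_size (8)));  /* same lanes, half size */

int main (void)
{
  /* NUNITS == 0: keep the prevailing 16-byte size, so short gets 8 lanes.  */
  printf ("v8hi: %zu bytes, %zu lanes\n",
          sizeof (v8hi), sizeof (v8hi) / sizeof (short));
  /* Explicit NUNITS == 4: keep the lane count, giving an 8-byte vector.  */
  printf ("v4hi: %zu bytes, %zu lanes\n",
          sizeof (v4hi), sizeof (v4hi) / sizeof (short));
  printf ("prevailing v4si: %zu bytes\n", sizeof (v4si));
  return 0;
}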
*/ + + tree +-get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size) ++get_related_vectype_for_scalar_type (machine_mode prevailing_mode, ++ tree scalar_type, poly_uint64 nunits) + { + tree orig_scalar_type = scalar_type; + scalar_mode inner_mode; + machine_mode simd_mode; +- poly_uint64 nunits; + tree vectype; + + if (!is_int_mode (TYPE_MODE (scalar_type), &inner_mode) +@@ -10030,19 +10331,45 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size) + if (scalar_type == NULL_TREE) + return NULL_TREE; + +- /* If no size was supplied use the mode the target prefers. Otherwise +- lookup a vector mode of the specified size. */ +- if (known_eq (size, 0U)) +- simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode); +- else if (!multiple_p (size, nbytes, &nunits) +- || !mode_for_vector (inner_mode, nunits).exists (&simd_mode)) +- return NULL_TREE; +- /* NOTE: nunits == 1 is allowed to support single element vector types. */ +- if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits)) +- return NULL_TREE; ++ /* If no prevailing mode was supplied, use the mode the target prefers. ++ Otherwise lookup a vector mode based on the prevailing mode. */ ++ if (prevailing_mode == VOIDmode) ++ { ++ gcc_assert (known_eq (nunits, 0U)); ++ simd_mode = targetm.vectorize.preferred_simd_mode (inner_mode); ++ if (SCALAR_INT_MODE_P (simd_mode)) ++ { ++ /* Traditional behavior is not to take the integer mode ++ literally, but simply to use it as a way of determining ++ the vector size. It is up to mode_for_vector to decide ++ what the TYPE_MODE should be. ++ ++ Note that nunits == 1 is allowed in order to support single ++ element vector types. */ ++ if (!multiple_p (GET_MODE_SIZE (simd_mode), nbytes, &nunits) ++ || !mode_for_vector (inner_mode, nunits).exists (&simd_mode)) ++ return NULL_TREE; ++ } ++ } ++ else if (SCALAR_INT_MODE_P (prevailing_mode) ++ || !related_vector_mode (prevailing_mode, ++ inner_mode, nunits).exists (&simd_mode)) ++ { ++ /* Fall back to using mode_for_vector, mostly in the hope of being ++ able to use an integer mode. */ ++ if (known_eq (nunits, 0U) ++ && !multiple_p (GET_MODE_SIZE (prevailing_mode), nbytes, &nunits)) ++ return NULL_TREE; + +- vectype = build_vector_type (scalar_type, nunits); ++ if (!mode_for_vector (inner_mode, nunits).exists (&simd_mode)) ++ return NULL_TREE; ++ } ++ ++ vectype = build_vector_type_for_mode (scalar_type, simd_mode); + ++ /* In cases where the mode was chosen by mode_for_vector, check that ++ the target actually supports the chosen mode, or that it at least ++ allows the vector mode to be replaced by a like-sized integer. */ + if (!VECTOR_MODE_P (TYPE_MODE (vectype)) + && !INTEGRAL_MODE_P (TYPE_MODE (vectype))) + return NULL_TREE; +@@ -10056,22 +10383,22 @@ get_vectype_for_scalar_type_and_size (tree scalar_type, poly_uint64 size) + return vectype; + } + +-poly_uint64 current_vector_size; +- + /* Function get_vectype_for_scalar_type. + + Returns the vector type corresponding to SCALAR_TYPE as supported + by the target. 
*/ + + tree +-get_vectype_for_scalar_type (tree scalar_type) ++get_vectype_for_scalar_type (vec_info *vinfo, tree scalar_type) + { +- tree vectype; +- vectype = get_vectype_for_scalar_type_and_size (scalar_type, +- current_vector_size); +- if (vectype +- && known_eq (current_vector_size, 0U)) +- current_vector_size = GET_MODE_SIZE (TYPE_MODE (vectype)); ++ tree vectype = get_related_vectype_for_scalar_type (vinfo->vector_mode, ++ scalar_type); ++ if (vectype && vinfo->vector_mode == VOIDmode) ++ vinfo->vector_mode = TYPE_MODE (vectype); ++ ++ if (vectype) ++ vinfo->used_vector_modes.add (TYPE_MODE (vectype)); ++ + return vectype; + } + +@@ -10081,15 +10408,14 @@ get_vectype_for_scalar_type (tree scalar_type) + of vectors of specified SCALAR_TYPE as supported by target. */ + + tree +-get_mask_type_for_scalar_type (tree scalar_type) ++get_mask_type_for_scalar_type (vec_info *vinfo, tree scalar_type) + { +- tree vectype = get_vectype_for_scalar_type (scalar_type); ++ tree vectype = get_vectype_for_scalar_type (vinfo, scalar_type); + + if (!vectype) + return NULL; + +- return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype), +- current_vector_size); ++ return truth_type_for (vectype); + } + + /* Function get_same_sized_vectype +@@ -10101,10 +10427,29 @@ tree + get_same_sized_vectype (tree scalar_type, tree vector_type) + { + if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)) +- return build_same_sized_truth_vector_type (vector_type); ++ return truth_type_for (vector_type); ++ ++ poly_uint64 nunits; ++ if (!multiple_p (GET_MODE_SIZE (TYPE_MODE (vector_type)), ++ GET_MODE_SIZE (TYPE_MODE (scalar_type)), &nunits)) ++ return NULL_TREE; ++ ++ return get_related_vectype_for_scalar_type (TYPE_MODE (vector_type), ++ scalar_type, nunits); ++} ++ ++/* Return true if replacing LOOP_VINFO->vector_mode with VECTOR_MODE ++ would not change the chosen vector modes. */ + +- return get_vectype_for_scalar_type_and_size +- (scalar_type, GET_MODE_SIZE (TYPE_MODE (vector_type))); ++bool ++vect_chooses_same_modes_p (vec_info *vinfo, machine_mode vector_mode) ++{ ++ for (vec_info::mode_set::iterator i = vinfo->used_vector_modes.begin (); ++ i != vinfo->used_vector_modes.end (); ++i) ++ if (!VECTOR_MODE_P (*i) ++ || related_vector_mode (vector_mode, GET_MODE_INNER (*i), 0) != *i) ++ return false; ++ return true; + } + + /* Function vect_is_simple_use. 
+@@ -10492,11 +10837,8 @@ supportable_widening_operation (enum tree_code code, stmt_vec_info stmt_info, + { + intermediate_mode = insn_data[icode1].operand[0].mode; + if (VECTOR_BOOLEAN_TYPE_P (prev_type)) +- { +- intermediate_type = vect_halve_mask_nunits (prev_type); +- if (intermediate_mode != TYPE_MODE (intermediate_type)) +- return false; +- } ++ intermediate_type ++ = vect_halve_mask_nunits (prev_type, intermediate_mode); + else + intermediate_type + = lang_hooks.types.type_for_mode (intermediate_mode, +@@ -10680,11 +11022,8 @@ supportable_narrowing_operation (enum tree_code code, + { + intermediate_mode = insn_data[icode1].operand[0].mode; + if (VECTOR_BOOLEAN_TYPE_P (prev_type)) +- { +- intermediate_type = vect_double_mask_nunits (prev_type); +- if (intermediate_mode != TYPE_MODE (intermediate_type)) +- return false; +- } ++ intermediate_type ++ = vect_double_mask_nunits (prev_type, intermediate_mode); + else + intermediate_type + = lang_hooks.types.type_for_mode (intermediate_mode, uns); +@@ -10777,6 +11116,7 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info, + tree *stmt_vectype_out, + tree *nunits_vectype_out) + { ++ vec_info *vinfo = stmt_info->vinfo; + gimple *stmt = stmt_info->stmt; + + *stmt_vectype_out = NULL_TREE; +@@ -10810,7 +11150,12 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info, + tree vectype; + tree scalar_type = NULL_TREE; + if (STMT_VINFO_VECTYPE (stmt_info)) +- *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info); ++ { ++ *stmt_vectype_out = vectype = STMT_VINFO_VECTYPE (stmt_info); ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "precomputed vectype: %T\n", vectype); ++ } + else + { + gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)); +@@ -10842,8 +11187,8 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info, + + if (dump_enabled_p ()) + dump_printf_loc (MSG_NOTE, vect_location, +- "get vectype for scalar type: %T\n", scalar_type); +- vectype = get_vectype_for_scalar_type (scalar_type); ++ "get vectype for scalar type: %T\n", scalar_type); ++ vectype = get_vectype_for_scalar_type (vinfo, scalar_type); + if (!vectype) + return opt_result::failure_at (stmt, + "not vectorized:" +@@ -10859,42 +11204,38 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info, + + /* Don't try to compute scalar types if the stmt produces a boolean + vector; use the existing vector type instead. */ +- tree nunits_vectype; +- if (VECTOR_BOOLEAN_TYPE_P (vectype)) +- nunits_vectype = vectype; +- else ++ tree nunits_vectype = vectype; ++ if (!VECTOR_BOOLEAN_TYPE_P (vectype) ++ && *stmt_vectype_out != boolean_type_node) + { + /* The number of units is set according to the smallest scalar + type (or the largest vector size, but we only support one + vector size per vectorization). 
*/ +- if (*stmt_vectype_out != boolean_type_node) ++ HOST_WIDE_INT dummy; ++ scalar_type = vect_get_smallest_scalar_type (stmt_info, &dummy, &dummy); ++ if (scalar_type != TREE_TYPE (vectype)) + { +- HOST_WIDE_INT dummy; +- scalar_type = vect_get_smallest_scalar_type (stmt_info, +- &dummy, &dummy); ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, ++ "get vectype for smallest scalar type: %T\n", ++ scalar_type); ++ nunits_vectype = get_vectype_for_scalar_type (vinfo, scalar_type); ++ if (!nunits_vectype) ++ return opt_result::failure_at ++ (stmt, "not vectorized: unsupported data-type %T\n", ++ scalar_type); ++ if (dump_enabled_p ()) ++ dump_printf_loc (MSG_NOTE, vect_location, "nunits vectype: %T\n", ++ nunits_vectype); + } +- if (dump_enabled_p ()) +- dump_printf_loc (MSG_NOTE, vect_location, +- "get vectype for scalar type: %T\n", scalar_type); +- nunits_vectype = get_vectype_for_scalar_type (scalar_type); + } +- if (!nunits_vectype) +- return opt_result::failure_at (stmt, +- "not vectorized: unsupported data-type %T\n", +- scalar_type); + +- if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vectype)), +- GET_MODE_SIZE (TYPE_MODE (nunits_vectype)))) +- return opt_result::failure_at (stmt, +- "not vectorized: different sized vector " +- "types in statement, %T and %T\n", +- vectype, nunits_vectype); ++ gcc_assert (*stmt_vectype_out == boolean_type_node ++ || multiple_p (TYPE_VECTOR_SUBPARTS (nunits_vectype), ++ TYPE_VECTOR_SUBPARTS (*stmt_vectype_out))); + + if (dump_enabled_p ()) + { +- dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", +- nunits_vectype); +- + dump_printf_loc (MSG_NOTE, vect_location, "nunits = "); + dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (nunits_vectype)); + dump_printf (MSG_NOTE, "\n"); +@@ -10911,6 +11252,7 @@ vect_get_vector_types_for_stmt (stmt_vec_info stmt_info, + opt_tree + vect_get_mask_type_for_stmt (stmt_vec_info stmt_info) + { ++ vec_info *vinfo = stmt_info->vinfo; + gimple *stmt = stmt_info->stmt; + tree mask_type = NULL; + tree vectype, scalar_type; +@@ -10920,7 +11262,7 @@ vect_get_mask_type_for_stmt (stmt_vec_info stmt_info) + && !VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (gimple_assign_rhs1 (stmt)))) + { + scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); +- mask_type = get_mask_type_for_scalar_type (scalar_type); ++ mask_type = get_mask_type_for_scalar_type (vinfo, scalar_type); + + if (!mask_type) + return opt_tree::failure_at (stmt, +@@ -10968,7 +11310,7 @@ vect_get_mask_type_for_stmt (stmt_vec_info stmt_info) + && !VECTOR_BOOLEAN_TYPE_P (mask_type) + && gimple_code (stmt) == GIMPLE_ASSIGN + && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison) +- mask_type = build_same_sized_truth_vector_type (mask_type); ++ mask_type = truth_type_for (mask_type); + } + + /* No mask_type should mean loop invariant predicate. 
+diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c +index d89ec3b7c76..c2c6377d3f9 100644 +--- a/gcc/tree-vectorizer.c ++++ b/gcc/tree-vectorizer.c +@@ -639,8 +639,11 @@ vec_info::new_stmt_vec_info (gimple *stmt) + STMT_VINFO_TYPE (res) = undef_vec_info_type; + STMT_VINFO_RELEVANT (res) = vect_unused_in_scope; + STMT_VINFO_VECTORIZABLE (res) = true; +- STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION; +- STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK; ++ STMT_VINFO_REDUC_TYPE (res) = TREE_CODE_REDUCTION; ++ STMT_VINFO_REDUC_CODE (res) = ERROR_MARK; ++ STMT_VINFO_REDUC_FN (res) = IFN_LAST; ++ STMT_VINFO_REDUC_IDX (res) = -1; ++ STMT_VINFO_SLP_VECT_ONLY (res) = false; + + if (gimple_code (stmt) == GIMPLE_PHI + && is_loop_header_bb_p (gimple_bb (stmt))) +@@ -862,8 +865,7 @@ set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) + + static unsigned + try_vectorize_loop_1 (hash_table *&simduid_to_vf_htab, +- unsigned *num_vectorized_loops, +- loop_p loop, loop_vec_info orig_loop_vinfo, ++ unsigned *num_vectorized_loops, loop_p loop, + gimple *loop_vectorized_call, + gimple *loop_dist_alias_call) + { +@@ -871,6 +873,7 @@ try_vectorize_loop_1 (hash_table *&simduid_to_vf_htab, + vec_info_shared shared; + auto_purge_vect_location sentinel; + vect_location = find_loop_location (loop); ++ + if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION + && dump_enabled_p ()) + dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS, +@@ -878,10 +881,17 @@ try_vectorize_loop_1 (hash_table *&simduid_to_vf_htab, + LOCATION_FILE (vect_location.get_location_t ()), + LOCATION_LINE (vect_location.get_location_t ())); + +- /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p. */ +- opt_loop_vec_info loop_vinfo +- = vect_analyze_loop (loop, orig_loop_vinfo, &shared); +- loop->aux = loop_vinfo; ++ opt_loop_vec_info loop_vinfo = opt_loop_vec_info::success (NULL); ++ /* In the case of epilogue vectorization the loop already has its ++ loop_vec_info set, we do not require to analyze the loop in this case. */ ++ if (loop_vec_info vinfo = loop_vec_info_for_loop (loop)) ++ loop_vinfo = opt_loop_vec_info::success (vinfo); ++ else ++ { ++ /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p. */ ++ loop_vinfo = vect_analyze_loop (loop, &shared); ++ loop->aux = loop_vinfo; ++ } + + if (!loop_vinfo) + if (dump_enabled_p ()) +@@ -968,7 +978,7 @@ try_vectorize_loop_1 (hash_table *&simduid_to_vf_htab, + unsigned HOST_WIDE_INT bytes; + if (dump_enabled_p ()) + { +- if (current_vector_size.is_constant (&bytes)) ++ if (GET_MODE_SIZE (loop_vinfo->vector_mode).is_constant (&bytes)) + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, + "loop vectorized using %wu byte vectors\n", bytes); + else +@@ -1009,8 +1019,13 @@ try_vectorize_loop_1 (hash_table *&simduid_to_vf_htab, + + /* Epilogue of vectorized loop must be vectorized too. */ + if (new_loop) +- ret |= try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, +- new_loop, loop_vinfo, NULL, NULL); ++ { ++ /* Don't include vectorized epilogues in the "vectorized loops" count. 
++ */ ++ unsigned dont_count = *num_vectorized_loops; ++ ret |= try_vectorize_loop_1 (simduid_to_vf_htab, &dont_count, ++ new_loop, NULL, NULL); ++ } + + return ret; + } +@@ -1026,8 +1041,7 @@ try_vectorize_loop (hash_table *&simduid_to_vf_htab, + || loop->force_vectorize)) + return 0; + +- return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, +- loop, NULL, ++ return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, loop, + vect_loop_vectorized_call (loop), + vect_loop_dist_alias_call (loop)); + } +@@ -1344,7 +1358,8 @@ get_vec_alignment_for_array_type (tree type) + gcc_assert (TREE_CODE (type) == ARRAY_TYPE); + poly_uint64 array_size, vector_size; + +- tree vectype = get_vectype_for_scalar_type (strip_array_types (type)); ++ tree scalar_type = strip_array_types (type); ++ tree vectype = get_related_vectype_for_scalar_type (VOIDmode, scalar_type); + if (!vectype + || !poly_int_tree_p (TYPE_SIZE (type), &array_size) + || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size) +@@ -1512,3 +1527,36 @@ make_pass_ipa_increase_alignment (gcc::context *ctxt) + { + return new pass_ipa_increase_alignment (ctxt); + } ++ ++/* If the condition represented by T is a comparison or the SSA name ++ result of a comparison, extract the comparison's operands. Represent ++ T as NE_EXPR otherwise. */ ++ ++void ++scalar_cond_masked_key::get_cond_ops_from_tree (tree t) ++{ ++ if (TREE_CODE_CLASS (TREE_CODE (t)) == tcc_comparison) ++ { ++ this->code = TREE_CODE (t); ++ this->op0 = TREE_OPERAND (t, 0); ++ this->op1 = TREE_OPERAND (t, 1); ++ return; ++ } ++ ++ if (TREE_CODE (t) == SSA_NAME) ++ if (gassign *stmt = dyn_cast (SSA_NAME_DEF_STMT (t))) ++ { ++ tree_code code = gimple_assign_rhs_code (stmt); ++ if (TREE_CODE_CLASS (code) == tcc_comparison) ++ { ++ this->code = code; ++ this->op0 = gimple_assign_rhs1 (stmt); ++ this->op1 = gimple_assign_rhs2 (stmt); ++ return; ++ } ++ } ++ ++ this->code = NE_EXPR; ++ this->op0 = t; ++ this->op1 = build_zero_cst (TREE_TYPE (t)); ++} +diff --git a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h +index 148b9a7f215..c46e2742c36 100644 +--- a/gcc/tree-vectorizer.h ++++ b/gcc/tree-vectorizer.h +@@ -26,6 +26,7 @@ typedef struct _stmt_vec_info *stmt_vec_info; + #include "tree-data-ref.h" + #include "tree-hash-traits.h" + #include "target.h" ++#include + + /* Used for naming of new temporaries. */ + enum vect_var_kind { +@@ -120,6 +121,8 @@ struct _slp_tree { + vec children; + /* A group of scalar stmts to be vectorized together. */ + vec stmts; ++ /* A group of scalar operands to be vectorized together. */ ++ vec ops; + /* Load permutation relative to the stores, NULL if there is no + permutation. */ + vec load_permutation; +@@ -170,13 +173,82 @@ typedef struct _slp_instance { + + #define SLP_TREE_CHILDREN(S) (S)->children + #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts ++#define SLP_TREE_SCALAR_OPS(S) (S)->ops + #define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts + #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size + #define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation + #define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators + #define SLP_TREE_DEF_TYPE(S) (S)->def_type + ++/* Key for map that records association between ++ scalar conditions and corresponding loop mask, and ++ is populated by vect_record_loop_mask. 
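[Editorial illustration, not part of the patch] A toy sketch of the canonicalization that get_cond_ops_from_tree performs a few hunks above; the mini expression representation below is invented for illustration. Comparisons contribute their own code and operands, anything else is keyed as "t != 0".

#include <assert.h>
#include <stddef.h>

enum code { NE_EXPR, LT_EXPR, SSA_NAME_KIND };   /* toy subset of tree codes */

struct expr { enum code code; struct expr *op0, *op1; };
struct cond_key { enum code code; struct expr *op0, *op1; };

/* Comparisons keep their own operands; everything else becomes NE (t, 0),
   with the zero operand modelled here as a NULL placeholder.  */
static struct cond_key key_from (struct expr *t)
{
  struct cond_key k;
  if (t->code == LT_EXPR)        /* stand-in for "any comparison code" */
    { k.code = t->code; k.op0 = t->op0; k.op1 = t->op1; }
  else
    { k.code = NE_EXPR; k.op0 = t; k.op1 = NULL; }
  return k;
}

int main (void)
{
  struct expr a = { SSA_NAME_KIND, NULL, NULL };
  struct expr b = { SSA_NAME_KIND, NULL, NULL };
  struct expr cmp = { LT_EXPR, &a, &b };

  assert (key_from (&cmp).code == LT_EXPR && key_from (&cmp).op0 == &a);
  assert (key_from (&a).code == NE_EXPR && key_from (&a).op0 == &a);
  return 0;
}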
*/ + ++struct scalar_cond_masked_key ++{ ++ scalar_cond_masked_key (tree t, unsigned ncopies_) ++ : ncopies (ncopies_) ++ { ++ get_cond_ops_from_tree (t); ++ } ++ ++ void get_cond_ops_from_tree (tree); ++ ++ unsigned ncopies; ++ tree_code code; ++ tree op0; ++ tree op1; ++}; ++ ++template<> ++struct default_hash_traits ++{ ++ typedef scalar_cond_masked_key compare_type; ++ typedef scalar_cond_masked_key value_type; ++ ++ static inline hashval_t ++ hash (value_type v) ++ { ++ inchash::hash h; ++ h.add_int (v.code); ++ inchash::add_expr (v.op0, h, 0); ++ inchash::add_expr (v.op1, h, 0); ++ h.add_int (v.ncopies); ++ return h.end (); ++ } ++ ++ static inline bool ++ equal (value_type existing, value_type candidate) ++ { ++ return (existing.ncopies == candidate.ncopies ++ && existing.code == candidate.code ++ && operand_equal_p (existing.op0, candidate.op0, 0) ++ && operand_equal_p (existing.op1, candidate.op1, 0)); ++ } ++ ++ static inline void ++ mark_empty (value_type &v) ++ { ++ v.ncopies = 0; ++ } ++ ++ static inline bool ++ is_empty (value_type v) ++ { ++ return v.ncopies == 0; ++ } ++ ++ static inline void mark_deleted (value_type &) {} ++ ++ static inline bool is_deleted (const value_type &) ++ { ++ return false; ++ } ++ ++ static inline void remove (value_type &) {} ++}; ++ ++typedef hash_set scalar_cond_masked_set_type; + + /* Describes two objects whose addresses must be unequal for the vectorized + loop to be valid. */ +@@ -217,6 +289,7 @@ struct vec_info_shared { + + /* Vectorizer state common between loop and basic-block vectorization. */ + struct vec_info { ++ typedef hash_set > mode_set; + enum vec_kind { bb, loop }; + + vec_info (vec_kind, void *, vec_info_shared *); +@@ -254,6 +327,14 @@ struct vec_info { + /* Cost data used by the target cost model. */ + void *target_cost_data; + ++ /* The set of vector modes used in the vectorized region. */ ++ mode_set used_vector_modes; ++ ++ /* The argument we should pass to related_vector_mode when looking up ++ the vector mode for a scalar mode, or VOIDmode if we haven't yet ++ made any decisions about which vector modes to use. */ ++ machine_mode vector_mode; ++ + private: + stmt_vec_info new_stmt_vec_info (gimple *stmt); + void set_vinfo_for_stmt (gimple *, stmt_vec_info); +@@ -377,6 +458,8 @@ struct rgroup_masks { + + typedef auto_vec vec_loop_masks; + ++typedef auto_vec > drs_init_vec; ++ + /*-----------------------------------------------------------------*/ + /* Info on vectorized loops. */ + /*-----------------------------------------------------------------*/ +@@ -399,7 +482,7 @@ typedef struct _loop_vec_info : public vec_info { + /* Condition under which this loop is analyzed and versioned. */ + tree num_iters_assumptions; + +- /* Threshold of number of iterations below which vectorzation will not be ++ /* Threshold of number of iterations below which vectorization will not be + performed. It is calculated from MIN_PROFITABLE_ITERS and + PARAM_MIN_VECT_LOOP_BOUND. */ + unsigned int th; +@@ -421,6 +504,9 @@ typedef struct _loop_vec_info : public vec_info { + on inactive scalars. */ + vec_loop_masks masks; + ++ /* Set of scalar conditions that have loop mask applied. */ ++ scalar_cond_masked_set_type scalar_cond_masked_set; ++ + /* If we are using a loop mask to align memory addresses, this variable + contains the number of vector elements that we should skip in the + first iteration of the vector loop (i.e. 
the number of leading +@@ -497,6 +583,13 @@ typedef struct _loop_vec_info : public vec_info { + /* Cost of a single scalar iteration. */ + int single_scalar_iteration_cost; + ++ /* The cost of the vector prologue and epilogue, including peeled ++ iterations and set-up code. */ ++ int vec_outside_cost; ++ ++ /* The cost of the vector loop body. */ ++ int vec_inside_cost; ++ + /* Is the loop vectorizable? */ + bool vectorizable; + +@@ -551,6 +644,10 @@ typedef struct _loop_vec_info : public vec_info { + this points to the original vectorized loop. Otherwise NULL. */ + _loop_vec_info *orig_loop_info; + ++ /* Used to store loop_vec_infos of epilogues of this loop during ++ analysis. */ ++ vec<_loop_vec_info *> epilogue_vinfos; ++ + } *loop_vec_info; + + /* Access Functions. */ +@@ -682,6 +779,8 @@ enum stmt_vec_info_type { + type_promotion_vec_info_type, + type_demotion_vec_info_type, + type_conversion_vec_info_type, ++ cycle_phi_info_type, ++ lc_phi_info_type, + loop_exit_ctrl_vec_info_type + }; + +@@ -917,21 +1016,42 @@ struct _stmt_vec_info { + for loop vectorization. */ + vect_memory_access_type memory_access_type; + +- /* For reduction loops, this is the type of reduction. */ +- enum vect_reduction_type v_reduc_type; ++ /* For INTEGER_INDUC_COND_REDUCTION, the initial value to be used. */ ++ tree induc_cond_initial_val; + +- /* For CONST_COND_REDUCTION, record the reduc code. */ +- enum tree_code const_cond_reduc_code; ++ /* If not NULL the value to be added to compute final reduction value. */ ++ tree reduc_epilogue_adjustment; + + /* On a reduction PHI the reduction type as detected by +- vect_force_simple_reduction. */ ++ vect_is_simple_reduction and vectorizable_reduction. */ + enum vect_reduction_type reduc_type; + ++ /* The original reduction code, to be used in the epilogue. */ ++ enum tree_code reduc_code; ++ /* An internal function we should use in the epilogue. */ ++ internal_fn reduc_fn; ++ ++ /* On a stmt participating in the reduction the index of the operand ++ on the reduction SSA cycle. */ ++ int reduc_idx; ++ + /* On a reduction PHI the def returned by vect_force_simple_reduction. + On the def returned by vect_force_simple_reduction the + corresponding PHI. */ + stmt_vec_info reduc_def; + ++ /* The vector input type relevant for reduction vectorization. */ ++ tree reduc_vectype_in; ++ ++ /* The vector type for performing the actual reduction. */ ++ tree reduc_vectype; ++ ++ /* Whether we force a single cycle PHI during reduction vectorization. */ ++ bool force_single_cycle; ++ ++ /* Whether on this stmt reduction meta is recorded. */ ++ bool is_reduc_info; ++ + /* The number of scalar stmt references from active SLP instances. */ + unsigned int num_slp_uses; + +@@ -949,6 +1069,9 @@ struct _stmt_vec_info { + and OPERATION_BITS without changing the result. */ + unsigned int operation_precision; + signop operation_sign; ++ ++ /* True if this is only suitable for SLP vectorization. */ ++ bool slp_vect_only_p; + }; + + /* Information about a gather/scatter call. 
*/ +@@ -1011,8 +1134,10 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo) + #define STMT_VINFO_STRIDED_P(S) (S)->strided_p + #define STMT_VINFO_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type + #define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p +-#define STMT_VINFO_VEC_REDUCTION_TYPE(S) (S)->v_reduc_type +-#define STMT_VINFO_VEC_CONST_COND_REDUC_CODE(S) (S)->const_cond_reduc_code ++#define STMT_VINFO_VEC_INDUC_COND_INITIAL_VAL(S) (S)->induc_cond_initial_val ++#define STMT_VINFO_REDUC_EPILOGUE_ADJUSTMENT(S) (S)->reduc_epilogue_adjustment ++#define STMT_VINFO_REDUC_IDX(S) (S)->reduc_idx ++#define STMT_VINFO_FORCE_SINGLE_CYCLE(S) (S)->force_single_cycle + + #define STMT_VINFO_DR_WRT_VEC_LOOP(S) (S)->dr_wrt_vec_loop + #define STMT_VINFO_DR_BASE_ADDRESS(S) (S)->dr_wrt_vec_loop.base_address +@@ -1043,7 +1168,12 @@ STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo) + #define STMT_VINFO_MIN_NEG_DIST(S) (S)->min_neg_dist + #define STMT_VINFO_NUM_SLP_USES(S) (S)->num_slp_uses + #define STMT_VINFO_REDUC_TYPE(S) (S)->reduc_type ++#define STMT_VINFO_REDUC_CODE(S) (S)->reduc_code ++#define STMT_VINFO_REDUC_FN(S) (S)->reduc_fn + #define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def ++#define STMT_VINFO_REDUC_VECTYPE(S) (S)->reduc_vectype ++#define STMT_VINFO_REDUC_VECTYPE_IN(S) (S)->reduc_vectype_in ++#define STMT_VINFO_SLP_VECT_ONLY(S) (S)->slp_vect_only_p + + #define DR_GROUP_FIRST_ELEMENT(S) \ + (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element) +@@ -1358,7 +1488,7 @@ vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype) + static inline void + vect_update_max_nunits (poly_uint64 *max_nunits, poly_uint64 nunits) + { +- /* All unit counts have the form current_vector_size * X for some ++ /* All unit counts have the form vec_info::vector_size * X for some + rational X, so two unit sizes must have a common multiple. + Everything is a multiple of the initial value of 1. */ + *max_nunits = force_common_multiple (*max_nunits, nunits); +@@ -1466,20 +1596,22 @@ extern void vect_set_loop_condition (struct loop *, loop_vec_info, + extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); + struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, + struct loop *, edge); +-struct loop *vect_loop_versioning (loop_vec_info, unsigned int, bool, +- poly_uint64); ++struct loop *vect_loop_versioning (loop_vec_info); + extern struct loop *vect_do_peeling (loop_vec_info, tree, tree, +- tree *, tree *, tree *, int, bool, bool); ++ tree *, tree *, tree *, int, bool, bool, ++ tree *, drs_init_vec &); + extern void vect_prepare_for_masked_peels (loop_vec_info); + extern dump_user_location_t find_loop_location (struct loop *); + extern bool vect_can_advance_ivs_p (loop_vec_info); ++extern void vect_update_inits_of_drs (loop_vec_info, tree, tree_code); + + /* In tree-vect-stmts.c. 
*/ +-extern poly_uint64 current_vector_size; +-extern tree get_vectype_for_scalar_type (tree); +-extern tree get_vectype_for_scalar_type_and_size (tree, poly_uint64); +-extern tree get_mask_type_for_scalar_type (tree); ++extern tree get_related_vectype_for_scalar_type (machine_mode, tree, ++ poly_uint64 = 0); ++extern tree get_vectype_for_scalar_type (vec_info *, tree); ++extern tree get_mask_type_for_scalar_type (vec_info *, tree); + extern tree get_same_sized_vectype (tree, tree); ++extern bool vect_chooses_same_modes_p (vec_info *, machine_mode); + extern bool vect_get_loop_mask_type (loop_vec_info); + extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *, + stmt_vec_info * = NULL, gimple ** = NULL); +@@ -1491,15 +1623,15 @@ extern bool supportable_widening_operation (enum tree_code, stmt_vec_info, + enum tree_code *, int *, + vec *); + extern bool supportable_narrowing_operation (enum tree_code, tree, tree, +- enum tree_code *, +- int *, vec *); ++ enum tree_code *, int *, ++ vec *); + extern unsigned record_stmt_cost (stmt_vector_for_cost *, int, + enum vect_cost_for_stmt, stmt_vec_info, + int, enum vect_cost_model_location); + extern stmt_vec_info vect_finish_replace_stmt (stmt_vec_info, gimple *); + extern stmt_vec_info vect_finish_stmt_generation (stmt_vec_info, gimple *, + gimple_stmt_iterator *); +-extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info); ++extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info, bool *); + extern tree vect_get_store_rhs (stmt_vec_info); + extern tree vect_get_vec_def_for_operand_1 (stmt_vec_info, enum vect_def_type); + extern tree vect_get_vec_def_for_operand (tree, stmt_vec_info, tree = NULL); +@@ -1515,19 +1647,13 @@ extern bool vect_transform_stmt (stmt_vec_info, gimple_stmt_iterator *, + extern void vect_remove_stores (stmt_vec_info); + extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree, + slp_instance, stmt_vector_for_cost *); +-extern bool vectorizable_condition (stmt_vec_info, gimple_stmt_iterator *, +- stmt_vec_info *, bool, slp_tree, +- stmt_vector_for_cost *); +-extern bool vectorizable_shift (stmt_vec_info, gimple_stmt_iterator *, +- stmt_vec_info *, slp_tree, +- stmt_vector_for_cost *); + extern void vect_get_load_cost (stmt_vec_info, int, bool, + unsigned int *, unsigned int *, + stmt_vector_for_cost *, + stmt_vector_for_cost *, bool); + extern void vect_get_store_cost (stmt_vec_info, int, + unsigned int *, stmt_vector_for_cost *); +-extern bool vect_supportable_shift (enum tree_code, tree); ++extern bool vect_supportable_shift (vec_info *, enum tree_code, tree); + extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &); + extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &); + extern void optimize_mask_stores (struct loop*); +@@ -1557,7 +1683,7 @@ extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info, + gather_scatter_info *); + extern opt_result vect_find_stmt_data_reference (loop_p, gimple *, + vec *); +-extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *); ++extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *, bool *); + extern void vect_record_base_alignments (vec_info *); + extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, struct loop *, tree, + tree *, gimple_stmt_iterator *, +@@ -1586,40 +1712,43 @@ extern tree vect_create_addr_base_for_vector_ref (stmt_vec_info, gimple_seq *, + tree, tree = NULL_TREE); + + /* In tree-vect-loop.c. */ +-/* FORNOW: Used in tree-parloops.c. 
*/ +-extern stmt_vec_info vect_force_simple_reduction (loop_vec_info, stmt_vec_info, +- bool *, bool); +-/* Used in gimple-loop-interchange.c. */ ++/* Used in tree-vect-loop-manip.c */ ++extern void determine_peel_for_niter (loop_vec_info); ++/* Used in gimple-loop-interchange.c and tree-parloops.c. */ + extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, + enum tree_code); ++extern bool needs_fold_left_reduction_p (tree, tree_code); + /* Drive for loop analysis stage. */ +-extern opt_loop_vec_info vect_analyze_loop (struct loop *, +- loop_vec_info, +- vec_info_shared *); ++extern opt_loop_vec_info vect_analyze_loop (struct loop *, vec_info_shared *); + extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); + extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, + tree *, bool); +-extern tree vect_halve_mask_nunits (tree); +-extern tree vect_double_mask_nunits (tree); ++extern tree vect_halve_mask_nunits (tree, machine_mode); ++extern tree vect_double_mask_nunits (tree, machine_mode); + extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *, +- unsigned int, tree); ++ unsigned int, tree, tree); + extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *, + unsigned int, tree, unsigned int); ++extern stmt_vec_info info_for_reduction (stmt_vec_info); + + /* Drive for loop transformation stage. */ + extern struct loop *vect_transform_loop (loop_vec_info); + extern opt_loop_vec_info vect_analyze_loop_form (struct loop *, + vec_info_shared *); + extern bool vectorizable_live_operation (stmt_vec_info, gimple_stmt_iterator *, +- slp_tree, int, stmt_vec_info *, ++ slp_tree, slp_instance, int, ++ stmt_vec_info *, + stmt_vector_for_cost *); +-extern bool vectorizable_reduction (stmt_vec_info, gimple_stmt_iterator *, +- stmt_vec_info *, slp_tree, slp_instance, ++extern bool vectorizable_reduction (stmt_vec_info, slp_tree, slp_instance, + stmt_vector_for_cost *); + extern bool vectorizable_induction (stmt_vec_info, gimple_stmt_iterator *, + stmt_vec_info *, slp_tree, + stmt_vector_for_cost *); +-extern tree get_initial_def_for_reduction (stmt_vec_info, tree, tree *); ++extern bool vect_transform_reduction (stmt_vec_info, gimple_stmt_iterator *, ++ stmt_vec_info *, slp_tree); ++extern bool vect_transform_cycle_phi (stmt_vec_info, stmt_vec_info *, ++ slp_tree, slp_instance); ++extern bool vectorizable_lc_phi (stmt_vec_info, stmt_vec_info *, slp_tree); + extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code); + extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, + stmt_vector_for_cost *, +@@ -1637,15 +1766,16 @@ extern void vect_schedule_slp (vec_info *); + extern opt_result vect_analyze_slp (vec_info *, unsigned); + extern bool vect_make_slp_decision (loop_vec_info); + extern void vect_detect_hybrid_slp (loop_vec_info); +-extern void vect_get_slp_defs (vec , slp_tree, vec > *); ++extern void vect_get_slp_defs (slp_tree, vec > *, unsigned n = -1U); + extern bool vect_slp_bb (basic_block); + extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree); + extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info); +-extern bool can_duplicate_and_interleave_p (unsigned int, machine_mode, ++extern bool can_duplicate_and_interleave_p (vec_info *, unsigned int, ++ machine_mode, + unsigned int * = NULL, + tree * = NULL, tree * = NULL); +-extern void duplicate_and_interleave (gimple_seq *, tree, vec, +- unsigned int, vec &); ++extern void duplicate_and_interleave (vec_info *, 
gimple_seq *, tree, ++ vec, unsigned int, vec &); + extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info); + + /* In tree-vect-patterns.c. */ +diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c +index 2140101d7d2..fbcd8aa6367 100644 +--- a/gcc/tree-vrp.c ++++ b/gcc/tree-vrp.c +@@ -69,23 +69,20 @@ along with GCC; see the file COPYING3. If not see + #include "builtins.h" + #include "wide-int-range.h" + ++static bool ++ranges_from_anti_range (const value_range_base *ar, ++ value_range_base *vr0, value_range_base *vr1, ++ bool handle_pointers = false); ++ + /* Set of SSA names found live during the RPO traversal of the function + for still active basic-blocks. */ + static sbitmap *live; + +-void +-value_range_base::set (enum value_range_kind kind, tree min, tree max) +-{ +- m_kind = kind; +- m_min = min; +- m_max = max; +- if (flag_checking) +- check (); +-} +- + void + value_range::set_equiv (bitmap equiv) + { ++ if (undefined_p () || varying_p ()) ++ equiv = NULL; + /* Since updating the equivalence set involves deep copying the + bitmaps, only do it if absolutely necessary. + +@@ -261,7 +258,8 @@ value_range_base::constant_p () const + void + value_range_base::set_undefined () + { +- set (VR_UNDEFINED, NULL, NULL); ++ m_kind = VR_UNDEFINED; ++ m_min = m_max = NULL; + } + + void +@@ -273,7 +271,8 @@ value_range::set_undefined () + void + value_range_base::set_varying () + { +- set (VR_VARYING, NULL, NULL); ++ m_kind = VR_VARYING; ++ m_min = m_max = NULL; + } + + void +@@ -335,6 +334,24 @@ value_range::equiv_add (const_tree var, + bool + value_range_base::singleton_p (tree *result) const + { ++ if (m_kind == VR_ANTI_RANGE) ++ { ++ if (nonzero_p ()) ++ { ++ if (TYPE_PRECISION (type ()) == 1) ++ { ++ if (result) ++ *result = m_max; ++ return true; ++ } ++ return false; ++ } ++ ++ value_range_base vr0, vr1; ++ return (ranges_from_anti_range (this, &vr0, &vr1, true) ++ && vr1.undefined_p () ++ && vr0.singleton_p (result)); ++ } + if (m_kind == VR_RANGE + && vrp_operand_equal_p (min (), max ()) + && is_gimple_min_invariant (min ())) +@@ -510,23 +527,28 @@ static assert_locus **asserts_for; + /* Return the maximum value for TYPE. */ + + tree +-vrp_val_max (const_tree type) ++vrp_val_max (const_tree type, bool handle_pointers) + { +- if (!INTEGRAL_TYPE_P (type)) +- return NULL_TREE; +- +- return TYPE_MAX_VALUE (type); ++ if (INTEGRAL_TYPE_P (type)) ++ return TYPE_MAX_VALUE (type); ++ if (POINTER_TYPE_P (type) && handle_pointers) ++ { ++ wide_int max = wi::max_value (TYPE_PRECISION (type), TYPE_SIGN (type)); ++ return wide_int_to_tree (const_cast (type), max); ++ } ++ return NULL_TREE; + } + + /* Return the minimum value for TYPE. */ + + tree +-vrp_val_min (const_tree type) ++vrp_val_min (const_tree type, bool handle_pointers) + { +- if (!INTEGRAL_TYPE_P (type)) +- return NULL_TREE; +- +- return TYPE_MIN_VALUE (type); ++ if (INTEGRAL_TYPE_P (type)) ++ return TYPE_MIN_VALUE (type); ++ if (POINTER_TYPE_P (type) && handle_pointers) ++ return build_zero_cst (const_cast (type)); ++ return NULL_TREE; + } + + /* Return whether VAL is equal to the maximum value of its type. +@@ -637,8 +659,7 @@ intersect_range_with_nonzero_bits (enum value_range_kind vr_type, + extract ranges from var + CST op limit. */ + + void +-value_range_base::set_and_canonicalize (enum value_range_kind kind, +- tree min, tree max) ++value_range_base::set (enum value_range_kind kind, tree min, tree max) + { + /* Use the canonical setters for VR_UNDEFINED and VR_VARYING. 
*/ + if (kind == VR_UNDEFINED) +@@ -652,11 +673,31 @@ value_range_base::set_and_canonicalize (enum value_range_kind kind, + return; + } + ++ /* Convert POLY_INT_CST bounds into worst-case INTEGER_CST bounds. */ ++ if (POLY_INT_CST_P (min)) ++ { ++ tree type_min = vrp_val_min (TREE_TYPE (min), true); ++ widest_int lb ++ = constant_lower_bound_with_limit (wi::to_poly_widest (min), ++ wi::to_widest (type_min)); ++ min = wide_int_to_tree (TREE_TYPE (min), lb); ++ } ++ if (POLY_INT_CST_P (max)) ++ { ++ tree type_max = vrp_val_max (TREE_TYPE (max), true); ++ widest_int ub ++ = constant_upper_bound_with_limit (wi::to_poly_widest (max), ++ wi::to_widest (type_max)); ++ max = wide_int_to_tree (TREE_TYPE (max), ub); ++ } ++ + /* Nothing to canonicalize for symbolic ranges. */ + if (TREE_CODE (min) != INTEGER_CST + || TREE_CODE (max) != INTEGER_CST) + { +- set (kind, min, max); ++ m_kind = kind; ++ m_min = min; ++ m_max = max; + return; + } + +@@ -692,12 +733,13 @@ value_range_base::set_and_canonicalize (enum value_range_kind kind, + kind = kind == VR_RANGE ? VR_ANTI_RANGE : VR_RANGE; + } + ++ tree type = TREE_TYPE (min); ++ + /* Anti-ranges that can be represented as ranges should be so. */ + if (kind == VR_ANTI_RANGE) + { + /* For -fstrict-enums we may receive out-of-range ranges so consider + values < -INF and values > INF as -INF/INF as well. */ +- tree type = TREE_TYPE (min); + bool is_min = (INTEGRAL_TYPE_P (type) + && tree_int_cst_compare (min, TYPE_MIN_VALUE (type)) <= 0); + bool is_max = (INTEGRAL_TYPE_P (type) +@@ -740,22 +782,37 @@ value_range_base::set_and_canonicalize (enum value_range_kind kind, + } + } + ++ /* Normalize [MIN, MAX] into VARYING and ~[MIN, MAX] into UNDEFINED. ++ ++ Avoid using TYPE_{MIN,MAX}_VALUE because -fstrict-enums can ++ restrict those to a subset of what actually fits in the type. ++ Instead use the extremes of the type precision which will allow ++ compare_range_with_value() to check if a value is inside a range, ++ whereas if we used TYPE_*_VAL, said function would just punt ++ upon seeing a VARYING. */ ++ unsigned prec = TYPE_PRECISION (type); ++ signop sign = TYPE_SIGN (type); ++ if (wi::eq_p (wi::to_wide (min), wi::min_value (prec, sign)) ++ && wi::eq_p (wi::to_wide (max), wi::max_value (prec, sign))) ++ { ++ if (kind == VR_RANGE) ++ set_varying (); ++ else if (kind == VR_ANTI_RANGE) ++ set_undefined (); ++ else ++ gcc_unreachable (); ++ return; ++ } ++ + /* Do not drop [-INF(OVF), +INF(OVF)] to varying. (OVF) has to be sticky + to make sure VRP iteration terminates, otherwise we can get into + oscillations. */ + +- set (kind, min, max); +-} +- +-void +-value_range::set_and_canonicalize (enum value_range_kind kind, +- tree min, tree max, bitmap equiv) +-{ +- value_range_base::set_and_canonicalize (kind, min, max); +- if (this->kind () == VR_RANGE || this->kind () == VR_ANTI_RANGE) +- set_equiv (equiv); +- else +- equiv_clear (); ++ m_kind = kind; ++ m_min = min; ++ m_max = max; ++ if (flag_checking) ++ check (); + } + + void +@@ -776,32 +833,19 @@ value_range::set (tree val) + set (VR_RANGE, val, val, NULL); + } + +-/* Set value range VR to a non-NULL range of type TYPE. */ ++/* Set value range VR to a nonzero range of type TYPE. 
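[Editorial illustration, not part of the patch] A compact sketch of the canonicalization rules described in the hunk above, written over plain 32-bit ints with invented names; the real code works on trees and also has to cope with symbolic and poly-int bounds.

#include <assert.h>
#include <limits.h>

enum vr_kind { VR_UNDEF, VR_RANGE, VR_ANTI, VR_VARY };
struct vr { enum vr_kind kind; int min, max; };

static struct vr canon (enum vr_kind kind, int min, int max)
{
  /* Anti-ranges that can be represented as ranges are converted:
     ~[INT_MIN, b] is just [b + 1, INT_MAX], and symmetrically.  */
  if (kind == VR_ANTI && min == INT_MIN && max < INT_MAX)
    { kind = VR_RANGE; min = max + 1; max = INT_MAX; }
  else if (kind == VR_ANTI && max == INT_MAX && min > INT_MIN)
    { kind = VR_RANGE; max = min - 1; min = INT_MIN; }

  /* Full ranges carry no information: [MIN, MAX] becomes VARYING and
     ~[MIN, MAX] becomes UNDEFINED.  */
  if (min == INT_MIN && max == INT_MAX)
    kind = (kind == VR_RANGE) ? VR_VARY : VR_UNDEF;

  struct vr r = { kind, min, max };
  return r;
}

int main (void)
{
  assert (canon (VR_ANTI, INT_MIN, 5).kind == VR_RANGE);
  assert (canon (VR_ANTI, INT_MIN, 5).min == 6);
  assert (canon (VR_RANGE, INT_MIN, INT_MAX).kind == VR_VARY);
  assert (canon (VR_ANTI, INT_MIN, INT_MAX).kind == VR_UNDEF);
  return 0;
}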
*/ + + void +-value_range_base::set_nonnull (tree type) ++value_range_base::set_nonzero (tree type) + { + tree zero = build_int_cst (type, 0); + set (VR_ANTI_RANGE, zero, zero); + } + +-void +-value_range::set_nonnull (tree type) +-{ +- tree zero = build_int_cst (type, 0); +- set (VR_ANTI_RANGE, zero, zero, NULL); +-} +- +-/* Set value range VR to a NULL range of type TYPE. */ ++/* Set value range VR to a ZERO range of type TYPE. */ + + void +-value_range_base::set_null (tree type) +-{ +- set (build_int_cst (type, 0)); +-} +- +-void +-value_range::set_null (tree type) ++value_range_base::set_zero (tree type) + { + set (build_int_cst (type, 0)); + } +@@ -830,22 +874,6 @@ vrp_bitmap_equal_p (const_bitmap b1, const_bitmap b2) + && bitmap_equal_p (b1, b2))); + } + +-/* Return true if VR is [0, 0]. */ +- +-static inline bool +-range_is_null (const value_range_base *vr) +-{ +- return vr->zero_p (); +-} +- +-static inline bool +-range_is_nonnull (const value_range_base *vr) +-{ +- return (vr->kind () == VR_ANTI_RANGE +- && vr->min () == vr->max () +- && integer_zerop (vr->min ())); +-} +- + /* Return true if max and min of VR are INTEGER_CST. It's not necessary + a singleton. */ + +@@ -949,22 +977,17 @@ operand_less_p (tree val, tree val2) + /* LT is folded faster than GE and others. Inline the common case. */ + if (TREE_CODE (val) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST) + return tree_int_cst_lt (val, val2); ++ else if (TREE_CODE (val) == SSA_NAME && TREE_CODE (val2) == SSA_NAME) ++ return val == val2 ? 0 : -2; + else + { +- tree tcmp; +- +- fold_defer_overflow_warnings (); +- +- tcmp = fold_binary_to_constant (LT_EXPR, boolean_type_node, val, val2); +- +- fold_undefer_and_ignore_overflow_warnings (); +- +- if (!tcmp +- || TREE_CODE (tcmp) != INTEGER_CST) +- return -2; +- +- if (!integer_zerop (tcmp)) ++ int cmp = compare_values (val, val2); ++ if (cmp == -1) + return 1; ++ else if (cmp == 0 || cmp == 1) ++ return 0; ++ else ++ return -2; + } + + return 0; +@@ -998,8 +1021,8 @@ compare_values_warnv (tree val1, tree val2, bool *strict_overflow_p) + + /* Convert the two values into the same type. This is needed because + sizetype causes sign extension even for unsigned types. */ +- val2 = fold_convert (TREE_TYPE (val1), val2); +- STRIP_USELESS_TYPE_CONVERSION (val2); ++ if (!useless_type_conversion_p (TREE_TYPE (val1), TREE_TYPE (val2))) ++ val2 = fold_convert (TREE_TYPE (val1), val2); + + const bool overflow_undefined + = INTEGRAL_TYPE_P (TREE_TYPE (val1)) +@@ -1107,32 +1130,43 @@ compare_values_warnv (tree val1, tree val2, bool *strict_overflow_p) + } + else + { +- tree t; ++ if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST) ++ { ++ /* We cannot compare overflowed values. */ ++ if (TREE_OVERFLOW (val1) || TREE_OVERFLOW (val2)) ++ return -2; ++ ++ return tree_int_cst_compare (val1, val2); ++ } + + /* First see if VAL1 and VAL2 are not the same. */ +- if (val1 == val2 || operand_equal_p (val1, val2, 0)) ++ if (operand_equal_p (val1, val2, 0)) + return 0; + ++ fold_defer_overflow_warnings (); ++ + /* If VAL1 is a lower address than VAL2, return -1. */ +- if (operand_less_p (val1, val2) == 1) +- return -1; ++ tree t = fold_binary_to_constant (LT_EXPR, boolean_type_node, val1, val2); ++ if (t && integer_onep (t)) ++ { ++ fold_undefer_and_ignore_overflow_warnings (); ++ return -1; ++ } + + /* If VAL1 is a higher address than VAL2, return +1. */ +- if (operand_less_p (val2, val1) == 1) +- return 1; +- +- /* If VAL1 is different than VAL2, return +2. 
+- For integer constants we either have already returned -1 or 1 +- or they are equivalent. We still might succeed in proving +- something about non-trivial operands. */ +- if (TREE_CODE (val1) != INTEGER_CST +- || TREE_CODE (val2) != INTEGER_CST) ++ t = fold_binary_to_constant (LT_EXPR, boolean_type_node, val2, val1); ++ if (t && integer_onep (t)) + { +- t = fold_binary_to_constant (NE_EXPR, boolean_type_node, val1, val2); +- if (t && integer_onep (t)) +- return 2; ++ fold_undefer_and_ignore_overflow_warnings (); ++ return 1; + } + ++ /* If VAL1 is different than VAL2, return +2. */ ++ t = fold_binary_to_constant (NE_EXPR, boolean_type_node, val1, val2); ++ fold_undefer_and_ignore_overflow_warnings (); ++ if (t && integer_onep (t)) ++ return 2; ++ + return -2; + } + } +@@ -1231,7 +1265,8 @@ vrp_set_zero_nonzero_bits (const tree expr_type, + + static bool + ranges_from_anti_range (const value_range_base *ar, +- value_range_base *vr0, value_range_base *vr1) ++ value_range_base *vr0, value_range_base *vr1, ++ bool handle_pointers) + { + tree type = ar->type (); + +@@ -1244,18 +1279,18 @@ ranges_from_anti_range (const value_range_base *ar, + if (ar->kind () != VR_ANTI_RANGE + || TREE_CODE (ar->min ()) != INTEGER_CST + || TREE_CODE (ar->max ()) != INTEGER_CST +- || !vrp_val_min (type) +- || !vrp_val_max (type)) ++ || !vrp_val_min (type, handle_pointers) ++ || !vrp_val_max (type, handle_pointers)) + return false; + +- if (tree_int_cst_lt (vrp_val_min (type), ar->min ())) ++ if (tree_int_cst_lt (vrp_val_min (type, handle_pointers), ar->min ())) + vr0->set (VR_RANGE, +- vrp_val_min (type), ++ vrp_val_min (type, handle_pointers), + wide_int_to_tree (type, wi::to_wide (ar->min ()) - 1)); +- if (tree_int_cst_lt (ar->max (), vrp_val_max (type))) ++ if (tree_int_cst_lt (ar->max (), vrp_val_max (type, handle_pointers))) + vr1->set (VR_RANGE, + wide_int_to_tree (type, wi::to_wide (ar->max ()) + 1), +- vrp_val_max (type)); ++ vrp_val_max (type, handle_pointers)); + if (vr0->undefined_p ()) + { + *vr0 = *vr1; +@@ -1266,21 +1301,20 @@ ranges_from_anti_range (const value_range_base *ar, + } + + /* Extract the components of a value range into a pair of wide ints in +- [WMIN, WMAX]. +- +- If the value range is anything but a VR_*RANGE of constants, the +- resulting wide ints are set to [-MIN, +MAX] for the type. */ ++ [WMIN, WMAX], after having normalized any symbolics from the input. 
*/ + + static void inline +-extract_range_into_wide_ints (const value_range_base *vr, +- signop sign, unsigned prec, +- wide_int &wmin, wide_int &wmax) ++extract_range_into_wide_ints (const value_range_base *vr_, ++ tree type, wide_int &wmin, wide_int &wmax) + { +- gcc_assert (vr->kind () != VR_ANTI_RANGE || vr->symbolic_p ()); +- if (range_int_cst_p (vr)) ++ signop sign = TYPE_SIGN (type); ++ unsigned int prec = TYPE_PRECISION (type); ++ gcc_assert (vr_->kind () != VR_ANTI_RANGE || vr_->symbolic_p ()); ++ value_range vr = vr_->normalize_symbolics (); ++ if (range_int_cst_p (&vr)) + { +- wmin = wi::to_wide (vr->min ()); +- wmax = wi::to_wide (vr->max ()); ++ wmin = wi::to_wide (vr.min ()); ++ wmax = wi::to_wide (vr.max ()); + } + else + { +@@ -1295,7 +1329,7 @@ extract_range_into_wide_ints (const value_range_base *vr, + + static void + extract_range_from_multiplicative_op (value_range_base *vr, +- enum tree_code code, ++ enum tree_code code, tree type, + const value_range_base *vr0, + const value_range_base *vr1) + { +@@ -1307,13 +1341,31 @@ extract_range_from_multiplicative_op (value_range_base *vr, + || code == ROUND_DIV_EXPR + || code == RSHIFT_EXPR + || code == LSHIFT_EXPR); +- gcc_assert (vr0->kind () == VR_RANGE +- && vr0->kind () == vr1->kind ()); ++ if (!range_int_cst_p (vr1)) ++ { ++ vr->set_varying (); ++ return; ++ } ++ ++ /* Even if vr0 is VARYING or otherwise not usable, we can derive ++ useful ranges just from the shift count. E.g. ++ x >> 63 for signed 64-bit x is always [-1, 0]. */ ++ value_range_base tem = vr0->normalize_symbolics (); ++ tree vr0_min, vr0_max; ++ if (tem.kind () == VR_RANGE) ++ { ++ vr0_min = tem.min (); ++ vr0_max = tem.max (); ++ } ++ else ++ { ++ vr0_min = vrp_val_min (type); ++ vr0_max = vrp_val_max (type); ++ } + +- tree type = vr0->type (); + wide_int res_lb, res_ub; +- wide_int vr0_lb = wi::to_wide (vr0->min ()); +- wide_int vr0_ub = wi::to_wide (vr0->max ()); ++ wide_int vr0_lb = wi::to_wide (vr0_min); ++ wide_int vr0_ub = wi::to_wide (vr0_max); + wide_int vr1_lb = wi::to_wide (vr1->min ()); + wide_int vr1_ub = wi::to_wide (vr1->max ()); + bool overflow_undefined = TYPE_OVERFLOW_UNDEFINED (type); +@@ -1323,9 +1375,8 @@ extract_range_from_multiplicative_op (value_range_base *vr, + code, TYPE_SIGN (type), prec, + vr0_lb, vr0_ub, vr1_lb, vr1_ub, + overflow_undefined)) +- vr->set_and_canonicalize (VR_RANGE, +- wide_int_to_tree (type, res_lb), +- wide_int_to_tree (type, res_ub)); ++ vr->set (VR_RANGE, wide_int_to_tree (type, res_lb), ++ wide_int_to_tree (type, res_ub)); + else + vr->set_varying (); + } +@@ -1583,9 +1634,9 @@ extract_range_from_binary_expr (value_range_base *vr, + code is EXACT_DIV_EXPR. We could mask out bits in the resulting + range, but then we also need to hack up vrp_union. It's just + easier to special case when vr0 is ~[0,0] for EXACT_DIV_EXPR. */ +- if (code == EXACT_DIV_EXPR && range_is_nonnull (&vr0)) ++ if (code == EXACT_DIV_EXPR && vr0.nonzero_p ()) + { +- vr->set_nonnull (expr_type); ++ vr->set_nonzero (expr_type); + return; + } + +@@ -1663,9 +1714,9 @@ extract_range_from_binary_expr (value_range_base *vr, + If both are null, then the result is null. Otherwise they + are varying. 
*/ + if (!range_includes_zero_p (&vr0) && !range_includes_zero_p (&vr1)) +- vr->set_nonnull (expr_type); +- else if (range_is_null (&vr0) && range_is_null (&vr1)) +- vr->set_null (expr_type); ++ vr->set_nonzero (expr_type); ++ else if (vr0.zero_p () && vr1.zero_p ()) ++ vr->set_zero (expr_type); + else + vr->set_varying (); + } +@@ -1692,9 +1743,9 @@ extract_range_from_binary_expr (value_range_base *vr, + && (flag_delete_null_pointer_checks + || (range_int_cst_p (&vr1) + && !tree_int_cst_sign_bit (vr1.max ())))) +- vr->set_nonnull (expr_type); +- else if (range_is_null (&vr0) && range_is_null (&vr1)) +- vr->set_null (expr_type); ++ vr->set_nonzero (expr_type); ++ else if (vr0.zero_p () && vr1.zero_p ()) ++ vr->set_zero (expr_type); + else + vr->set_varying (); + } +@@ -1702,8 +1753,8 @@ extract_range_from_binary_expr (value_range_base *vr, + { + /* For pointer types, we are really only interested in asserting + whether the expression evaluates to non-NULL. */ +- if (range_is_null (&vr0) || range_is_null (&vr1)) +- vr->set_null (expr_type); ++ if (vr0.zero_p () || vr1.zero_p ()) ++ vr->set_zero (expr_type); + else + vr->set_varying (); + } +@@ -1717,19 +1768,30 @@ extract_range_from_binary_expr (value_range_base *vr, + range and see what we end up with. */ + if (code == PLUS_EXPR || code == MINUS_EXPR) + { ++ value_range_kind vr0_kind = vr0.kind (), vr1_kind = vr1.kind (); ++ tree vr0_min = vr0.min (), vr0_max = vr0.max (); ++ tree vr1_min = vr1.min (), vr1_max = vr1.max (); + /* This will normalize things such that calculating + [0,0] - VR_VARYING is not dropped to varying, but is + calculated as [MIN+1, MAX]. */ + if (vr0.varying_p ()) +- vr0.set (VR_RANGE, vrp_val_min (expr_type), vrp_val_max (expr_type)); ++ { ++ vr0_kind = VR_RANGE; ++ vr0_min = vrp_val_min (expr_type); ++ vr0_max = vrp_val_max (expr_type); ++ } + if (vr1.varying_p ()) +- vr1.set (VR_RANGE, vrp_val_min (expr_type), vrp_val_max (expr_type)); ++ { ++ vr1_kind = VR_RANGE; ++ vr1_min = vrp_val_min (expr_type); ++ vr1_max = vrp_val_max (expr_type); ++ } + + const bool minus_p = (code == MINUS_EXPR); +- tree min_op0 = vr0.min (); +- tree min_op1 = minus_p ? vr1.max () : vr1.min (); +- tree max_op0 = vr0.max (); +- tree max_op1 = minus_p ? vr1.min () : vr1.max (); ++ tree min_op0 = vr0_min; ++ tree min_op1 = minus_p ? vr1_max : vr1_min; ++ tree max_op0 = vr0_max; ++ tree max_op1 = minus_p ? vr1_min : vr1_max; + tree sym_min_op0 = NULL_TREE; + tree sym_min_op1 = NULL_TREE; + tree sym_max_op0 = NULL_TREE; +@@ -1742,7 +1804,7 @@ extract_range_from_binary_expr (value_range_base *vr, + single-symbolic ranges, try to compute the precise resulting range, + but only if we know that this resulting range will also be constant + or single-symbolic. 
*/ +- if (vr0.kind () == VR_RANGE && vr1.kind () == VR_RANGE ++ if (vr0_kind == VR_RANGE && vr1_kind == VR_RANGE + && (TREE_CODE (min_op0) == INTEGER_CST + || (sym_min_op0 + = get_single_symbol (min_op0, &neg_min_op0, &min_op0))) +@@ -1823,8 +1885,8 @@ extract_range_from_binary_expr (value_range_base *vr, + wide_int wmin, wmax; + wide_int vr0_min, vr0_max; + wide_int vr1_min, vr1_max; +- extract_range_into_wide_ints (&vr0, sign, prec, vr0_min, vr0_max); +- extract_range_into_wide_ints (&vr1, sign, prec, vr1_min, vr1_max); ++ extract_range_into_wide_ints (&vr0, expr_type, vr0_min, vr0_max); ++ extract_range_into_wide_ints (&vr1, expr_type, vr1_min, vr1_max); + if (wide_int_range_min_max (wmin, wmax, code, sign, prec, + vr0_min, vr0_max, vr1_min, vr1_max)) + vr->set (VR_RANGE, wide_int_to_tree (expr_type, wmin), +@@ -1841,7 +1903,7 @@ extract_range_from_binary_expr (value_range_base *vr, + vr->set_varying (); + return; + } +- extract_range_from_multiplicative_op (vr, code, &vr0, &vr1); ++ extract_range_from_multiplicative_op (vr, code, expr_type, &vr0, &vr1); + return; + } + else if (code == RSHIFT_EXPR +@@ -1856,13 +1918,8 @@ extract_range_from_binary_expr (value_range_base *vr, + { + if (code == RSHIFT_EXPR) + { +- /* Even if vr0 is VARYING or otherwise not usable, we can derive +- useful ranges just from the shift count. E.g. +- x >> 63 for signed 64-bit x is always [-1, 0]. */ +- if (vr0.kind () != VR_RANGE || vr0.symbolic_p ()) +- vr0.set (VR_RANGE, vrp_val_min (expr_type), +- vrp_val_max (expr_type)); +- extract_range_from_multiplicative_op (vr, code, &vr0, &vr1); ++ extract_range_from_multiplicative_op (vr, code, expr_type, ++ &vr0, &vr1); + return; + } + else if (code == LSHIFT_EXPR +@@ -1878,7 +1935,7 @@ extract_range_from_binary_expr (value_range_base *vr, + { + min = wide_int_to_tree (expr_type, res_lb); + max = wide_int_to_tree (expr_type, res_ub); +- vr->set_and_canonicalize (VR_RANGE, min, max); ++ vr->set (VR_RANGE, min, max); + return; + } + } +@@ -1897,7 +1954,7 @@ extract_range_from_binary_expr (value_range_base *vr, + bool extra_range_p; + + /* Special case explicit division by zero as undefined. */ +- if (range_is_null (&vr1)) ++ if (vr1.zero_p ()) + { + vr->set_undefined (); + return; +@@ -1910,9 +1967,9 @@ extract_range_from_binary_expr (value_range_base *vr, + NOTE: As a future improvement, we may be able to do better + with mixed symbolic (anti-)ranges like [0, A]. See note in + ranges_from_anti_range. 
*/ +- extract_range_into_wide_ints (&vr0, sign, prec, ++ extract_range_into_wide_ints (&vr0, expr_type, + dividend_min, dividend_max); +- extract_range_into_wide_ints (&vr1, sign, prec, ++ extract_range_into_wide_ints (&vr1, expr_type, + divisor_min, divisor_max); + if (!wide_int_range_div (wmin, wmax, code, sign, prec, + dividend_min, dividend_max, +@@ -1936,15 +1993,15 @@ extract_range_from_binary_expr (value_range_base *vr, + } + else if (code == TRUNC_MOD_EXPR) + { +- if (range_is_null (&vr1)) ++ if (vr1.zero_p ()) + { + vr->set_undefined (); + return; + } + wide_int wmin, wmax, tmp; + wide_int vr0_min, vr0_max, vr1_min, vr1_max; +- extract_range_into_wide_ints (&vr0, sign, prec, vr0_min, vr0_max); +- extract_range_into_wide_ints (&vr1, sign, prec, vr1_min, vr1_max); ++ extract_range_into_wide_ints (&vr0, expr_type, vr0_min, vr0_max); ++ extract_range_into_wide_ints (&vr1, expr_type, vr1_min, vr1_max); + wide_int_range_trunc_mod (wmin, wmax, sign, prec, + vr0_min, vr0_max, vr1_min, vr1_max); + min = wide_int_to_tree (expr_type, wmin); +@@ -1962,8 +2019,8 @@ extract_range_from_binary_expr (value_range_base *vr, + &may_be_nonzero0, &must_be_nonzero0); + vrp_set_zero_nonzero_bits (expr_type, &vr1, + &may_be_nonzero1, &must_be_nonzero1); +- extract_range_into_wide_ints (&vr0, sign, prec, vr0_min, vr0_max); +- extract_range_into_wide_ints (&vr1, sign, prec, vr1_min, vr1_max); ++ extract_range_into_wide_ints (&vr0, expr_type, vr0_min, vr0_max); ++ extract_range_into_wide_ints (&vr1, expr_type, vr1_min, vr1_max); + if (code == BIT_AND_EXPR) + { + if (wide_int_range_bit_and (wmin, wmax, sign, prec, +@@ -2140,9 +2197,9 @@ extract_range_from_unary_expr (value_range_base *vr, + if (POINTER_TYPE_P (type) || POINTER_TYPE_P (op0_type)) + { + if (!range_includes_zero_p (&vr0)) +- vr->set_nonnull (type); +- else if (range_is_null (&vr0)) +- vr->set_null (type); ++ vr->set_nonzero (type); ++ else if (vr0.zero_p ()) ++ vr->set_zero (type); + else + vr->set_varying (); + return; +@@ -2167,8 +2224,7 @@ extract_range_from_unary_expr (value_range_base *vr, + signop outer_sign = TYPE_SIGN (outer_type); + unsigned inner_prec = TYPE_PRECISION (inner_type); + unsigned outer_prec = TYPE_PRECISION (outer_type); +- extract_range_into_wide_ints (&vr0, inner_sign, inner_prec, +- vr0_min, vr0_max); ++ extract_range_into_wide_ints (&vr0, inner_type, vr0_min, vr0_max); + if (wide_int_range_convert (wmin, wmax, + inner_sign, inner_prec, + outer_sign, outer_prec, +@@ -2176,7 +2232,7 @@ extract_range_from_unary_expr (value_range_base *vr, + { + tree min = wide_int_to_tree (outer_type, wmin); + tree max = wide_int_to_tree (outer_type, wmax); +- vr->set_and_canonicalize (VR_RANGE, min, max); ++ vr->set (VR_RANGE, min, max); + } + else + vr->set_varying (); +@@ -2186,7 +2242,7 @@ extract_range_from_unary_expr (value_range_base *vr, + { + wide_int wmin, wmax; + wide_int vr0_min, vr0_max; +- extract_range_into_wide_ints (&vr0, sign, prec, vr0_min, vr0_max); ++ extract_range_into_wide_ints (&vr0, type, vr0_min, vr0_max); + if (wide_int_range_abs (wmin, wmax, sign, prec, vr0_min, vr0_max, + TYPE_OVERFLOW_UNDEFINED (type))) + vr->set (VR_RANGE, wide_int_to_tree (type, wmin), +@@ -2199,7 +2255,8 @@ extract_range_from_unary_expr (value_range_base *vr, + { + wide_int wmin, wmax; + wide_int vr0_min, vr0_max; +- extract_range_into_wide_ints (&vr0, SIGNED, prec, vr0_min, vr0_max); ++ tree signed_type = make_signed_type (TYPE_PRECISION (type)); ++ extract_range_into_wide_ints (&vr0, signed_type, vr0_min, vr0_max); + wide_int_range_absu 
(wmin, wmax, prec, vr0_min, vr0_max); + vr->set (VR_RANGE, wide_int_to_tree (type, wmin), + wide_int_to_tree (type, wmax)); +@@ -5468,8 +5525,10 @@ union_ranges (enum value_range_kind *vr0type, + enum value_range_kind vr1type, + tree vr1min, tree vr1max) + { +- bool mineq = vrp_operand_equal_p (*vr0min, vr1min); +- bool maxeq = vrp_operand_equal_p (*vr0max, vr1max); ++ int cmpmin = compare_values (*vr0min, vr1min); ++ int cmpmax = compare_values (*vr0max, vr1max); ++ bool mineq = cmpmin == 0; ++ bool maxeq = cmpmax == 0; + + /* [] is vr0, () is vr1 in the following classification comments. */ + if (mineq && maxeq) +@@ -5569,8 +5628,8 @@ union_ranges (enum value_range_kind *vr0type, + else + gcc_unreachable (); + } +- else if ((maxeq || operand_less_p (vr1max, *vr0max) == 1) +- && (mineq || operand_less_p (*vr0min, vr1min) == 1)) ++ else if ((maxeq || cmpmax == 1) ++ && (mineq || cmpmin == -1)) + { + /* [ ( ) ] or [( ) ] or [ ( )] */ + if (*vr0type == VR_RANGE +@@ -5603,8 +5662,8 @@ union_ranges (enum value_range_kind *vr0type, + else + gcc_unreachable (); + } +- else if ((maxeq || operand_less_p (*vr0max, vr1max) == 1) +- && (mineq || operand_less_p (vr1min, *vr0min) == 1)) ++ else if ((maxeq || cmpmax == -1) ++ && (mineq || cmpmin == 1)) + { + /* ( [ ] ) or ([ ] ) or ( [ ]) */ + if (*vr0type == VR_RANGE +@@ -5643,10 +5702,10 @@ union_ranges (enum value_range_kind *vr0type, + else + gcc_unreachable (); + } +- else if ((operand_less_p (vr1min, *vr0max) == 1 +- || operand_equal_p (vr1min, *vr0max, 0)) +- && operand_less_p (*vr0min, vr1min) == 1 +- && operand_less_p (*vr0max, vr1max) == 1) ++ else if (cmpmin == -1 ++ && cmpmax == -1 ++ && (operand_less_p (vr1min, *vr0max) == 1 ++ || operand_equal_p (vr1min, *vr0max, 0))) + { + /* [ ( ] ) or [ ]( ) */ + if (*vr0type == VR_RANGE +@@ -5680,10 +5739,10 @@ union_ranges (enum value_range_kind *vr0type, + else + gcc_unreachable (); + } +- else if ((operand_less_p (*vr0min, vr1max) == 1 +- || operand_equal_p (*vr0min, vr1max, 0)) +- && operand_less_p (vr1min, *vr0min) == 1 +- && operand_less_p (vr1max, *vr0max) == 1) ++ else if (cmpmin == 1 ++ && cmpmax == 1 ++ && (operand_less_p (*vr0min, vr1max) == 1 ++ || operand_equal_p (*vr0min, vr1max, 0))) + { + /* ( [ ) ] or ( )[ ] */ + if (*vr0type == VR_RANGE +@@ -6083,7 +6142,7 @@ value_range::intersect_helper (value_range *vr0, const value_range *vr1) + VR_RANGE can still be a VR_RANGE. Work on a temporary so we can + fall back to vr0 when this turns things to varying. */ + value_range tem; +- tem.set_and_canonicalize (vr0type, vr0min, vr0max); ++ tem.set (vr0type, vr0min, vr0max); + /* If that failed, use the saved original VR0. */ + if (tem.varying_p ()) + return; +@@ -6152,8 +6211,8 @@ value_range_base::union_helper (const value_range_base *vr0, + vr1->kind (), vr1->min (), vr1->max ()); + + /* Work on a temporary so we can still use vr0 when union returns varying. */ +- value_range tem; +- tem.set_and_canonicalize (vr0type, vr0min, vr0max); ++ value_range_base tem; ++ tem.set (vr0type, vr0min, vr0max); + + /* Failed to find an efficient meet. 
Before giving up and setting + the result to VARYING, see if we can at least derive a useful +@@ -6162,7 +6221,7 @@ value_range_base::union_helper (const value_range_base *vr0, + && range_includes_zero_p (vr0) == 0 + && range_includes_zero_p (vr1) == 0) + { +- tem.set_nonnull (vr0->type ()); ++ tem.set_nonzero (vr0->type ()); + return tem; + } + +@@ -6233,6 +6292,58 @@ value_range::union_ (const value_range *other) + } + } + ++/* Normalize symbolics into constants. */ ++ ++value_range_base ++value_range_base::normalize_symbolics () const ++{ ++ if (varying_p () || undefined_p ()) ++ return *this; ++ tree ttype = type (); ++ bool min_symbolic = !is_gimple_min_invariant (min ()); ++ bool max_symbolic = !is_gimple_min_invariant (max ()); ++ if (!min_symbolic && !max_symbolic) ++ return *this; ++ ++ // [SYM, SYM] -> VARYING ++ if (min_symbolic && max_symbolic) ++ { ++ value_range_base var; ++ var.set_varying (); ++ return var; ++ } ++ if (kind () == VR_RANGE) ++ { ++ // [SYM, NUM] -> [-MIN, NUM] ++ if (min_symbolic) ++ return value_range_base (VR_RANGE, vrp_val_min (ttype), max ()); ++ // [NUM, SYM] -> [NUM, +MAX] ++ return value_range_base (VR_RANGE, min (), vrp_val_max (ttype)); ++ } ++ gcc_assert (kind () == VR_ANTI_RANGE); ++ // ~[SYM, NUM] -> [NUM + 1, +MAX] ++ if (min_symbolic) ++ { ++ if (!vrp_val_is_max (max ())) ++ { ++ tree n = wide_int_to_tree (ttype, wi::to_wide (max ()) + 1); ++ return value_range_base (VR_RANGE, n, vrp_val_max (ttype)); ++ } ++ value_range_base var; ++ var.set_varying (); ++ return var; ++ } ++ // ~[NUM, SYM] -> [-MIN, NUM - 1] ++ if (!vrp_val_is_min (min ())) ++ { ++ tree n = wide_int_to_tree (ttype, wi::to_wide (min ()) - 1); ++ return value_range_base (VR_RANGE, vrp_val_min (ttype), n); ++ } ++ value_range_base var; ++ var.set_varying (); ++ return var; ++} ++ + /* Visit all arguments for PHI node PHI that flow through executable + edges. If a valid value range can be derived from all the incoming + value ranges, set a new range for the LHS of PHI. */ +diff --git a/gcc/tree-vrp.h b/gcc/tree-vrp.h +index 9d52b428d05..4bcff924b58 100644 +--- a/gcc/tree-vrp.h ++++ b/gcc/tree-vrp.h +@@ -46,8 +46,8 @@ public: + + void set (value_range_kind, tree, tree); + void set (tree); +- void set_nonnull (tree); +- void set_null (tree); ++ void set_nonzero (tree); ++ void set_zero (tree); + + enum value_range_kind kind () const; + tree min () const; +@@ -70,11 +70,13 @@ public: + /* Misc methods. */ + tree type () const; + bool may_contain_p (tree) const; +- void set_and_canonicalize (enum value_range_kind, tree, tree); + bool zero_p () const; ++ bool nonzero_p () const; + bool singleton_p (tree *result = NULL) const; + void dump (FILE *) const; + ++ value_range_base normalize_symbolics () const; ++ + protected: + void check (); + static value_range_base union_helper (const value_range_base *, +@@ -118,8 +120,6 @@ class GTY((user)) value_range : public value_range_base + /* Deep-copies equiv bitmap argument. */ + void set (value_range_kind, tree, tree, bitmap = NULL); + void set (tree); +- void set_nonnull (tree); +- void set_null (tree); + + bool operator== (const value_range &) const /* = delete */; + bool operator!= (const value_range &) const /* = delete */; +@@ -138,7 +138,6 @@ class GTY((user)) value_range : public value_range_base + + /* Misc methods. 
*/ + void deep_copy (const value_range *); +- void set_and_canonicalize (enum value_range_kind, tree, tree, bitmap = NULL); + void dump (FILE *) const; + + private: +@@ -222,6 +221,16 @@ value_range_base::zero_p () const + && integer_zerop (m_max)); + } + ++/* Return TRUE if range is nonzero. */ ++ ++inline bool ++value_range_base::nonzero_p () const ++{ ++ return (m_kind == VR_ANTI_RANGE ++ && integer_zerop (m_min) ++ && integer_zerop (m_max)); ++} ++ + extern void dump_value_range (FILE *, const value_range *); + extern void dump_value_range (FILE *, const value_range_base *); + +@@ -259,8 +268,8 @@ extern bool vrp_val_is_min (const_tree); + extern bool vrp_val_is_max (const_tree); + extern int value_inside_range (tree, tree, tree); + +-extern tree vrp_val_min (const_tree); +-extern tree vrp_val_max (const_tree); ++extern tree vrp_val_min (const_tree, bool handle_pointers = false); ++extern tree vrp_val_max (const_tree, bool handle_pointers = false); + + extern void extract_range_from_unary_expr (value_range_base *vr, + enum tree_code code, +diff --git a/gcc/tree.c b/gcc/tree.c +index 32e94e48132..c4b8eea675f 100644 +--- a/gcc/tree.c ++++ b/gcc/tree.c +@@ -8213,8 +8213,6 @@ build_nonstandard_integer_type (unsigned HOST_WIDE_INT precision, + else + fixup_signed_type (itype); + +- ret = itype; +- + inchash::hash hstate; + inchash::add_expr (TYPE_MAX_VALUE (itype), hstate); + ret = type_hash_canon (hstate.end (), itype); +@@ -11079,44 +11077,44 @@ build_vector_type (tree innertype, poly_int64 nunits) + return make_vector_type (innertype, nunits, VOIDmode); + } + +-/* Build truth vector with specified length and number of units. */ ++/* Build a truth vector with NUNITS units, giving it mode MASK_MODE. */ + + tree +-build_truth_vector_type (poly_uint64 nunits, poly_uint64 vector_size) ++build_truth_vector_type_for_mode (poly_uint64 nunits, machine_mode mask_mode) + { +- machine_mode mask_mode +- = targetm.vectorize.get_mask_mode (nunits, vector_size).else_blk (); +- +- poly_uint64 vsize; +- if (mask_mode == BLKmode) +- vsize = vector_size * BITS_PER_UNIT; +- else +- vsize = GET_MODE_BITSIZE (mask_mode); ++ gcc_assert (mask_mode != BLKmode); + ++ poly_uint64 vsize = GET_MODE_BITSIZE (mask_mode); + unsigned HOST_WIDE_INT esize = vector_element_size (vsize, nunits); +- + tree bool_type = build_nonstandard_boolean_type (esize); + + return make_vector_type (bool_type, nunits, mask_mode); + } + +-/* Returns a vector type corresponding to a comparison of VECTYPE. */ ++/* Build a vector type that holds one boolean result for each element of ++ vector type VECTYPE. The public interface for this operation is ++ truth_type_for. 
*/ + +-tree +-build_same_sized_truth_vector_type (tree vectype) ++static tree ++build_truth_vector_type_for (tree vectype) + { +- if (VECTOR_BOOLEAN_TYPE_P (vectype)) +- return vectype; ++ machine_mode vector_mode = TYPE_MODE (vectype); ++ poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); + +- poly_uint64 size = GET_MODE_SIZE (TYPE_MODE (vectype)); ++ machine_mode mask_mode; ++ if (VECTOR_MODE_P (vector_mode) ++ && targetm.vectorize.get_mask_mode (vector_mode).exists (&mask_mode)) ++ return build_truth_vector_type_for_mode (nunits, mask_mode); + +- if (known_eq (size, 0U)) +- size = tree_to_uhwi (TYPE_SIZE_UNIT (vectype)); ++ poly_uint64 vsize = tree_to_poly_uint64 (TYPE_SIZE (vectype)); ++ unsigned HOST_WIDE_INT esize = vector_element_size (vsize, nunits); ++ tree bool_type = build_nonstandard_boolean_type (esize); + +- return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype), size); ++ return make_vector_type (bool_type, nunits, BLKmode); + } + +-/* Similarly, but builds a variant type with TYPE_VECTOR_OPAQUE set. */ ++/* Like build_vector_type, but builds a variant type with TYPE_VECTOR_OPAQUE ++ set. */ + + tree + build_opaque_vector_type (tree innertype, poly_int64 nunits) +@@ -11915,8 +11913,7 @@ truth_type_for (tree type) + { + if (VECTOR_BOOLEAN_TYPE_P (type)) + return type; +- return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (type), +- GET_MODE_SIZE (TYPE_MODE (type))); ++ return build_truth_vector_type_for (type); + } + else + return boolean_type_node; +diff --git a/gcc/tree.h b/gcc/tree.h +index 2f8e37bb356..6f73593faa7 100644 +--- a/gcc/tree.h ++++ b/gcc/tree.h +@@ -4272,8 +4272,7 @@ extern tree build_reference_type_for_mode (tree, machine_mode, bool); + extern tree build_reference_type (tree); + extern tree build_vector_type_for_mode (tree, machine_mode); + extern tree build_vector_type (tree, poly_int64); +-extern tree build_truth_vector_type (poly_uint64, poly_uint64); +-extern tree build_same_sized_truth_vector_type (tree vectype); ++extern tree build_truth_vector_type_for_mode (poly_uint64, machine_mode); + extern tree build_opaque_vector_type (tree, poly_int64); + extern tree build_index_type (tree); + extern tree build_array_type (tree, tree, bool = false); +diff --git a/gcc/vr-values.c b/gcc/vr-values.c +index 0e10aca92bb..02c89ab030a 100644 +--- a/gcc/vr-values.c ++++ b/gcc/vr-values.c +@@ -118,7 +118,10 @@ vr_values::get_value_range (const_tree var) + if (POINTER_TYPE_P (TREE_TYPE (sym)) + && (nonnull_arg_p (sym) + || get_ptr_nonnull (var))) +- vr->set_nonnull (TREE_TYPE (sym)); ++ { ++ vr->set_nonzero (TREE_TYPE (sym)); ++ vr->equiv_clear (); ++ } + else if (INTEGRAL_TYPE_P (TREE_TYPE (sym))) + { + get_range_info (var, *vr); +@@ -130,7 +133,10 @@ vr_values::get_value_range (const_tree var) + } + else if (TREE_CODE (sym) == RESULT_DECL + && DECL_BY_REFERENCE (sym)) +- vr->set_nonnull (TREE_TYPE (sym)); ++ { ++ vr->set_nonzero (TREE_TYPE (sym)); ++ vr->equiv_clear (); ++ } + } + + return vr; +@@ -491,9 +497,9 @@ vr_values::extract_range_for_var_from_comparison_expr (tree var, + vice-versa. Use set_and_canonicalize which does this for + us. 
*/ + if (cond_code == LE_EXPR) +- vr_p->set_and_canonicalize (VR_RANGE, min, max, vr_p->equiv ()); ++ vr_p->set (VR_RANGE, min, max, vr_p->equiv ()); + else if (cond_code == GT_EXPR) +- vr_p->set_and_canonicalize (VR_ANTI_RANGE, min, max, vr_p->equiv ()); ++ vr_p->set (VR_ANTI_RANGE, min, max, vr_p->equiv ()); + else + gcc_unreachable (); + } +@@ -565,7 +571,7 @@ vr_values::extract_range_for_var_from_comparison_expr (tree var, + && vrp_val_is_max (max)) + min = max = limit; + +- vr_p->set_and_canonicalize (VR_ANTI_RANGE, min, max, vr_p->equiv ()); ++ vr_p->set (VR_ANTI_RANGE, min, max, vr_p->equiv ()); + } + else if (cond_code == LE_EXPR || cond_code == LT_EXPR) + { +@@ -858,7 +864,10 @@ vr_values::extract_range_from_binary_expr (value_range *vr, + || (vr1.kind () == VR_ANTI_RANGE + && vr1.min () == op0 + && vr1.min () == vr1.max ()))) +- vr->set_nonnull (expr_type); ++ { ++ vr->set_nonzero (expr_type); ++ vr->equiv_clear (); ++ } + } + + /* Extract range information from a unary expression CODE OP0 based on +@@ -1085,7 +1094,8 @@ vr_values::extract_range_basic (value_range *vr, gimple *stmt) + && TREE_CODE (SSA_NAME_VAR (arg)) == PARM_DECL + && cfun->after_inlining) + { +- vr->set_null (type); ++ vr->set_zero (type); ++ vr->equiv_clear (); + return; + } + break; +@@ -1392,7 +1402,10 @@ vr_values::extract_range_basic (value_range *vr, gimple *stmt) + && gimple_stmt_nonnegative_warnv_p (stmt, &sop)) + set_value_range_to_nonnegative (vr, type); + else if (vrp_stmt_computes_nonzero (stmt)) +- vr->set_nonnull (type); ++ { ++ vr->set_nonzero (type); ++ vr->equiv_clear (); ++ } + else + vr->set_varying (); + } +diff --git a/libgcc/libgcov-driver-system.c b/libgcc/libgcov-driver-system.c +index b5f3e89ebdc..0d106002098 100644 +--- a/libgcc/libgcov-driver-system.c ++++ b/libgcc/libgcov-driver-system.c +@@ -262,10 +262,8 @@ static int + gcov_exit_open_gcda_file (struct gcov_info *gi_ptr, + struct gcov_filename *gf) + { +- const char *fname = gi_ptr->filename; + int append_slash = 0; +- +- fname = gi_ptr->filename; ++ const char *fname = gi_ptr->filename; + + /* Build relocated filename, stripping off leading + directories from the initial filename if requested. */ +diff --git a/libgcc/libgcov-util.c b/libgcc/libgcov-util.c +index ae0dd017204..e672768966b 100644 +--- a/libgcc/libgcov-util.c ++++ b/libgcc/libgcov-util.c +@@ -461,10 +461,9 @@ gcov_read_profile_dir (const char* dir_name, int recompute_summary ATTRIBUTE_UNU + #ifdef HAVE_FTW_H + ftw (".", ftw_read_file, 50); + #endif +- ret = chdir (pwd); ++ chdir (pwd); + free (pwd); + +- + return gcov_info_head;; + } + -- Gitee
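
For readers reviewing the tree-vrp hunks above: the renamed entry points set_nonzero/set_zero and the new nonzero_p/normalize_symbolics helpers all lean on two conventions visible in the diff -- "nonzero" is represented as the anti-range ~[0, 0], and a symbolic (non-constant) bound is widened to the type's extreme before any wide-int arithmetic. The standalone C++ sketch below models only those two conventions; it is illustrative, not GCC code -- toy_range, toy_kind and the int64_t bounds are hypothetical stand-ins for value_range_base, value_range_kind and tree/wide_int.

    /* Minimal standalone model of the range conventions used in the
       tree-vrp hunks above.  Names and types here are hypothetical;
       real GCC ranges are built on trees and wide_int.  */
    #include <cassert>
    #include <cstdint>
    #include <limits>

    enum toy_kind { TOY_RANGE, TOY_ANTI_RANGE, TOY_VARYING };

    struct toy_range
    {
      toy_kind kind;
      int64_t min;
      int64_t max;
      bool min_symbolic;   /* Stands in for a non-constant (SSA) bound.  */
      bool max_symbolic;

      /* ~[0, 0]: every value except zero, as produced by set_nonzero ().  */
      static toy_range nonzero ()
      { return { TOY_ANTI_RANGE, 0, 0, false, false }; }

      /* [0, 0]: exactly zero, as produced by set_zero ().  */
      static toy_range zero ()
      { return { TOY_RANGE, 0, 0, false, false }; }

      bool zero_p () const
      { return kind == TOY_RANGE && min == 0 && max == 0; }

      bool nonzero_p () const
      { return kind == TOY_ANTI_RANGE && min == 0 && max == 0; }

      /* Mirror of normalize_symbolics (): widen a symbolic bound to the
         type extreme; [SYM, SYM] degenerates to VARYING.  */
      toy_range normalize_symbolics () const
      {
        const int64_t tmin = std::numeric_limits<int64_t>::min ();
        const int64_t tmax = std::numeric_limits<int64_t>::max ();
        if (kind == TOY_VARYING || (!min_symbolic && !max_symbolic))
          return *this;
        if (min_symbolic && max_symbolic)
          return { TOY_VARYING, tmin, tmax, false, false };
        if (kind == TOY_RANGE)
          {
            if (min_symbolic)   /* [SYM, NUM] -> [-MIN, NUM]  */
              return { TOY_RANGE, tmin, max, false, false };
            /* [NUM, SYM] -> [NUM, +MAX]  */
            return { TOY_RANGE, min, tmax, false, false };
          }
        /* Anti-range with one symbolic bound: keep only the constant side,
           as in the ~[SYM, NUM] -> [NUM + 1, +MAX] case of the patch.  */
        if (min_symbolic && max != tmax)
          return { TOY_RANGE, max + 1, tmax, false, false };
        if (max_symbolic && min != tmin)
          return { TOY_RANGE, tmin, min - 1, false, false };
        return { TOY_VARYING, tmin, tmax, false, false };
      }
    };

    int main ()
    {
      assert (toy_range::nonzero ().nonzero_p ());
      assert (toy_range::zero ().zero_p ());

      /* [10, SYM] normalizes to [10, +MAX].  */
      toy_range r = { TOY_RANGE, 10, 0, false, true };
      toy_range n = r.normalize_symbolics ();
      assert (n.kind == TOY_RANGE && n.min == 10
              && n.max == std::numeric_limits<int64_t>::max ());

      /* ~[SYM, 5] normalizes to [6, +MAX].  */
      toy_range ar = { TOY_ANTI_RANGE, 0, 5, true, false };
      toy_range an = ar.normalize_symbolics ();
      assert (an.kind == TOY_RANGE && an.min == 6);
      return 0;
    }

Keeping ~[0, 0] as the canonical nonzero form is what lets the EXACT_DIV_EXPR and pointer hunks above test vr0.nonzero_p () and vr0.zero_p () directly instead of going through the removed range_is_nonnull/range_is_null wrappers, and normalizing symbolic bounds up front is what allows extract_range_into_wide_ints and extract_range_from_multiplicative_op to drop their per-caller special cases.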