diff --git a/aarch64-ilp32-call-addr-dimode.patch b/aarch64-ilp32-call-addr-dimode.patch
deleted file mode 100644
index 0a04debb803cf81faeb6867016e635083db10fb4..0000000000000000000000000000000000000000
--- a/aarch64-ilp32-call-addr-dimode.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-diff -urpN a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
---- a/gcc/config/aarch64/aarch64.md 2018-10-09 11:30:50.000000000 +0800
-+++ b/gcc/config/aarch64/aarch64.md 2018-10-09 11:52:54.000000000 +0800
-@@ -857,6 +857,13 @@
- : !REG_P (callee))
- XEXP (operands[0], 0) = force_reg (Pmode, callee);
-
-+ if (TARGET_ILP32
-+ && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF
-+ && GET_MODE (XEXP (operands[0], 0)) == SImode)
-+ XEXP (operands[0], 0) = convert_memory_address (DImode,
-+ XEXP (operands[0], 0));
-+
-+
- if (operands[2] == NULL_RTX)
- operands[2] = const0_rtx;
-
-@@ -889,6 +896,13 @@
- : !REG_P (callee))
- XEXP (operands[1], 0) = force_reg (Pmode, callee);
-
-+ if (TARGET_ILP32
-+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
-+ && GET_MODE (XEXP (operands[1], 0)) == SImode)
-+ XEXP (operands[1], 0) = convert_memory_address (DImode,
-+ XEXP (operands[1], 0));
-+
-+
- if (operands[3] == NULL_RTX)
- operands[3] = const0_rtx;
-
diff --git a/arm-adjust-be-ldrd-strd.patch b/arm-adjust-be-ldrd-strd.patch
deleted file mode 100644
index 90278d3e6e8af0f9a66bb68c4f92222043098d10..0000000000000000000000000000000000000000
--- a/arm-adjust-be-ldrd-strd.patch
+++ /dev/null
@@ -1,60 +0,0 @@
-diff -urp a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
---- a/gcc/config/arm/arm.c 2019-01-18 11:25:20.840179114 +0800
-+++ b/gcc/config/arm/arm.c 2019-01-18 11:25:47.548179817 +0800
-@@ -14306,18 +14306,36 @@ gen_movmem_ldrd_strd (rtx *operands)
- emit_move_insn (reg0, src);
- else
- {
-- emit_insn (gen_unaligned_loadsi (low_reg, src));
-- src = next_consecutive_mem (src);
-- emit_insn (gen_unaligned_loadsi (hi_reg, src));
-+ if (flag_lsrd_be_adjust && BYTES_BIG_ENDIAN && WORDS_BIG_ENDIAN)
-+ {
-+ emit_insn (gen_unaligned_loadsi (hi_reg, src));
-+ src = next_consecutive_mem (src);
-+ emit_insn (gen_unaligned_loadsi (low_reg, src));
-+ }
-+ else
-+ {
-+ emit_insn (gen_unaligned_loadsi (low_reg, src));
-+ src = next_consecutive_mem (src);
-+ emit_insn (gen_unaligned_loadsi (hi_reg, src));
-+ }
- }
-
- if (dst_aligned)
- emit_move_insn (dst, reg0);
- else
- {
-- emit_insn (gen_unaligned_storesi (dst, low_reg));
-- dst = next_consecutive_mem (dst);
-- emit_insn (gen_unaligned_storesi (dst, hi_reg));
-+ if (flag_lsrd_be_adjust && BYTES_BIG_ENDIAN && WORDS_BIG_ENDIAN)
-+ {
-+ emit_insn (gen_unaligned_storesi (dst, hi_reg));
-+ dst = next_consecutive_mem (dst);
-+ emit_insn (gen_unaligned_storesi (dst, low_reg));
-+ }
-+ else
-+ {
-+ emit_insn (gen_unaligned_storesi (dst, low_reg));
-+ dst = next_consecutive_mem (dst);
-+ emit_insn (gen_unaligned_storesi (dst, hi_reg));
-+ }
- }
-
- src = next_consecutive_mem (src);
-diff -urp a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
---- a/gcc/config/arm/arm.opt 2019-01-18 11:25:20.840179114 +0800
-+++ b/gcc/config/arm/arm.opt 2019-01-18 11:28:51.744184666 +0800
-@@ -274,6 +274,10 @@ masm-syntax-unified
- Target Report Var(inline_asm_unified) Init(0) Save
- Assume unified syntax for inline assembly code.
-
-+mlsrd-be-adjust
-+Target Report Var(flag_lsrd_be_adjust) Init(1)
-+Adjust ldrd/strd splitting order when it's big-endian.
-+
- mpure-code
- Target Report Var(target_pure_code) Init(0)
- Do not allow constant data to be placed in code sections.
diff --git a/floop-interchange.patch b/floop-interchange.patch
deleted file mode 100644
index 6657eede161b6f1f3bdfe001e2e69ee70b15cb3d..0000000000000000000000000000000000000000
--- a/floop-interchange.patch
+++ /dev/null
@@ -1,2680 +0,0 @@
-diff -N -urp a/gcc/Makefile.in b/gcc/Makefile.in
---- a/gcc/Makefile.in 2018-11-15 15:59:30.435048460 +0800
-+++ b/gcc/Makefile.in 2018-11-15 16:04:16.735055997 +0800
-@@ -1293,6 +1293,7 @@ OBJS = \
- gimple-fold.o \
- gimple-laddress.o \
- gimple-loop-jam.o \
-+ gimple-loop-interchange.o \
- gimple-low.o \
- gimple-pretty-print.o \
- gimple-ssa-backprop.o \
-diff -N -urp a/gcc/cfgloop.h b/gcc/cfgloop.h
---- a/gcc/cfgloop.h 2018-11-15 15:59:30.439048461 +0800
-+++ b/gcc/cfgloop.h 2018-11-15 16:03:17.431054436 +0800
-@@ -225,6 +225,16 @@ struct GTY ((chain_next ("%h.next"))) lo
- builtins. */
- tree simduid;
-
-+ /* In loop optimization, it's common to generate loops from the original
-+ loop. This field records the index of the original loop which can be
-+ used to track the original loop from newly generated loops. This can
-+ be done by calling function get_loop (cfun, orig_loop_num). Note the
-+ original loop could be destroyed for various reasons thus no longer
-+ exists, as a result, function call to get_loop returns NULL pointer.
-+ In this case, this field should not be used and needs to be cleared
-+ whenever possible. */
-+ int orig_loop_num;
-+
- /* Upper bound on number of iterations of a loop. */
- struct nb_iter_bound *bounds;
-
-diff -N -urp a/gcc/common.opt b/gcc/common.opt
---- a/gcc/common.opt 2018-11-15 15:59:30.447048461 +0800
-+++ b/gcc/common.opt 2018-11-15 16:03:17.431054436 +0800
-@@ -1488,8 +1488,8 @@ Common Alias(floop-nest-optimize)
- Enable loop nest transforms. Same as -floop-nest-optimize.
-
- floop-interchange
--Common Alias(floop-nest-optimize)
--Enable loop nest transforms. Same as -floop-nest-optimize.
-+Common Report Var(flag_loop_interchange) Optimization
-+Enable loop interchange on trees.
-
- floop-block
- Common Alias(floop-nest-optimize)
-diff -N -urp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
---- a/gcc/doc/invoke.texi 2018-11-15 15:59:30.451048461 +0800
-+++ b/gcc/doc/invoke.texi 2018-11-15 16:05:06.803057315 +0800
-@@ -8224,11 +8224,9 @@ Perform loop optimizations on trees. Th
- at @option{-O} and higher.
-
- @item -ftree-loop-linear
--@itemx -floop-interchange
- @itemx -floop-strip-mine
- @itemx -floop-block
- @opindex ftree-loop-linear
--@opindex floop-interchange
- @opindex floop-strip-mine
- @opindex floop-block
- Perform loop nest optimizations. Same as
-@@ -8328,6 +8326,25 @@ Apply unroll and jam transformations on
- nest this unrolls the outer loop by some factor and fuses the resulting
- multiple inner loops. This flag is enabled by default at @option{-O3}.
-
-+@item -floop-interchange
-+@opindex floop-interchange
-+Perform loop interchange outside of graphite. This flag can improve cache
-+performance on loop nest and allow further loop optimizations, like
-+vectorization, to take place. For example, the loop
-+@smallexample
-+for (int i = 0; i < N; i++)
-+ for (int j = 0; j < N; j++)
-+ for (int k = 0; k < N; k++)
-+ c[i][j] = c[i][j] + a[i][k]*b[k][j];
-+@end smallexample
-+is transformed to
-+@smallexample
-+for (int i = 0; i < N; i++)
-+ for (int k = 0; k < N; k++)
-+ for (int j = 0; j < N; j++)
-+ c[i][j] = c[i][j] + a[i][k]*b[k][j];
-+@end smallexample
-+
- @item -ftree-loop-im
- @opindex ftree-loop-im
- Perform loop invariant motion on trees. This pass moves only invariants that
-@@ -10203,6 +10220,12 @@ The size of L1 cache, in kilobytes.
- @item l2-cache-size
- The size of L2 cache, in kilobytes.
-
-+@item loop-interchange-max-num-stmts
-+The maximum number of stmts in a loop to be interchanged.
-+
-+@item loop-interchange-stride-ratio
-+The minimum ratio between stride of two loops for interchange to be profitable.
-+
- @item min-insn-to-prefetch-ratio
- The minimum ratio between the number of instructions and the
- number of prefetches to enable prefetching in a loop.
-diff -N -urp a/gcc/gimple-loop-interchange.cc b/gcc/gimple-loop-interchange.cc
---- a/gcc/gimple-loop-interchange.cc 1970-01-01 08:00:00.000000000 +0800
-+++ b/gcc/gimple-loop-interchange.cc 2018-11-15 16:03:17.443054436 +0800
-@@ -0,0 +1,2039 @@
-+/* Loop interchange.
-+ Copyright (C) 2017 Free Software Foundation, Inc.
-+ Contributed by ARM Ltd.
-+
-+This file is part of GCC.
-+
-+GCC is free software; you can redistribute it and/or modify it
-+under the terms of the GNU General Public License as published by the
-+Free Software Foundation; either version 3, or (at your option) any
-+later version.
-+
-+GCC is distributed in the hope that it will be useful, but WITHOUT
-+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+for more details.
-+
-+You should have received a copy of the GNU General Public License
-+along with GCC; see the file COPYING3. If not see
-+<http://www.gnu.org/licenses/>. */
-+
-+#include "config.h"
-+#include "system.h"
-+#include "coretypes.h"
-+#include "backend.h"
-+#include "is-a.h"
-+#include "tree.h"
-+#include "gimple.h"
-+#include "tree-pass.h"
-+#include "ssa.h"
-+#include "gimple-pretty-print.h"
-+#include "fold-const.h"
-+#include "gimplify.h"
-+#include "gimple-iterator.h"
-+#include "gimplify-me.h"
-+#include "cfgloop.h"
-+#include "params.h"
-+#include "tree-ssa.h"
-+#include "tree-scalar-evolution.h"
-+#include "tree-ssa-loop-manip.h"
-+#include "tree-ssa-loop-niter.h"
-+#include "tree-ssa-loop-ivopts.h"
-+#include "tree-ssa-dce.h"
-+#include "tree-data-ref.h"
-+#include "tree-vectorizer.h"
-+
-+/* This pass performs loop interchange: for example, the loop nest
-+
-+ for (int j = 0; j < N; j++)
-+ for (int k = 0; k < N; k++)
-+ for (int i = 0; i < N; i++)
-+ c[i][j] = c[i][j] + a[i][k]*b[k][j];
-+
-+ is transformed to
-+
-+ for (int i = 0; i < N; i++)
-+ for (int j = 0; j < N; j++)
-+ for (int k = 0; k < N; k++)
-+ c[i][j] = c[i][j] + a[i][k]*b[k][j];
-+
-+ This pass implements loop interchange in the following steps:
-+
-+ 1) Find perfect loop nest for each innermost loop and compute data
-+ dependence relations for it. For above example, loop nest is
-+ <loop_j, loop_k, loop_i>.
-+ 2) From innermost to outermost loop, this pass tries to interchange
-+ each loop pair. For above case, it firstly tries to interchange
-+ <loop_k, loop_i> and loop nest becomes <loop_j, loop_i, loop_k>.
-+ Then it tries to interchange <loop_j, loop_i> and loop nest becomes
-+ <loop_i, loop_j, loop_k>. The overall effect is to move innermost
-+ loop to the outermost position. For loop pair <loop_i, loop_j>
-+ to be interchanged, we:
-+ 3) Check if data dependence relations are valid for loop interchange.
-+ 4) Check if both loops can be interchanged in terms of transformation.
-+ 5) Check if interchanging the two loops is profitable.
-+ 6) Interchange the two loops by mapping induction variables.
-+
-+ This pass also handles reductions in loop nest. So far we only support
-+ simple reduction of inner loop and double reduction of the loop nest. */
-+
-+/* Maximum number of stmts in each loop that should be interchanged. */
-+#define MAX_NUM_STMT (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS))
-+/* Maximum number of data references in loop nest. */
-+#define MAX_DATAREFS (PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
-+
-+/* Comparison ratio of access stride between inner/outer loops to be
-+ interchanged. This is the minimum stride ratio for loop interchange
-+ to be profitable. */
-+#define OUTER_STRIDE_RATIO (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO))
-+/* The same as above, but we require higher ratio for interchanging the
-+ innermost two loops. */
-+#define INNER_STRIDE_RATIO ((OUTER_STRIDE_RATIO) + 1)
-+
-+/* Vector of strides that DR accesses in each level loop of a loop nest. */
-+#define DR_ACCESS_STRIDE(dr) ((vec<tree> *) dr->aux)
-+
-+/* Structure recording loop induction variable. */
-+typedef struct induction
-+{
-+ /* IV itself. */
-+ tree var;
-+ /* IV's initializing value, which is the init arg of the IV PHI node. */
-+ tree init_val;
-+ /* IV's initializing expr, which is (the expanded result of) init_val. */
-+ tree init_expr;
-+ /* IV's step. */
-+ tree step;
-+} *induction_p;
-+
-+/* Enum type for loop reduction variable. */
-+enum reduction_type
-+{
-+ UNKNOWN_RTYPE = 0,
-+ SIMPLE_RTYPE,
-+ DOUBLE_RTYPE
-+};
-+
-+/* Structure recording loop reduction variable. */
-+typedef struct reduction
-+{
-+ /* Reduction itself. */
-+ tree var;
-+ /* PHI node defining reduction variable. */
-+ gphi *phi;
-+ /* Init and next variables of the reduction. */
-+ tree init;
-+ tree next;
-+ /* Lcssa PHI node if reduction is used outside of its definition loop. */
-+ gphi *lcssa_phi;
-+ /* Stmts defining init and next. */
-+ gimple *producer;
-+ gimple *consumer;
-+ /* If init is loaded from memory, this is the loading memory reference. */
-+ tree init_ref;
-+ /* If reduction is finally stored to memory, this is the stored memory
-+ reference. */
-+ tree fini_ref;
-+ enum reduction_type type;
-+} *reduction_p;
-+
-+
-+/* Dump reduction RE. */
-+
-+static void
-+dump_reduction (reduction_p re)
-+{
-+ if (re->type == SIMPLE_RTYPE)
-+ fprintf (dump_file, " Simple reduction: ");
-+ else if (re->type == DOUBLE_RTYPE)
-+ fprintf (dump_file, " Double reduction: ");
-+ else
-+ fprintf (dump_file, " Unknown reduction: ");
-+
-+ print_gimple_stmt (dump_file, re->phi, 0);
-+}
-+
-+/* Dump LOOP's induction IV. */
-+static void
-+dump_induction (struct loop *loop, induction_p iv)
-+{
-+ fprintf (dump_file, " Induction: ");
-+ print_generic_expr (dump_file, iv->var, TDF_SLIM);
-+ fprintf (dump_file, " = {");
-+ print_generic_expr (dump_file, iv->init_expr, TDF_SLIM);
-+ fprintf (dump_file, ", ");
-+ print_generic_expr (dump_file, iv->step, TDF_SLIM);
-+ fprintf (dump_file, "}_%d\n", loop->num);
-+}
-+
-+/* Loop candidate for interchange. */
-+
-+struct loop_cand
-+{
-+ loop_cand (struct loop *, struct loop *);
-+ ~loop_cand ();
-+
-+ reduction_p find_reduction_by_stmt (gimple *);
-+ void classify_simple_reduction (reduction_p);
-+ bool analyze_iloop_reduction_var (tree);
-+ bool analyze_oloop_reduction_var (loop_cand *, tree);
-+ bool analyze_induction_var (tree, tree);
-+ bool analyze_carried_vars (loop_cand *);
-+ bool analyze_lcssa_phis (void);
-+ bool can_interchange_p (loop_cand *);
-+ bool supported_operations (basic_block, loop_cand *, int *);
-+ void undo_simple_reduction (reduction_p, bitmap);
-+
-+ /* The loop itself. */
-+ struct loop *m_loop;
-+ /* The outer loop for interchange. It equals to loop if this loop cand
-+ itself represents the outer loop. */
-+ struct loop *m_outer;
-+ /* Vector of induction variables in loop. */
-+ vec<induction_p> m_inductions;
-+ /* Vector of reduction variables in loop. */
-+ vec<reduction_p> m_reductions;
-+ /* Lcssa PHI nodes of this loop. */
-+ vec<gphi *> m_lcssa_nodes;
-+ /* Single exit edge of this loop. */
-+ edge m_exit;
-+ /* Basic blocks of this loop. */
-+ basic_block *m_bbs;
-+};
-+
-+/* Constructor. */
-+
-+loop_cand::loop_cand (struct loop *loop, struct loop *outer)
-+ : m_loop (loop), m_outer (outer),
-+ m_exit (single_exit (loop)), m_bbs (get_loop_body (loop))
-+{
-+ m_inductions.create (3);
-+ m_reductions.create (3);
-+ m_lcssa_nodes.create (3);
-+}
-+
-+/* Destructor. */
-+
-+loop_cand::~loop_cand ()
-+{
-+ induction_p iv;
-+ for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i)
-+ free (iv);
-+
-+ reduction_p re;
-+ for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
-+ free (re);
-+
-+ m_inductions.release ();
-+ m_reductions.release ();
-+ m_lcssa_nodes.release ();
-+ free (m_bbs);
-+}
-+
-+/* Return single use stmt of VAR in LOOP, otherwise return NULL. */
-+
-+static gimple *
-+single_use_in_loop (tree var, struct loop *loop)
-+{
-+ gimple *stmt, *res = NULL;
-+ use_operand_p use_p;
-+ imm_use_iterator iterator;
-+
-+ FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
-+ {
-+ stmt = USE_STMT (use_p);
-+ if (is_gimple_debug (stmt))
-+ continue;
-+
-+ if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
-+ continue;
-+
-+ if (res)
-+ return NULL;
-+
-+ res = stmt;
-+ }
-+ return res;
-+}
-+
-+/* Return true if E is unsupported in loop interchange, i.e, E is a complex
-+ edge or part of irreducible loop. */
-+
-+static inline bool
-+unsupported_edge (edge e)
-+{
-+ return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP));
-+}
-+
-+/* Return the reduction if STMT is one of its lcssa PHI, producer or consumer
-+ stmt. */
-+
-+reduction_p
-+loop_cand::find_reduction_by_stmt (gimple *stmt)
-+{
-+ gphi *phi = dyn_cast <gphi *> (stmt);
-+ reduction_p re;
-+
-+ for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
-+ if ((phi != NULL && phi == re->lcssa_phi)
-+ || (stmt == re->producer || stmt == re->consumer))
-+ return re;
-+
-+ return NULL;
-+}
-+
-+/* Return true if all stmts in BB can be supported by loop interchange,
-+ otherwise return false. ILOOP is not NULL if this loop_cand is the
-+ outer loop in loop nest. Add the number of supported statements to
-+ NUM_STMTS. */
-+
-+bool
-+loop_cand::supported_operations (basic_block bb, loop_cand *iloop,
-+ int *num_stmts)
-+{
-+ int bb_num_stmts = 0;
-+ gphi_iterator psi;
-+ gimple_stmt_iterator gsi;
-+
-+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-+ {
-+ gimple *stmt = gsi_stmt (gsi);
-+ if (is_gimple_debug (stmt))
-+ continue;
-+
-+ if (gimple_has_side_effects (stmt))
-+ return false;
-+
-+ bb_num_stmts++;
-+ if (gcall *call = dyn_cast <gcall *> (stmt))
-+ {
-+ /* In basic block of outer loop, the call should be cheap since
-+ it will be moved to inner loop. */
-+ if (iloop != NULL
-+ && !gimple_inexpensive_call_p (call))
-+ return false;
-+ continue;
-+ }
-+
-+ if (!iloop || !gimple_vuse (stmt))
-+ continue;
-+
-+ /* Support stmt accessing memory in outer loop only if it is for inner
-+ loop's reduction. */
-+ if (iloop->find_reduction_by_stmt (stmt))
-+ continue;
-+
-+ tree lhs;
-+ /* Support loop invariant memory reference if it's only used once by
-+ inner loop. */
-+ /* ??? How's this checking for invariantness? */
-+ if (gimple_assign_single_p (stmt)
-+ && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
-+ && TREE_CODE (lhs) == SSA_NAME
-+ && single_use_in_loop (lhs, iloop->m_loop))
-+ continue;
-+
-+ return false;
-+ }
-+ *num_stmts += bb_num_stmts;
-+
-+ /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
-+ loop's header, or PHI nodes in dest bb of inner loop's exit edge. */
-+ if (!iloop || bb == m_loop->header
-+ || bb == iloop->m_exit->dest)
-+ return true;
-+
-+ /* Don't allow any other PHI nodes. */
-+ for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
-+ if (!virtual_operand_p (PHI_RESULT (psi.phi ())))
-+ return false;
-+
-+ return true;
-+}
-+
-+/* Return true if current loop_cand be interchanged. ILOOP is not NULL if
-+ current loop_cand is outer loop in loop nest. */
-+
-+bool
-+loop_cand::can_interchange_p (loop_cand *iloop)
-+{
-+ /* For now we only support at most one reduction. */
-+ unsigned allowed_reduction_num = 1;
-+
-+ /* Only support reduction if the loop nest to be interchanged is the
-+ innermostin two loops. */
-+ if ((iloop == NULL && m_loop->inner != NULL)
-+ || (iloop != NULL && iloop->m_loop->inner != NULL))
-+ allowed_reduction_num = 0;
-+
-+ if (m_reductions.length () > allowed_reduction_num
-+ || (m_reductions.length () == 1
-+ && m_reductions[0]->type == UNKNOWN_RTYPE))
-+ return false;
-+
-+ /* Only support lcssa PHI node which is for reduction. */
-+ if (m_lcssa_nodes.length () > allowed_reduction_num)
-+ return false;
-+
-+ int num_stmts = 0;
-+ /* Check basic blocks other than loop header/exit. */
-+ for (unsigned i = 0; i < m_loop->num_nodes; i++)
-+ {
-+ basic_block bb = m_bbs[i];
-+
-+ /* Skip basic blocks of inner loops. */
-+ if (bb->loop_father != m_loop)
-+ continue;
-+
-+ /* Check if basic block has any unsupported operation. */
-+ if (!supported_operations (bb, iloop, &num_stmts))
-+ return false;
-+
-+ /* Check if loop has too many stmts. */
-+ if (num_stmts > MAX_NUM_STMT)
-+ return false;
-+ }
-+
-+ return true;
-+}
-+
-+/* Programmers and optimizers (like loop store motion) may optimize code:
-+
-+ for (int i = 0; i < N; i++)
-+ for (int j = 0; j < N; j++)
-+ a[i] += b[j][i] * c[j][i];
-+
-+ into reduction:
-+
-+ for (int i = 0; i < N; i++)
-+ {
-+ // producer. Note sum can be intitialized to a constant.
-+ int sum = a[i];
-+ for (int j = 0; j < N; j++)
-+ {
-+ sum += b[j][i] * c[j][i];
-+ }
-+ // consumer.
-+ a[i] = sum;
-+ }
-+
-+ The result code can't be interchanged without undoing the optimization.
-+ This function classifies this kind reduction and records information so
-+ that we can undo the store motion during interchange. */
-+
-+void
-+loop_cand::classify_simple_reduction (reduction_p re)
-+{
-+ gimple *producer, *consumer;
-+
-+ /* Check init variable of reduction and how it is initialized. */
-+ if (TREE_CODE (re->init) == SSA_NAME)
-+ {
-+ producer = SSA_NAME_DEF_STMT (re->init);
-+ re->producer = producer;
-+ basic_block bb = gimple_bb (producer);
-+ if (!bb || bb->loop_father != m_outer)
-+ return;
-+
-+ if (!gimple_assign_load_p (producer))
-+ return;
-+
-+ re->init_ref = gimple_assign_rhs1 (producer);
-+ }
-+ else if (!CONSTANT_CLASS_P (re->init))
-+ return;
-+
-+ /* Check how reduction variable is used. */
-+ consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer);
-+ if (!consumer
-+ || !gimple_store_p (consumer))
-+ return;
-+
-+ re->fini_ref = gimple_get_lhs (consumer);
-+ re->consumer = consumer;
-+
-+ /* Simple reduction with constant initializer. */
-+ if (!re->init_ref)
-+ {
-+ gcc_assert (CONSTANT_CLASS_P (re->init));
-+ re->init_ref = unshare_expr (re->fini_ref);
-+ }
-+
-+ /* Require memory references in producer and consumer are the same so
-+ that we can undo reduction during interchange. */
-+ if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0))
-+ return;
-+
-+ re->type = SIMPLE_RTYPE;
-+}
-+
-+/* Analyze reduction variable VAR for inner loop of the loop nest to be
-+ interchanged. Return true if analysis succeeds. */
-+
-+bool
-+loop_cand::analyze_iloop_reduction_var (tree var)
-+{
-+ gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
-+ gphi *lcssa_phi = NULL, *use_phi;
-+ tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
-+ tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
-+ reduction_p re;
-+ gimple *stmt, *next_def, *single_use = NULL;
-+ use_operand_p use_p;
-+ imm_use_iterator iterator;
-+
-+ if (TREE_CODE (next) != SSA_NAME)
-+ return false;
-+
-+ next_def = SSA_NAME_DEF_STMT (next);
-+ basic_block bb = gimple_bb (next_def);
-+ if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
-+ return false;
-+
-+ /* In restricted reduction, the var is (and must be) used in defining
-+ the updated var. The process can be depicted as below:
-+
-+ var ;; = PHI <init, next>
-+ |
-+ |
-+ v
-+ +---------------------+
-+ | reduction operators | <-- other operands
-+ +---------------------+
-+ |
-+ |
-+ v
-+ next
-+
-+ In terms loop interchange, we don't change how NEXT is computed based
-+ on VAR and OTHER OPERANDS. In case of double reduction in loop nest
-+ to be interchanged, we don't changed it at all. In the case of simple
-+ reduction in inner loop, we only make change how VAR/NEXT is loaded or
-+ stored. With these conditions, we can relax restrictions on reduction
-+ in a way that reduction operation is seen as black box. In general,
-+ we can ignore reassociation of reduction operator; we can handle fake
-+ reductions in which VAR is not even used to compute NEXT. */
-+ if (! single_imm_use (var, &use_p, &single_use)
-+ || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
-+ return false;
-+
-+ /* Check the reduction operation. We require a left-associative operation.
-+ For FP math we also need to be allowed to associate operations. */
-+ if (gassign *ass = dyn_cast <gassign *> (single_use))
-+ {
-+ enum tree_code code = gimple_assign_rhs_code (ass);
-+ if (! (associative_tree_code (code)
-+ || (code == MINUS_EXPR
-+ && use_p->use == gimple_assign_rhs1_ptr (ass)))
-+ || (FLOAT_TYPE_P (TREE_TYPE (var))
-+ && ! flag_associative_math))
-+ return false;
-+ }
-+ else
-+ return false;
-+
-+ /* Handle and verify a series of stmts feeding the reduction op. */
-+ if (single_use != next_def
-+ && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next,
-+ gimple_assign_rhs_code (single_use)))
-+ return false;
-+
-+ /* Only support cases in which INIT is used in inner loop. */
-+ if (TREE_CODE (init) == SSA_NAME)
-+ FOR_EACH_IMM_USE_FAST (use_p, iterator, init)
-+ {
-+ stmt = USE_STMT (use_p);
-+ if (is_gimple_debug (stmt))
-+ continue;
-+
-+ if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
-+ return false;
-+ }
-+
-+ FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
-+ {
-+ stmt = USE_STMT (use_p);
-+ if (is_gimple_debug (stmt))
-+ continue;
-+
-+ /* Or else it's used in PHI itself. */
-+ use_phi = dyn_cast <gphi *> (stmt);
-+ if (use_phi == phi)
-+ continue;
-+
-+ if (use_phi != NULL
-+ && lcssa_phi == NULL
-+ && gimple_bb (stmt) == m_exit->dest
-+ && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
-+ lcssa_phi = use_phi;
-+ else
-+ return false;
-+ }
-+ if (!lcssa_phi)
-+ return false;
-+
-+ re = XCNEW (struct reduction);
-+ re->var = var;
-+ re->init = init;
-+ re->next = next;
-+ re->phi = phi;
-+ re->lcssa_phi = lcssa_phi;
-+
-+ classify_simple_reduction (re);
-+
-+ if (dump_file && (dump_flags & TDF_DETAILS))
-+ dump_reduction (re);
-+
-+ m_reductions.safe_push (re);
-+ return true;
-+}
-+
-+/* Analyze reduction variable VAR for outer loop of the loop nest to be
-+ interchanged. ILOOP is not NULL and points to inner loop. For the
-+ moment, we only support double reduction for outer loop, like:
-+
-+ for (int i = 0; i < n; i++)
-+ {
-+ int sum = 0;
-+
-+ for (int j = 0; j < n; j++) // outer loop
-+ for (int k = 0; k < n; k++) // inner loop
-+ sum += a[i][k]*b[k][j];
-+
-+ s[i] = sum;
-+ }
-+
-+ Note the innermost two loops are the loop nest to be interchanged.
-+ Return true if analysis succeeds. */
-+
-+bool
-+loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var)
-+{
-+ gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
-+ gphi *lcssa_phi = NULL, *use_phi;
-+ tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
-+ tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
-+ reduction_p re;
-+ gimple *stmt, *next_def;
-+ use_operand_p use_p;
-+ imm_use_iterator iterator;
-+
-+ if (TREE_CODE (next) != SSA_NAME)
-+ return false;
-+
-+ next_def = SSA_NAME_DEF_STMT (next);
-+ basic_block bb = gimple_bb (next_def);
-+ if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
-+ return false;
-+
-+ /* Find inner loop's simple reduction that uses var as initializer. */
-+ reduction_p inner_re = NULL;
-+ for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i)
-+ if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0))
-+ break;
-+
-+ if (inner_re == NULL
-+ || inner_re->type != UNKNOWN_RTYPE
-+ || inner_re->producer != phi)
-+ return false;
-+
-+ /* In case of double reduction, outer loop's reduction should be updated
-+ by inner loop's simple reduction. */
-+ if (next_def != inner_re->lcssa_phi)
-+ return false;
-+
-+ /* Outer loop's reduction should only be used to initialize inner loop's
-+ simple reduction. */
-+ if (! single_imm_use (var, &use_p, &stmt)
-+ || stmt != inner_re->phi)
-+ return false;
-+
-+ /* Check this reduction is correctly used outside of loop via lcssa phi. */
-+ FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
-+ {
-+ stmt = USE_STMT (use_p);
-+ if (is_gimple_debug (stmt))
-+ continue;
-+
-+ /* Or else it's used in PHI itself. */
-+ use_phi = dyn_cast <gphi *> (stmt);
-+ if (use_phi == phi)
-+ continue;
-+
-+ if (lcssa_phi == NULL
-+ && use_phi != NULL
-+ && gimple_bb (stmt) == m_exit->dest
-+ && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
-+ lcssa_phi = use_phi;
-+ else
-+ return false;
-+ }
-+ if (!lcssa_phi)
-+ return false;
-+
-+ re = XCNEW (struct reduction);
-+ re->var = var;
-+ re->init = init;
-+ re->next = next;
-+ re->phi = phi;
-+ re->lcssa_phi = lcssa_phi;
-+ re->type = DOUBLE_RTYPE;
-+ inner_re->type = DOUBLE_RTYPE;
-+
-+ if (dump_file && (dump_flags & TDF_DETAILS))
-+ dump_reduction (re);
-+
-+ m_reductions.safe_push (re);
-+ return true;
-+}
-+
-+/* Return true if VAR is induction variable of current loop whose scev is
-+ specified by CHREC. */
-+
-+bool
-+loop_cand::analyze_induction_var (tree var, tree chrec)
-+{
-+ gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
-+ tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
-+
-+ /* Var is loop invariant, though it's unlikely to happen. */
-+ if (tree_does_not_contain_chrecs (chrec))
-+ {
-+ struct induction *iv = XCNEW (struct induction);
-+ iv->var = var;
-+ iv->init_val = init;
-+ iv->init_expr = chrec;
-+ iv->step = build_int_cst (TREE_TYPE (chrec), 0);
-+ m_inductions.safe_push (iv);
-+ return true;
-+ }
-+
-+ if (TREE_CODE (chrec) != POLYNOMIAL_CHREC
-+ || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num
-+ || tree_contains_chrecs (CHREC_LEFT (chrec), NULL)
-+ || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL))
-+ return false;
-+
-+ struct induction *iv = XCNEW (struct induction);
-+ iv->var = var;
-+ iv->init_val = init;
-+ iv->init_expr = CHREC_LEFT (chrec);
-+ iv->step = CHREC_RIGHT (chrec);
-+
-+ if (dump_file && (dump_flags & TDF_DETAILS))
-+ dump_induction (m_loop, iv);
-+
-+ m_inductions.safe_push (iv);
-+ return true;
-+}
-+
-+/* Return true if all loop carried variables defined in loop header can
-+ be successfully analyzed. */
-+
-+bool
-+loop_cand::analyze_carried_vars (loop_cand *iloop)
-+{
-+ edge e = loop_preheader_edge (m_outer);
-+ gphi_iterator gsi;
-+
-+ if (dump_file && (dump_flags & TDF_DETAILS))
-+ fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num);
-+
-+ for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
-+ {
-+ gphi *phi = gsi.phi ();
-+
-+ tree var = PHI_RESULT (phi);
-+ if (virtual_operand_p (var))
-+ continue;
-+
-+ tree chrec = analyze_scalar_evolution (m_loop, var);
-+ chrec = instantiate_scev (e, m_loop, chrec);
-+
-+ /* Analyze var as reduction variable. */
-+ if (chrec_contains_undetermined (chrec)
-+ || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num))
-+ {
-+ if (iloop && !analyze_oloop_reduction_var (iloop, var))
-+ return false;
-+ if (!iloop && !analyze_iloop_reduction_var (var))
-+ return false;
-+ }
-+ /* Analyze var as induction variable. */
-+ else if (!analyze_induction_var (var, chrec))
-+ return false;
-+ }
-+
-+ return true;
-+}
-+
-+/* Return TRUE if loop closed PHI nodes can be analyzed successfully. */
-+
-+bool
-+loop_cand::analyze_lcssa_phis (void)
-+{
-+ gphi_iterator gsi;
-+ for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
-+ {
-+ gphi *phi = gsi.phi ();
-+
-+ if (virtual_operand_p (PHI_RESULT (phi)))
-+ continue;
-+
-+ /* TODO: We only support lcssa phi for reduction for now. */
-+ if (!find_reduction_by_stmt (phi))
-+ return false;
-+ }
-+
-+ return true;
-+}
-+
-+/* CONSUMER is a stmt in BB storing reduction result into memory object.
-+ When the reduction is intialized from constant value, we need to add
-+ a stmt loading from the memory object to target basic block in inner
-+ loop during undoing the reduction. Problem is that memory reference
-+ may use ssa variables not dominating the target basic block. This
-+ function finds all stmts on which CONSUMER depends in basic block BB,
-+ records and returns them via STMTS. */
-+
-+static void
-+find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
-+{
-+ auto_vec<gimple *, 4> worklist;
-+ use_operand_p use_p;
-+ ssa_op_iter iter;
-+ gimple *stmt, *def_stmt;
-+ gimple_stmt_iterator gsi;
-+
-+ /* First clear flag for stmts in bb. */
-+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-+ gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false);
-+
-+ /* DFS search all depended stmts in bb and mark flag for these stmts. */
-+ worklist.safe_push (consumer);
-+ while (!worklist.is_empty ())
-+ {
-+ stmt = worklist.pop ();
-+ FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
-+ {
-+ def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
-+
-+ if (is_a <gphi *> (def_stmt)
-+ || gimple_bb (def_stmt) != bb
-+ || gimple_plf (def_stmt, GF_PLF_1))
-+ continue;
-+
-+ worklist.safe_push (def_stmt);
-+ }
-+ gimple_set_plf (stmt, GF_PLF_1, true);
-+ }
-+ for (gsi = gsi_start_bb_nondebug (bb);
-+ !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;)
-+ {
-+ /* Move dep stmts to sequence STMTS. */
-+ if (gimple_plf (stmt, GF_PLF_1))
-+ {
-+ gsi_remove (&gsi, false);
-+ gimple_seq_add_stmt_without_update (stmts, stmt);
-+ }
-+ else
-+ gsi_next_nondebug (&gsi);
-+ }
-+}
-+
-+/* Users can write, and optimizers can generate, a simple reduction RE for
-+ the inner loop. To make interchange valid, we have to undo the reduction by
-+ moving producer and consumer stmts into the inner loop. For example,
-+ below code:
-+
-+ init = MEM_REF[idx]; //producer
-+ loop:
-+ var = phi
-+ next = var op ...
-+ reduc_sum = phi
-+ MEM_REF[idx] = reduc_sum //consumer
-+
-+ is transformed into:
-+
-+ loop:
-+ new_var = MEM_REF[idx]; //producer after moving
-+ next = new_var op ...
-+ MEM_REF[idx] = next; //consumer after moving
-+
-+ Note if the reduction variable is initialized to constant, like:
-+
-+ var = phi<0.0, next>
-+
-+ we compute new_var as below:
-+
-+ loop:
-+ tmp = MEM_REF[idx];
-+ new_var = !first_iteration ? tmp : 0.0;
-+
-+ so that the initial const is used in the first iteration of loop. Also
-+ record ssa variables for dead code elimination in DCE_SEEDS. */
-+
-+void
-+loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds)
-+{
-+ gimple *stmt;
-+ gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header);
-+ gimple_seq stmts = NULL;
-+ tree new_var;
-+
-+ /* Prepare the initialization stmts and insert it to inner loop. */
-+ if (re->producer != NULL)
-+ {
-+ gimple_set_vuse (re->producer, NULL_TREE);
-+ from = gsi_for_stmt (re->producer);
-+ gsi_remove (&from, false);
-+ gimple_seq_add_stmt_without_update (&stmts, re->producer);
-+ new_var = re->init;
-+ }
-+ else
-+ {
-+ /* Find all stmts on which expression "MEM_REF[idx]" depends. */
-+ find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer);
-+ /* Because we generate new stmt loading from the MEM_REF to TMP. */
-+ tree cond, tmp = copy_ssa_name (re->var);
-+ stmt = gimple_build_assign (tmp, re->init_ref);
-+ gimple_seq_add_stmt_without_update (&stmts, stmt);
-+
-+ /* Init new_var to MEM_REF or CONST depending on if it is the first
-+ iteration. */
-+ induction_p iv = m_inductions[0];
-+ cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init_val);
-+ new_var = copy_ssa_name (re->var);
-+ stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
-+ gimple_seq_add_stmt_without_update (&stmts, stmt);
-+ }
-+ gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);
-+
-+ /* Replace all uses of reduction var with new variable. */
-+ use_operand_p use_p;
-+ imm_use_iterator iterator;
-+ FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var)
-+ {
-+ FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
-+ SET_USE (use_p, new_var);
-+
-+ update_stmt (stmt);
-+ }
-+
-+ /* Move consumer stmt into inner loop, just after reduction next's def. */
-+ unlink_stmt_vdef (re->consumer);
-+ release_ssa_name (gimple_vdef (re->consumer));
-+ gimple_set_vdef (re->consumer, NULL_TREE);
-+ gimple_set_vuse (re->consumer, NULL_TREE);
-+ gimple_assign_set_rhs1 (re->consumer, re->next);
-+ from = gsi_for_stmt (re->consumer);
-+ to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
-+ gsi_move_after (&from, &to);
-+
-+ /* Mark the reduction variables for DCE. */
-+ bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var));
-+ bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi)));
-+}
-+
-+/* Free DATAREFS and its auxiliary memory. */
-+
-+static void
-+free_data_refs_with_aux (vec datarefs)
-+{
-+ data_reference_p dr;
-+ for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
-+ if (dr->aux != NULL)
-+ {
-+ DR_ACCESS_STRIDE (dr)->release ();
-+ free (dr->aux);
-+ }
-+
-+ free_data_refs (datarefs);
-+}
-+
-+/* Class for loop interchange transformation. */
-+
-+class tree_loop_interchange
-+{
-+public:
-+ tree_loop_interchange (vec loop_nest)
-+ : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE),
-+ m_dce_seeds (BITMAP_ALLOC (NULL)) { }
-+ ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); }
-+ bool interchange (vec, vec);
-+
-+private:
-+ void update_data_info (unsigned, unsigned, vec, vec);
-+ bool valid_data_dependences (unsigned, unsigned, vec);
-+ void interchange_loops (loop_cand &, loop_cand &);
-+ void map_inductions_to_loop (loop_cand &, loop_cand &);
-+ void move_code_to_inner_loop (struct loop *, struct loop *, basic_block *);
-+
-+ /* The whole loop nest in which interchange is ongoing. */
-+ vec m_loop_nest;
-+ /* We create new IV which is only used in loop's exit condition check.
-+ In case of 3-level loop nest interchange, when we interchange the
-+ innermost two loops, new IV created in the middle level loop does
-+ not need to be preserved in interchanging the outermost two loops
-+ later. We record the IV so that it can be skipped. */
-+ tree m_niters_iv_var;
-+ /* Bitmap of seed variables for dead code elimination after interchange. */
-+ bitmap m_dce_seeds;
-+};
-+
-+/* Update data refs' access stride and dependence information after loop
-+ interchange. I_IDX/O_IDX gives indices of interchanged loops in loop
-+ nest. DATAREFS are data references. DDRS are data dependences. */
-+
-+void
-+tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx,
-+ vec datarefs,
-+ vec ddrs)
-+{
-+ struct data_reference *dr;
-+ struct data_dependence_relation *ddr;
-+
-+ /* Update strides of data references. */
-+ for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
-+ {
-+ vec *stride = DR_ACCESS_STRIDE (dr);
-+ gcc_assert (stride->length () > i_idx);
-+ std::swap ((*stride)[i_idx], (*stride)[o_idx]);
-+ }
-+ /* Update data dependences. */
-+ for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
-+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
-+ {
-+ for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
-+ {
-+ lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
-+ std::swap (dist_vect[i_idx], dist_vect[o_idx]);
-+ }
-+ }
-+}
-+
-+/* Check data dependence relations, return TRUE if it's valid to interchange
-+ two loops specified by I_IDX/O_IDX. Theoretically, interchanging the two
-+ loops is valid only if dist vector, after interchanging, doesn't have '>'
-+ as the leftmost non-'=' direction. Practically, this function has been
-+ conservative here by not checking some valid cases. */
-+
-+bool
-+tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx,
-+ vec ddrs)
-+{
-+ struct data_dependence_relation *ddr;
-+
-+ for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
-+ {
-+ /* Skip no-dependence case. */
-+ if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
-+ continue;
-+
-+ for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
-+ {
-+ lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
-+ unsigned level = dependence_level (dist_vect, m_loop_nest.length ());
-+
-+ /* If there is no carried dependence. */
-+ if (level == 0)
-+ continue;
-+
-+ level --;
-+
-+ /* If dependence is not carried by any loop in between the two
-+ loops [oloop, iloop] to interchange. */
-+ if (level < o_idx || level > i_idx)
-+ continue;
-+
-+ /* Be conservative, skip case if either direction at i_idx/o_idx
-+ levels is not '=' or '<'. */
-+ if (dist_vect[i_idx] < 0 || dist_vect[o_idx] < 0)
-+ return false;
-+ }
-+ }
-+
-+ return true;
-+}
-+
-+/* Interchange two loops specified by ILOOP and OLOOP. */
-+
-+void
-+tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop)
-+{
-+ reduction_p re;
-+ gimple_stmt_iterator gsi;
-+ tree i_niters, o_niters, var_after;
-+
-+ /* Undo inner loop's simple reduction. */
-+ for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i)
-+ if (re->type != DOUBLE_RTYPE)
-+ {
-+ if (re->producer)
-+ reset_debug_uses (re->producer);
-+
-+ iloop.undo_simple_reduction (re, m_dce_seeds);
-+ }
-+
-+ /* Only need to reset debug uses for double reduction. */
-+ for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i)
-+ {
-+ gcc_assert (re->type == DOUBLE_RTYPE);
-+ reset_debug_uses (SSA_NAME_DEF_STMT (re->var));
-+ reset_debug_uses (SSA_NAME_DEF_STMT (re->next));
-+ }
-+
-+ /* Prepare niters for both loops. */
-+ struct loop *loop_nest = m_loop_nest[0];
-+ edge instantiate_below = loop_preheader_edge (loop_nest);
-+ gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src);
-+ i_niters = number_of_latch_executions (iloop.m_loop);
-+ i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters);
-+ i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop),
-+ i_niters);
-+ i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true,
-+ NULL_TREE, false, GSI_CONTINUE_LINKING);
-+ o_niters = number_of_latch_executions (oloop.m_loop);
-+ if (oloop.m_loop != loop_nest)
-+ {
-+ o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters);
-+ o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop),
-+ o_niters);
-+ }
-+ o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true,
-+ NULL_TREE, false, GSI_CONTINUE_LINKING);
-+
-+ /* Move src's code to tgt loop. This is necessary when src is the outer
-+ loop and tgt is the inner loop. */
-+ move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs);
-+
-+ /* Map outer loop's IV to inner loop, and vice versa. */
-+ map_inductions_to_loop (oloop, iloop);
-+ map_inductions_to_loop (iloop, oloop);
-+
-+ /* Create canonical IV for both loops. Note canonical IV for outer/inner
-+ loop is actually from inner/outer loop. Also we record the new IV
-+ created for the outer loop so that it can be skipped in later loop
-+ interchange. */
-+ create_canonical_iv (oloop.m_loop, oloop.m_exit,
-+ i_niters, &m_niters_iv_var, &var_after);
-+ bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
-+ create_canonical_iv (iloop.m_loop, iloop.m_exit,
-+ o_niters, NULL, &var_after);
-+ bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
-+
-+ /* Scrap niters estimation of interchanged loops. */
-+ iloop.m_loop->any_upper_bound = false;
-+ iloop.m_loop->any_likely_upper_bound = false;
-+ free_numbers_of_iterations_estimates_loop (iloop.m_loop);
-+ oloop.m_loop->any_upper_bound = false;
-+ oloop.m_loop->any_likely_upper_bound = false;
-+ free_numbers_of_iterations_estimates_loop (oloop.m_loop);
-+
-+ /* ??? The association between the loop data structure and the
-+ CFG changed, so what was loop N at the source level is now
-+ loop M. We should think of retaining the association or breaking
-+ it fully by creating a new loop instead of re-using the "wrong" one. */
-+}
-+
-+/* Map induction variables of SRC loop to TGT loop. The function first
-+ creates the same IV of SRC loop in TGT loop, then deletes the original
-+ IV and re-initializes it using the newly created IV. For example, loop
-+ nest:
-+
-+ for (i = 0; i < N; i++)
-+ for (j = 0; j < M; j++)
-+ {
-+ //use of i;
-+ //use of j;
-+ }
-+
-+ will be transformed into:
-+
-+ for (jj = 0; jj < M; jj++)
-+ for (ii = 0; ii < N; ii++)
-+ {
-+ //use of ii;
-+ //use of jj;
-+ }
-+
-+ after loop interchange. */
-+
-+void
-+tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt)
-+{
-+ induction_p iv;
-+ edge e = tgt.m_exit;
-+ gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi;
-+
-+ /* Map source loop's IV to target loop. */
-+ for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i)
-+ {
-+ gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var);
-+ gcc_assert (is_a (stmt));
-+
-+ use_operand_p use_p;
-+ /* Only map original IV to target loop. */
-+ if (m_niters_iv_var != iv->var)
-+ {
-+ /* Map the IV by creating the same one in target loop. */
-+ tree var_before, var_after;
-+ tree base = unshare_expr (iv->init_expr);
-+ tree step = unshare_expr (iv->step);
-+ create_iv (base, step, SSA_NAME_VAR (iv->var),
-+ tgt.m_loop, &incr_pos, false, &var_before, &var_after);
-+ bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before));
-+ bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
-+
-+ /* Replace uses of the original IV var with newly created IV var. */
-+ imm_use_iterator imm_iter;
-+ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var)
-+ {
-+ FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
-+ SET_USE (use_p, var_before);
-+
-+ update_stmt (use_stmt);
-+ }
-+ }
-+
-+ /* Mark all uses for DCE. */
-+ ssa_op_iter op_iter;
-+ FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE)
-+ {
-+ tree use = USE_FROM_PTR (use_p);
-+ if (TREE_CODE (use) == SSA_NAME
-+ && ! SSA_NAME_IS_DEFAULT_DEF (use))
-+ bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use));
-+ }
-+
-+ /* Delete definition of the original IV in the source loop. */
-+ gsi = gsi_for_stmt (stmt);
-+ remove_phi_node (&gsi, true);
-+ }
-+}
-+
-+/* Move stmts of outer loop to inner loop. */
-+
-+void
-+tree_loop_interchange::move_code_to_inner_loop (struct loop *outer,
-+ struct loop *inner,
-+ basic_block *outer_bbs)
-+{
-+ basic_block oloop_exit_bb = single_exit (outer)->src;
-+ gimple_stmt_iterator gsi, to;
-+
-+ for (unsigned i = 0; i < outer->num_nodes; i++)
-+ {
-+ basic_block bb = outer_bbs[i];
-+
-+ /* Skip basic blocks of inner loop. */
-+ if (flow_bb_inside_loop_p (inner, bb))
-+ continue;
-+
-+ /* Move code from header/latch to header/latch. */
-+ if (bb == outer->header)
-+ to = gsi_after_labels (inner->header);
-+ else if (bb == outer->latch)
-+ to = gsi_after_labels (inner->latch);
-+ else
-+ /* Otherwise, simply move to exit->src. */
-+ to = gsi_last_bb (single_exit (inner)->src);
-+
-+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
-+ {
-+ gimple *stmt = gsi_stmt (gsi);
-+
-+ if (oloop_exit_bb == bb
-+ && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb)))
-+ {
-+ gsi_next (&gsi);
-+ continue;
-+ }
-+
-+ if (gimple_vuse (stmt))
-+ gimple_set_vuse (stmt, NULL_TREE);
-+ if (gimple_vdef (stmt))
-+ {
-+ unlink_stmt_vdef (stmt);
-+ release_ssa_name (gimple_vdef (stmt));
-+ gimple_set_vdef (stmt, NULL_TREE);
-+ }
-+
-+ reset_debug_uses (stmt);
-+ gsi_move_before (&gsi, &to);
-+ }
-+ }
-+}
-+
-+/* Given data reference DR in LOOP_NEST, the function computes DR's access
-+ stride at each level of loop from innermost LOOP to outer. On success,
-+ it saves access stride at each level loop in a vector which is pointed
-+ by DR->aux. For example:
-+
-+ int arr[100][100][100];
-+ for (i = 0; i < 100; i++) ;(DR->aux)strides[0] = 40000
-+ for (j = 100; j > 0; j--) ;(DR->aux)strides[1] = 400
-+ for (k = 0; k < 100; k++) ;(DR->aux)strides[2] = 4
-+ arr[i][j - 1][k] = 0; */
-+
-+static void
-+compute_access_stride (struct loop *loop_nest, struct loop *loop,
-+ data_reference_p dr)
-+{
-+ vec *strides = new vec ();
-+ basic_block bb = gimple_bb (DR_STMT (dr));
-+
-+ while (!flow_bb_inside_loop_p (loop, bb))
-+ {
-+ strides->safe_push (build_int_cst (sizetype, 0));
-+ loop = loop_outer (loop);
-+ }
-+ gcc_assert (loop == bb->loop_father);
-+
-+ tree ref = DR_REF (dr);
-+ tree scev_base = build_fold_addr_expr (ref);
-+ tree scev = analyze_scalar_evolution (loop, scev_base);
-+ scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev);
-+ if (! chrec_contains_undetermined (scev))
-+ {
-+ tree sl = scev;
-+ struct loop *expected = loop;
-+ while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
-+ {
-+ struct loop *sl_loop = get_chrec_loop (sl);
-+ while (sl_loop != expected)
-+ {
-+ strides->safe_push (size_int (0));
-+ expected = loop_outer (expected);
-+ }
-+ strides->safe_push (CHREC_RIGHT (sl));
-+ sl = CHREC_LEFT (sl);
-+ expected = loop_outer (expected);
-+ }
-+ if (! tree_contains_chrecs (sl, NULL))
-+ while (expected != loop_outer (loop_nest))
-+ {
-+ strides->safe_push (size_int (0));
-+ expected = loop_outer (expected);
-+ }
-+ }
-+
-+ dr->aux = strides;
-+}
-+
-+/* Given loop nest LOOP_NEST with innermost LOOP, the function computes
-+ access strides with respect to each level loop for all data refs in
-+ DATAREFS from inner loop to outer loop. On success, it returns the
-+ outermost loop that access strides can be computed successfully for
-+ all data references. If access strides cannot be computed at least
-+ for two levels of loop for any data reference, it returns NULL. */
-+
-+static struct loop *
-+compute_access_strides (struct loop *loop_nest, struct loop *loop,
-+ vec datarefs)
-+{
-+ unsigned i, j, num_loops = (unsigned) -1;
-+ data_reference_p dr;
-+ vec *stride;
-+
-+ for (i = 0; datarefs.iterate (i, &dr); ++i)
-+ {
-+ compute_access_stride (loop_nest, loop, dr);
-+ stride = DR_ACCESS_STRIDE (dr);
-+ if (stride->length () < num_loops)
-+ {
-+ num_loops = stride->length ();
-+ if (num_loops < 2)
-+ return NULL;
-+ }
-+ }
-+
-+ for (i = 0; datarefs.iterate (i, &dr); ++i)
-+ {
-+ stride = DR_ACCESS_STRIDE (dr);
-+ if (stride->length () > num_loops)
-+ stride->truncate (num_loops);
-+
-+ for (j = 0; j < (num_loops >> 1); ++j)
-+ std::swap ((*stride)[j], (*stride)[num_loops - j - 1]);
-+ }
-+
-+ loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops);
-+ gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop));
-+ return loop;
-+}
-+
-+/* Prune access strides for data references in DATAREFS by removing strides
-+ of loops that aren't in the current LOOP_NEST. */
-+
-+static void
-+prune_access_strides_not_in_loop (struct loop *loop_nest,
-+ struct loop *innermost,
-+ vec datarefs)
-+{
-+ data_reference_p dr;
-+ unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1;
-+ gcc_assert (num_loops > 1);
-+
-+ /* Block remove strides of loops that are not in the current loop nest. */
-+ for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
-+ {
-+ vec *stride = DR_ACCESS_STRIDE (dr);
-+ if (stride->length () > num_loops)
-+ stride->block_remove (0, stride->length () - num_loops);
-+ }
-+}
-+
-+/* Dump access strides for all DATAREFS. */
-+
-+static void
-+dump_access_strides (vec datarefs)
-+{
-+ data_reference_p dr;
-+ fprintf (dump_file, "Access Strides for DRs:\n");
-+ for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
-+ {
-+ fprintf (dump_file, " ");
-+ print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM);
-+ fprintf (dump_file, ":\t\t<");
-+
-+ vec *stride = DR_ACCESS_STRIDE (dr);
-+ unsigned num_loops = stride->length ();
-+ for (unsigned j = 0; j < num_loops; ++j)
-+ {
-+ print_generic_expr (dump_file, (*stride)[j], TDF_SLIM);
-+ fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n");
-+ }
-+ }
-+}
-+
-+/* Return true if it's profitable to interchange two loops whose index
-+ in whole loop nest vector are I_IDX/O_IDX respectively. The function
-+ computes and compares three types of information from all DATAREFS:
-+ 1) Access stride for loop I_IDX and O_IDX.
-+ 2) Number of invariant memory references with respect to I_IDX before
-+ and after loop interchange.
-+ 3) Flags indicating if all memory references access sequential memory
-+ in ILOOP, before and after loop interchange.
-+ If INNERMOST_LOOPS_P is true, the two loops for interchanging are the two
-+ innermost loops in loop nest. This function also dumps information if
-+ DUMP_INFO_P is true. */
-+
-+static bool
-+should_interchange_loops (unsigned i_idx, unsigned o_idx,
-+ vec datarefs,
-+ bool innermost_loops_p, bool dump_info_p = true)
-+{
-+ unsigned HOST_WIDE_INT ratio;
-+ unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0;
-+ struct data_reference *dr;
-+ bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
-+ widest_int iloop_strides = 0, oloop_strides = 0;
-+ unsigned num_unresolved_drs = 0;
-+ unsigned num_resolved_ok_drs = 0;
-+ unsigned num_resolved_not_ok_drs = 0;
-+
-+ if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
-+ fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
-+
-+ for (i = 0; datarefs.iterate (i, &dr); ++i)
-+ {
-+ vec *stride = DR_ACCESS_STRIDE (dr);
-+ tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
-+
-+ bool subloop_stride_p = false;
-+ /* Data ref can't be invariant or sequential access at current loop if
-+ its address changes with respect to any subloops. */
-+ for (j = i_idx + 1; j < stride->length (); ++j)
-+ if (!integer_zerop ((*stride)[j]))
-+ {
-+ subloop_stride_p = true;
-+ break;
-+ }
-+
-+ if (integer_zerop (iloop_stride))
-+ {
-+ if (!subloop_stride_p)
-+ num_old_inv_drs++;
-+ }
-+ if (integer_zerop (oloop_stride))
-+ {
-+ if (!subloop_stride_p)
-+ num_new_inv_drs++;
-+ }
-+
-+ if (TREE_CODE (iloop_stride) == INTEGER_CST
-+ && TREE_CODE (oloop_stride) == INTEGER_CST)
-+ {
-+ iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
-+ oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
-+ }
-+ else if (multiple_of_p (TREE_TYPE (iloop_stride),
-+ iloop_stride, oloop_stride))
-+ num_resolved_ok_drs++;
-+ else if (multiple_of_p (TREE_TYPE (iloop_stride),
-+ oloop_stride, iloop_stride))
-+ num_resolved_not_ok_drs++;
-+ else
-+ num_unresolved_drs++;
-+
-+ /* Data ref can't be sequential access if its address changes in sub
-+ loop. */
-+ if (subloop_stride_p)
-+ {
-+ all_seq_dr_before_p = false;
-+ all_seq_dr_after_p = false;
-+ continue;
-+ }
-+ /* Track if all data references are sequential accesses before/after loop
-+ interchange. Note invariant is considered sequential here. */
-+ tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
-+ if (all_seq_dr_before_p
-+ && ! (integer_zerop (iloop_stride)
-+ || operand_equal_p (access_size, iloop_stride, 0)))
-+ all_seq_dr_before_p = false;
-+ if (all_seq_dr_after_p
-+ && ! (integer_zerop (oloop_stride)
-+ || operand_equal_p (access_size, oloop_stride, 0)))
-+ all_seq_dr_after_p = false;
-+ }
-+
-+ if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
-+ {
-+ fprintf (dump_file, "\toverall:\t\t");
-+ print_decu (iloop_strides, dump_file);
-+ fprintf (dump_file, "\t");
-+ print_decu (oloop_strides, dump_file);
-+ fprintf (dump_file, "\n");
-+
-+ fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n",
-+ num_old_inv_drs, num_new_inv_drs);
-+ fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
-+ all_seq_dr_before_p ? "true" : "false",
-+ all_seq_dr_after_p ? "true" : "false");
-+ fprintf (dump_file, "OK to interchage with variable strides: %d\n",
-+ num_resolved_ok_drs);
-+ fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
-+ num_resolved_not_ok_drs);
-+ fprintf (dump_file, "Variable strides we cannot decide: %d\n",
-+ num_unresolved_drs);
-+ }
-+
-+ if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
-+ return false;
-+
-+ /* We use different stride comparison ratio for interchanging innermost
-+ two loops or not. The idea is to be conservative in interchange for
-+ the innermost loops. */
-+ ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO;
-+ /* Do interchange if it gives better data locality behavior. */
-+ if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio)))
-+ return true;
-+ if (wi::gtu_p (iloop_strides, oloop_strides))
-+ {
-+ /* Or it creates more invariant memory references. */
-+ if ((!all_seq_dr_before_p || all_seq_dr_after_p)
-+ && num_new_inv_drs > num_old_inv_drs)
-+ return true;
-+ /* Or it makes all memory references sequential. */
-+ if (num_new_inv_drs >= num_old_inv_drs
-+ && !all_seq_dr_before_p && all_seq_dr_after_p)
-+ return true;
-+ }
-+
-+ return false;
-+}
-+
-+/* Try to interchange inner loop of a loop nest to outer level. */
-+
-+bool
-+tree_loop_interchange::interchange (vec datarefs,
-+ vec ddrs)
-+{
-+ bool changed_p = false;
-+ /* In each iteration we try to interchange I-th loop with (I+1)-th loop.
-+ The overall effect is to push inner loop to outermost level in whole
-+ loop nest. */
-+ for (unsigned i = m_loop_nest.length (); i > 1; --i)
-+ {
-+ unsigned i_idx = i - 1, o_idx = i - 2;
-+
-+ /* Check validity for loop interchange. */
-+ if (!valid_data_dependences (i_idx, o_idx, ddrs))
-+ break;
-+
-+ loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]);
-+ loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]);
-+
-+ /* Check if we can do transformation for loop interchange. */
-+ if (!iloop.analyze_carried_vars (NULL)
-+ || !iloop.analyze_lcssa_phis ()
-+ || !oloop.analyze_carried_vars (&iloop)
-+ || !oloop.analyze_lcssa_phis ()
-+ || !iloop.can_interchange_p (NULL)
-+ || !oloop.can_interchange_p (&iloop))
-+ break;
-+
-+ /* Check profitability for loop interchange. */
-+ if (should_interchange_loops (i_idx, o_idx, datarefs,
-+ iloop.m_loop->inner == NULL))
-+ {
-+ if (dump_file && (dump_flags & TDF_DETAILS))
-+ fprintf (dump_file,
-+ "Loop_pair is interchanged\n\n",
-+ oloop.m_loop->num, iloop.m_loop->num);
-+
-+ changed_p = true;
-+ interchange_loops (iloop, oloop);
-+ /* No need to update if there is no further loop interchange. */
-+ if (o_idx > 0)
-+ update_data_info (i_idx, o_idx, datarefs, ddrs);
-+ }
-+ else
-+ {
-+ if (dump_file && (dump_flags & TDF_DETAILS))
-+ fprintf (dump_file,
-+ "Loop_pair is not interchanged\n\n",
-+ oloop.m_loop->num, iloop.m_loop->num);
-+ }
-+ }
-+
-+ simple_dce_from_worklist (m_dce_seeds);
-+ return changed_p;
-+}
-+
-+
-+/* Loop interchange pass. */
-+
-+namespace {
-+
-+const pass_data pass_data_linterchange =
-+{
-+ GIMPLE_PASS, /* type */
-+ "linterchange", /* name */
-+ OPTGROUP_LOOP, /* optinfo_flags */
-+ TV_LINTERCHANGE, /* tv_id */
-+ PROP_cfg, /* properties_required */
-+ 0, /* properties_provided */
-+ 0, /* properties_destroyed */
-+ 0, /* todo_flags_start */
-+ 0, /* todo_flags_finish */
-+};
-+
-+class pass_linterchange : public gimple_opt_pass
-+{
-+public:
-+ pass_linterchange (gcc::context *ctxt)
-+ : gimple_opt_pass (pass_data_linterchange, ctxt)
-+ {}
-+
-+ /* opt_pass methods: */
-+ opt_pass * clone () { return new pass_linterchange (m_ctxt); }
-+ virtual bool gate (function *) { return flag_loop_interchange; }
-+ virtual unsigned int execute (function *);
-+
-+}; // class pass_linterchange
-+
-+
-+/* Return true if LOOP has proper form for interchange. We check three
-+ conditions in the function:
-+ 1) In general, a loop can be interchanged only if it doesn't have
-+ basic blocks other than header, exit and latch besides possible
-+ inner loop nest. This basically restricts loop interchange to
-+ below form loop nests:
-+
-+ header<---+
-+ | |
-+ v |
-+ INNER_LOOP |
-+ | |
-+ v |
-+ exit--->latch
-+
-+ 2) Data reference in a basic block that executes a different number of
-+ times than the loop header/exit is not allowed.
-+ 3) Record the innermost outer loop that doesn't form rectangle loop
-+ nest with LOOP. */
-+
-+static bool
-+proper_loop_form_for_interchange (struct loop *loop, struct loop **min_outer)
-+{
-+ edge e0, e1, exit;
-+
-+ /* Don't interchange if loop has unsupported information for the moment. */
-+ if (loop->safelen > 0
-+ || loop->constraints != 0
-+ || loop->can_be_parallel
-+ || loop->dont_vectorize
-+ || loop->force_vectorize
-+ || loop->in_oacc_kernels_region
-+ || loop->orig_loop_num != 0
-+ || loop->simduid != NULL_TREE)
-+ return false;
-+
-+ /* Don't interchange if outer loop has basic block other than header, exit
-+ and latch. */
-+ if (loop->inner != NULL
-+ && loop->num_nodes != loop->inner->num_nodes + 3)
-+ return false;
-+
-+ if ((exit = single_dom_exit (loop)) == NULL)
-+ return false;
-+
-+ /* Check control flow on loop header/exit blocks. */
-+ if (loop->header == exit->src
-+ && (EDGE_COUNT (loop->header->preds) != 2
-+ || EDGE_COUNT (loop->header->succs) != 2))
-+ return false;
-+ else if (loop->header != exit->src
-+ && (EDGE_COUNT (loop->header->preds) != 2
-+ || !single_succ_p (loop->header)
-+ || unsupported_edge (single_succ_edge (loop->header))
-+ || EDGE_COUNT (exit->src->succs) != 2
-+ || !single_pred_p (exit->src)
-+ || unsupported_edge (single_pred_edge (exit->src))))
-+ return false;
-+
-+ e0 = EDGE_PRED (loop->header, 0);
-+ e1 = EDGE_PRED (loop->header, 1);
-+ if (unsupported_edge (e0) || unsupported_edge (e1)
-+ || (e0->src != loop->latch && e1->src != loop->latch)
-+ || (e0->src->loop_father == loop && e1->src->loop_father == loop))
-+ return false;
-+
-+ e0 = EDGE_SUCC (exit->src, 0);
-+ e1 = EDGE_SUCC (exit->src, 1);
-+ if (unsupported_edge (e0) || unsupported_edge (e1)
-+ || (e0->dest != loop->latch && e1->dest != loop->latch)
-+ || (e0->dest->loop_father == loop && e1->dest->loop_father == loop))
-+ return false;
-+
-+ /* Don't interchange if any reference is in basic block that doesn't
-+ dominate exit block. */
-+ basic_block *bbs = get_loop_body (loop);
-+ for (unsigned i = 0; i < loop->num_nodes; i++)
-+ {
-+ basic_block bb = bbs[i];
-+
-+ if (bb->loop_father != loop
-+ || bb == loop->header || bb == exit->src
-+ || dominated_by_p (CDI_DOMINATORS, exit->src, bb))
-+ continue;
-+
-+ for (gimple_stmt_iterator gsi = gsi_start_bb_nondebug (bb);
-+ !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
-+ if (gimple_vuse (gsi_stmt (gsi)))
-+ {
-+ free (bbs);
-+ return false;
-+ }
-+ }
-+ free (bbs);
-+
-+ tree niters = number_of_latch_executions (loop);
-+ niters = analyze_scalar_evolution (loop_outer (loop), niters);
-+ if (!niters || chrec_contains_undetermined (niters))
-+ return false;
-+
-+ /* Record the innermost outer loop that doesn't form rectangle loop nest. */
-+ for (loop_p loop2 = loop_outer (loop);
-+ loop2 && flow_loop_nested_p (*min_outer, loop2);
-+ loop2 = loop_outer (loop2))
-+ {
-+ niters = instantiate_scev (loop_preheader_edge (loop2),
-+ loop_outer (loop), niters);
-+ if (!evolution_function_is_invariant_p (niters, loop2->num))
-+ {
-+ *min_outer = loop2;
-+ break;
-+ }
-+ }
-+ return true;
-+}
-+
-+/* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST]
-+ should be interchanged by looking into all DATAREFS. */
-+
-+static bool
-+should_interchange_loop_nest (struct loop *loop_nest, struct loop *innermost,
-+ vec datarefs)
-+{
-+ unsigned idx = loop_depth (innermost) - loop_depth (loop_nest);
-+ gcc_assert (idx > 0);
-+
-+ /* Check if any two adjacent loops should be interchanged. */
-+ for (struct loop *loop = innermost;
-+ loop != loop_nest; loop = loop_outer (loop), idx--)
-+ if (should_interchange_loops (idx, idx - 1, datarefs,
-+ loop == innermost, false))
-+ return true;
-+
-+ return false;
-+}
-+
-+/* Given loop nest LOOP_NEST and data references DATAREFS, compute data
-+ dependences for loop interchange and store it in DDRS. Note we compute
-+ dependences directly rather than call generic interface so that we can
-+ return on unknown dependence instantly. */
-+
-+static bool
-+tree_loop_interchange_compute_ddrs (vec loop_nest,
-+ vec datarefs,
-+ vec *ddrs)
-+{
-+ struct data_reference *a, *b;
-+ struct loop *innermost = loop_nest.last ();
-+
-+ for (unsigned i = 0; datarefs.iterate (i, &a); ++i)
-+ {
-+ bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost;
-+ for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j)
-+ if (DR_IS_WRITE (a) || DR_IS_WRITE (b))
-+ {
-+ bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost;
-+ /* Don't support multiple write references in outer loop. */
-+ if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b))
-+ return false;
-+
-+ ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest);
-+ ddrs->safe_push (ddr);
-+ compute_affine_dependence (ddr, loop_nest[0]);
-+
-+ /* Give up if ddr is an unknown dependence or the classic direction
-+ vector is not available. */
-+ if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
-+ || (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
-+ && DDR_NUM_DIR_VECTS (ddr) == 0))
-+ return false;
-+
-+ /* If either data reference is in an outer loop of the nest, we require
-+ no dependence here because the data reference needs to be moved
-+ into the inner loop during interchange. */
-+ if (a_outer_p && b_outer_p
-+ && operand_equal_p (DR_REF (a), DR_REF (b), 0))
-+ continue;
-+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known
-+ && (a_outer_p || b_outer_p))
-+ return false;
-+ }
-+ }
-+
-+ return true;
-+}
-+
-+/* Prune DATAREFS by removing any data reference not inside of LOOP. */
-+
-+static inline void
-+prune_datarefs_not_in_loop (struct loop *loop, vec datarefs)
-+{
-+ unsigned i, j;
-+ struct data_reference *dr;
-+
-+ for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i)
-+ {
-+ if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr))))
-+ datarefs[j++] = dr;
-+ else
-+ {
-+ if (dr->aux)
-+ {
-+ DR_ACCESS_STRIDE (dr)->release ();
-+ free (dr->aux);
-+ }
-+ free_data_ref (dr);
-+ }
-+ }
-+ datarefs.truncate (j);
-+}
-+
-+/* Find and store data references in DATAREFS for LOOP nest. If there's
-+ difficult data reference in a basic block, we shrink the loop nest to
-+ inner loop of that basic block's father loop. On success, return the
-+ outer loop of the result loop nest. */
-+
-+static struct loop *
-+prepare_data_references (struct loop *loop, vec *datarefs)
-+{
-+ struct loop *loop_nest = loop;
-+ vec *bb_refs;
-+ basic_block bb, *bbs = get_loop_body_in_dom_order (loop);
-+
-+ for (unsigned i = 0; i < loop->num_nodes; i++)
-+ bbs[i]->aux = NULL;
-+
-+ /* Find data references for all basic blocks. Shrink loop nest on difficult
-+ data reference. */
-+ for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i)
-+ {
-+ bb = bbs[i];
-+ if (!flow_bb_inside_loop_p (loop_nest, bb))
-+ continue;
-+
-+ bb_refs = new vec ();
-+ if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know)
-+ {
-+ loop_nest = bb->loop_father->inner;
-+ if (loop_nest && !loop_nest->inner)
-+ loop_nest = NULL;
-+
-+ free_data_refs (*bb_refs);
-+ delete bb_refs;
-+ }
-+ else if (bb_refs->is_empty ())
-+ delete bb_refs;
-+ else
-+ bb->aux = bb_refs;
-+ }
-+
-+ /* Collect all data references in loop nest. */
-+ for (unsigned i = 0; i < loop->num_nodes; i++)
-+ {
-+ bb = bbs[i];
-+ if (!bb->aux)
-+ continue;
-+
-+ bb_refs = (vec *) bb->aux;
-+ if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb))
-+ datarefs->safe_splice (*bb_refs);
-+ else
-+ free_data_refs (*bb_refs);
-+
-+ delete bb_refs;
-+ bb->aux = NULL;
-+ }
-+ free (bbs);
-+
-+ return loop_nest;
-+}
-+
-+/* Given innermost LOOP, return true if perfect loop nest can be found and
-+ data dependences can be computed. If succeed, record the perfect loop
-+ nest in LOOP_NEST; record all data references in DATAREFS and record all
-+ data dependence relations in DDRS.
-+
-+ We do support a restricted form of imperfect loop nest, i.e, loop nest
-+ with load/store in outer loop initializing/finalizing simple reduction
-+ of the innermost loop. For such outer loop reference, we require that
-+ it has no dependence with others sinve it will be moved to inner loop
-+ in interchange. */
-+
-+static bool
-+prepare_perfect_loop_nest (struct loop *loop, vec *loop_nest,
-+ vec *datarefs, vec *ddrs)
-+{
-+ struct loop *start_loop = NULL, *innermost = loop;
-+ struct loop *outermost = loops_for_fn (cfun)->tree_root;
-+
-+ /* Find loop nest from the innermost loop. The outermost is the innermost
-+ outer*/
-+ while (loop->num != 0 && loop->inner == start_loop
-+ && flow_loop_nested_p (outermost, loop))
-+ {
-+ if (!proper_loop_form_for_interchange (loop, &outermost))
-+ break;
-+
-+ start_loop = loop;
-+ /* If this loop has sibling loop, the father loop won't be in perfect
-+ loop nest. */
-+ if (loop->next != NULL)
-+ break;
-+
-+ loop = loop_outer (loop);
-+ }
-+ if (!start_loop || !start_loop->inner)
-+ return false;
-+
-+ /* Prepare the data reference vector for the loop nest, pruning outer
-+ loops we cannot handle. */
-+ start_loop = prepare_data_references (start_loop, datarefs);
-+ if (!start_loop
-+ /* Check if there is no data reference. */
-+ || datarefs->is_empty ()
-+ /* Check if there are too many of data references. */
-+ || (int) datarefs->length () > MAX_DATAREFS)
-+ return false;
-+
-+ /* Compute access strides for all data references, pruning outer
-+ loops we cannot analyze refs in. */
-+ start_loop = compute_access_strides (start_loop, innermost, *datarefs);
-+ if (!start_loop)
-+ return false;
-+
-+ /* Check if any interchange is profitable in the loop nest. */
-+ if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
-+ return false;
-+
-+ /* Check if data dependences can be computed for loop nest starting from
-+ start_loop. */
-+ loop = start_loop;
-+ do {
-+ loop_nest->truncate (0);
-+
-+ if (loop != start_loop)
-+ prune_datarefs_not_in_loop (start_loop, *datarefs);
-+
-+ if (find_loop_nest (start_loop, loop_nest)
-+ && tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs))
-+ {
-+ if (dump_file && (dump_flags & TDF_DETAILS))
-+ fprintf (dump_file,
-+ "\nConsider loop interchange for loop_nest<%d - %d>\n",
-+ start_loop->num, innermost->num);
-+
-+ if (loop != start_loop)
-+ prune_access_strides_not_in_loop (start_loop, innermost, *datarefs);
-+
-+ if (dump_file && (dump_flags & TDF_DETAILS))
-+ dump_access_strides (*datarefs);
-+
-+ return true;
-+ }
-+
-+ free_dependence_relations (*ddrs);
-+ *ddrs = vNULL;
-+ /* Try to compute data dependences with the outermost loop stripped. */
-+ loop = start_loop;
-+ start_loop = start_loop->inner;
-+ } while (start_loop && start_loop->inner);
-+
-+ return false;
-+}
-+
-+/* Main entry for loop interchange pass. */
-+
-+unsigned int
-+pass_linterchange::execute (function *fun)
-+{
-+ if (number_of_loops (fun) <= 2)
-+ return 0;
-+
-+ bool changed_p = false;
-+ struct loop *loop;
-+ FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
-+ {
-+ vec loop_nest = vNULL;
-+ vec datarefs = vNULL;
-+ vec ddrs = vNULL;
-+ if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
-+ {
-+ tree_loop_interchange loop_interchange (loop_nest);
-+ changed_p |= loop_interchange.interchange (datarefs, ddrs);
-+ }
-+ free_dependence_relations (ddrs);
-+ free_data_refs_with_aux (datarefs);
-+ loop_nest.release ();
-+ }
-+
-+ if (changed_p)
-+ scev_reset_htab ();
-+
-+ return changed_p ? (TODO_update_ssa_only_virtuals) : 0;
-+}
-+
-+} // anon namespace
-+
-+gimple_opt_pass *
-+make_pass_linterchange (gcc::context *ctxt)
-+{
-+ return new pass_linterchange (ctxt);
-+}
-diff -N -urp a/gcc/gimple-pretty-print.h b/gcc/gimple-pretty-print.h
---- a/gcc/gimple-pretty-print.h 2018-11-15 15:54:01.223039794 +0800
-+++ b/gcc/gimple-pretty-print.h 2018-11-15 16:03:17.447054436 +0800
-@@ -27,10 +27,10 @@ along with GCC; see the file COPYING3.
- extern void debug_gimple_stmt (gimple *);
- extern void debug_gimple_seq (gimple_seq);
- extern void print_gimple_seq (FILE *, gimple_seq, int, int);
--extern void print_gimple_stmt (FILE *, gimple *, int, int);
-+extern void print_gimple_stmt (FILE *, gimple *, int, int = 0);
- extern void debug (gimple &ref);
- extern void debug (gimple *ptr);
--extern void print_gimple_expr (FILE *, gimple *, int, int);
-+extern void print_gimple_expr (FILE *, gimple *, int, int = 0);
- extern void pp_gimple_stmt_1 (pretty_printer *, gimple *, int, int);
- extern void gimple_dump_bb (FILE *, basic_block, int, int);
- extern void gimple_dump_bb_for_graph (pretty_printer *, basic_block);
-diff -N -urp a/gcc/opts.c b/gcc/opts.c
---- a/gcc/opts.c 2018-11-15 15:59:30.459048461 +0800
-+++ b/gcc/opts.c 2018-11-15 16:03:17.447054436 +0800
-@@ -538,6 +538,7 @@ static const struct default_options defa
- { OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 },
- { OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 },
- { OPT_LEVELS_3_PLUS, OPT_ftree_slp_vectorize, NULL, 1 },
-+ { OPT_LEVELS_3_PLUS, OPT_floop_interchange, NULL, 1 },
- { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_DYNAMIC },
- { OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 },
- { OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 },
-diff -N -urp a/gcc/params.def b/gcc/params.def
---- a/gcc/params.def 2018-11-15 15:59:30.459048461 +0800
-+++ b/gcc/params.def 2018-11-15 16:03:17.451054437 +0800
-@@ -780,6 +780,20 @@ DEFPARAM (PARAM_L2_CACHE_SIZE,
- "The size of L2 cache.",
- 512, 0, 0)
-
-+/* Maximum number of statements in loop nest for loop interchange. */
-+
-+DEFPARAM (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS,
-+ "loop-interchange-max-num-stmts",
-+ "The maximum number of stmts in loop nest for loop interchange.",
-+ 64, 0, 0)
-+
-+/* Minimum stride ratio for loop interchange to be profitiable. */
-+
-+DEFPARAM (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO,
-+ "loop-interchange-stride-ratio",
-+ "The minimum stride ratio for loop interchange to be profitable",
-+ 2, 0, 0)
-+
- /* Whether we should use canonical types rather than deep "structural"
- type checking. Setting this value to 1 (the default) improves
- compilation performance in the C++ and Objective-C++ front end;
-diff -N -urp a/gcc/passes.def b/gcc/passes.def
---- a/gcc/passes.def 2018-11-15 15:59:30.463048461 +0800
-+++ b/gcc/passes.def 2018-11-15 16:03:17.451054437 +0800
-@@ -278,6 +278,7 @@ along with GCC; see the file COPYING3.
- NEXT_PASS (pass_cd_dce);
- NEXT_PASS (pass_record_bounds);
- NEXT_PASS (pass_loop_distribution);
-+ NEXT_PASS (pass_linterchange);
- NEXT_PASS (pass_copy_prop);
- NEXT_PASS (pass_graphite);
- PUSH_INSERT_PASSES_WITHIN (pass_graphite)
-diff -N -urp a/gcc/timevar.def b/gcc/timevar.def
---- a/gcc/timevar.def 2018-11-15 15:59:30.463048461 +0800
-+++ b/gcc/timevar.def 2018-11-15 16:03:17.455054437 +0800
-@@ -182,6 +182,7 @@ DEFTIMEVAR (TV_TREE_LOOP , "tree lo
- DEFTIMEVAR (TV_TREE_NOLOOP , "loopless fn")
- DEFTIMEVAR (TV_TREE_LOOP_BOUNDS , "tree loop bounds")
- DEFTIMEVAR (TV_LIM , "tree loop invariant motion")
-+DEFTIMEVAR (TV_LINTERCHANGE , "tree loop interchange")
- DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical iv")
- DEFTIMEVAR (TV_SCEV_CONST , "scev constant prop")
- DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching")
-diff -N -urp a/gcc/tree-pass.h b/gcc/tree-pass.h
---- a/gcc/tree-pass.h 2018-11-15 15:59:30.467048461 +0800
-+++ b/gcc/tree-pass.h 2018-11-15 16:03:17.455054437 +0800
-@@ -367,6 +367,7 @@ extern gimple_opt_pass *make_pass_tree_l
- extern gimple_opt_pass *make_pass_tree_no_loop (gcc::context *ctxt);
- extern gimple_opt_pass *make_pass_tree_loop_init (gcc::context *ctxt);
- extern gimple_opt_pass *make_pass_lim (gcc::context *ctxt);
-+extern gimple_opt_pass *make_pass_linterchange (gcc::context *ctxt);
- extern gimple_opt_pass *make_pass_tree_unswitch (gcc::context *ctxt);
- extern gimple_opt_pass *make_pass_loop_split (gcc::context *ctxt);
- extern gimple_opt_pass *make_pass_loop_jam (gcc::context *ctxt);
-diff -N -urp a/gcc/tree-pretty-print.h b/gcc/tree-pretty-print.h
---- a/gcc/tree-pretty-print.h 2018-11-15 15:54:01.439039800 +0800
-+++ b/gcc/tree-pretty-print.h 2018-11-15 16:03:17.455054437 +0800
-@@ -37,7 +37,7 @@ extern void debug_tree_chain (tree);
- extern void print_generic_decl (FILE *, tree, int);
- extern void print_generic_stmt (FILE *, tree, int);
- extern void print_generic_stmt_indented (FILE *, tree, int, int);
--extern void print_generic_expr (FILE *, tree, int);
-+extern void print_generic_expr (FILE *, tree, int = 0);
- extern void dump_omp_clauses (pretty_printer *, tree, int, int);
- extern int dump_generic_node (pretty_printer *, tree, int, int, bool);
- extern void print_declaration (pretty_printer *, tree, int, int);
-diff -N -urp a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
---- a/gcc/tree-scalar-evolution.c 2018-11-15 15:54:01.443039800 +0800
-+++ b/gcc/tree-scalar-evolution.c 2018-11-15 16:03:17.459054437 +0800
-@@ -3000,6 +3000,50 @@ instantiate_scev (basic_block instantiat
- return res;
- }
-
-+tree
-+instantiate_scev (edge instantiate_below, struct loop *evolution_loop,
-+ tree chrec)
-+{
-+ tree res;
-+
-+ if (dump_file && (dump_flags & TDF_SCEV))
-+ {
-+ fprintf (dump_file, "(instantiate_scev \n");
-+ fprintf (dump_file, " (instantiate_below = %d -> %d)\n",
-+ instantiate_below->src->index, instantiate_below->dest->index);
-+ if (evolution_loop)
-+ fprintf (dump_file, " (evolution_loop = %d)\n", evolution_loop->num);
-+ fprintf (dump_file, " (chrec = ");
-+ print_generic_expr (dump_file, chrec);
-+ fprintf (dump_file, ")\n");
-+ }
-+
-+ bool destr = false;
-+ if (!global_cache)
-+ {
-+ global_cache = new instantiate_cache_type;
-+ destr = true;
-+ }
-+
-+ res = instantiate_scev_r (instantiate_below->src, evolution_loop,
-+ NULL, chrec, NULL, 0);
-+
-+ if (destr)
-+ {
-+ delete global_cache;
-+ global_cache = NULL;
-+ }
-+
-+ if (dump_file && (dump_flags & TDF_SCEV))
-+ {
-+ fprintf (dump_file, " (res = ");
-+ print_generic_expr (dump_file, res);
-+ fprintf (dump_file, "))\n");
-+ }
-+
-+ return res;
-+}
-+
- /* Similar to instantiate_parameters, but does not introduce the
- evolutions in outer loops for LOOP invariants in CHREC, and does not
- care about causing overflows, as long as they do not affect value
-diff -N -urp a/gcc/tree-scalar-evolution.h b/gcc/tree-scalar-evolution.h
---- a/gcc/tree-scalar-evolution.h 2018-11-15 15:54:01.443039800 +0800
-+++ b/gcc/tree-scalar-evolution.h 2018-11-15 16:03:17.459054437 +0800
-@@ -31,6 +31,7 @@ extern void scev_reset_htab (void);
- extern void scev_finalize (void);
- extern tree analyze_scalar_evolution (struct loop *, tree);
- extern tree instantiate_scev (basic_block, struct loop *, tree);
-+extern tree instantiate_scev (edge, struct loop *, tree);
- extern tree resolve_mixers (struct loop *, tree, bool *);
- extern void gather_stats_on_scev_database (void);
- extern void final_value_replacement_loop (struct loop *);
-diff -N -urp a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c
---- a/gcc/tree-ssa-dce.c 2018-11-15 15:54:01.443039800 +0800
-+++ b/gcc/tree-ssa-dce.c 2018-11-15 16:03:17.463054437 +0800
-@@ -1729,3 +1729,55 @@ make_pass_cd_dce (gcc::context *ctxt)
- {
- return new pass_cd_dce (ctxt);
- }
-+
-+
-+/* A cheap DCE interface. WORKLIST is a list of possibly dead stmts and
-+ is consumed by this function. The function has linear complexity in
-+ the number of dead stmts with a constant factor like the average SSA
-+ use operands number. */
-+
-+void
-+simple_dce_from_worklist (bitmap worklist)
-+{
-+ while (! bitmap_empty_p (worklist))
-+ {
-+ /* Pop item. */
-+ unsigned i = bitmap_first_set_bit (worklist);
-+ bitmap_clear_bit (worklist, i);
-+
-+ tree def = ssa_name (i);
-+ /* Removed by somebody else or still in use. */
-+ if (! def || ! has_zero_uses (def))
-+ continue;
-+
-+ gimple *t = SSA_NAME_DEF_STMT (def);
-+ if (gimple_has_side_effects (t))
-+ continue;
-+
-+ /* Add uses to the worklist. */
-+ ssa_op_iter iter;
-+ use_operand_p use_p;
-+ FOR_EACH_PHI_OR_STMT_USE (use_p, t, iter, SSA_OP_USE)
-+ {
-+ tree use = USE_FROM_PTR (use_p);
-+ if (TREE_CODE (use) == SSA_NAME
-+ && ! SSA_NAME_IS_DEFAULT_DEF (use))
-+ bitmap_set_bit (worklist, SSA_NAME_VERSION (use));
-+ }
-+
-+ /* Remove stmt. */
-+ if (dump_file && (dump_flags & TDF_DETAILS))
-+ {
-+ fprintf (dump_file, "Removing dead stmt:");
-+ print_gimple_stmt (dump_file, t, 0);
-+ }
-+ gimple_stmt_iterator gsi = gsi_for_stmt (t);
-+ if (gimple_code (t) == GIMPLE_PHI)
-+ remove_phi_node (&gsi, true);
-+ else
-+ {
-+ gsi_remove (&gsi, true);
-+ release_defs (t);
-+ }
-+ }
-+}
-diff -N -urp a/gcc/tree-ssa-dce.h b/gcc/tree-ssa-dce.h
---- a/gcc/tree-ssa-dce.h 1970-01-01 08:00:00.000000000 +0800
-+++ b/gcc/tree-ssa-dce.h 2018-11-15 16:03:17.463054437 +0800
-@@ -0,0 +1,22 @@
-+/* Copyright (C) 2017 Free Software Foundation, Inc.
-+
-+This file is part of GCC.
-+
-+GCC is free software; you can redistribute it and/or modify it
-+under the terms of the GNU General Public License as published by the
-+Free Software Foundation; either version 3, or (at your option) any
-+later version.
-+
-+GCC is distributed in the hope that it will be useful, but WITHOUT
-+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-+for more details.
-+
-+You should have received a copy of the GNU General Public License
-+along with GCC; see the file COPYING3. If not see
-+. */
-+
-+#ifndef TREE_SSA_DCE_H
-+#define TREE_SSA_DCE_H
-+extern void simple_dce_from_worklist (bitmap);
-+#endif
-diff -N -urp a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c
---- a/gcc/tree-ssa-loop-ivcanon.c 2018-11-15 15:54:01.447039800 +0800
-+++ b/gcc/tree-ssa-loop-ivcanon.c 2018-11-15 16:03:17.467054437 +0800
-@@ -76,10 +76,13 @@ enum unroll_level
- };
-
- /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT
-- is the exit edge whose condition is replaced. */
--
--static void
--create_canonical_iv (struct loop *loop, edge exit, tree niter)
-+ is the exit edge whose condition is replaced. The ssa versions of the new
-+ IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER
-+ if they are not NULL. */
-+
-+void
-+create_canonical_iv (struct loop *loop, edge exit, tree niter,
-+ tree *var_before = NULL, tree *var_after = NULL)
- {
- edge in;
- tree type, var;
-@@ -112,7 +115,9 @@ create_canonical_iv (struct loop *loop,
- create_iv (niter,
- build_int_cst (type, -1),
- NULL_TREE, loop,
-- &incr_at, false, NULL, &var);
-+ &incr_at, false, var_before, &var);
-+ if (var_after)
-+ *var_after = var;
-
- cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
- gimple_cond_set_code (cond, cmp);
-diff -N -urp a/gcc/tree-ssa-loop-ivopts.h b/gcc/tree-ssa-loop-ivopts.h
---- a/gcc/tree-ssa-loop-ivopts.h 2018-11-15 15:54:01.447039800 +0800
-+++ b/gcc/tree-ssa-loop-ivopts.h 2018-11-15 16:03:17.467054437 +0800
-@@ -33,4 +33,6 @@ bool multiplier_allowed_in_address_p (HO
- addr_space_t);
- void tree_ssa_iv_optimize (void);
-
-+void create_canonical_iv (struct loop *, edge, tree,
-+ tree * = NULL, tree * = NULL);
- #endif /* GCC_TREE_SSA_LOOP_IVOPTS_H */
-diff -N -urp a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c
---- a/gcc/tree-ssa-pre.c 2018-11-15 15:54:01.447039800 +0800
-+++ b/gcc/tree-ssa-pre.c 2018-11-15 16:03:17.471054437 +0800
-@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3.
- #include "gimplify.h"
- #include "gimple-iterator.h"
- #include "tree-cfg.h"
-+#include "tree-ssa-dce.h"
- #include "tree-ssa-loop.h"
- #include "tree-into-ssa.h"
- #include "tree-dfa.h"
-@@ -4908,99 +4909,6 @@ mark_operand_necessary (tree op)
- return stmt;
- }
-
--/* Because we don't follow exactly the standard PRE algorithm, and decide not
-- to insert PHI nodes sometimes, and because value numbering of casts isn't
-- perfect, we sometimes end up inserting dead code. This simple DCE-like
-- pass removes any insertions we made that weren't actually used. */
--
--static void
--remove_dead_inserted_code (void)
--{
-- bitmap worklist;
-- unsigned i;
-- bitmap_iterator bi;
-- gimple *t;
--
-- worklist = BITMAP_ALLOC (NULL);
-- EXECUTE_IF_SET_IN_BITMAP (inserted_exprs, 0, i, bi)
-- {
-- t = SSA_NAME_DEF_STMT (ssa_name (i));
-- if (gimple_plf (t, NECESSARY))
-- bitmap_set_bit (worklist, i);
-- }
-- while (!bitmap_empty_p (worklist))
-- {
-- i = bitmap_first_set_bit (worklist);
-- bitmap_clear_bit (worklist, i);
-- t = SSA_NAME_DEF_STMT (ssa_name (i));
--
-- /* PHI nodes are somewhat special in that each PHI alternative has
-- data and control dependencies. All the statements feeding the
-- PHI node's arguments are always necessary. */
-- if (gimple_code (t) == GIMPLE_PHI)
-- {
-- unsigned k;
--
-- for (k = 0; k < gimple_phi_num_args (t); k++)
-- {
-- tree arg = PHI_ARG_DEF (t, k);
-- if (TREE_CODE (arg) == SSA_NAME)
-- {
-- gimple *n = mark_operand_necessary (arg);
-- if (n)
-- bitmap_set_bit (worklist, SSA_NAME_VERSION (arg));
-- }
-- }
-- }
-- else
-- {
-- /* Propagate through the operands. Examine all the USE, VUSE and
-- VDEF operands in this statement. Mark all the statements
-- which feed this statement's uses as necessary. */
-- ssa_op_iter iter;
-- tree use;
--
-- /* The operands of VDEF expressions are also needed as they
-- represent potential definitions that may reach this
-- statement (VDEF operands allow us to follow def-def
-- links). */
--
-- FOR_EACH_SSA_TREE_OPERAND (use, t, iter, SSA_OP_ALL_USES)
-- {
-- gimple *n = mark_operand_necessary (use);
-- if (n)
-- bitmap_set_bit (worklist, SSA_NAME_VERSION (use));
-- }
-- }
-- }
--
-- EXECUTE_IF_SET_IN_BITMAP (inserted_exprs, 0, i, bi)
-- {
-- t = SSA_NAME_DEF_STMT (ssa_name (i));
-- if (!gimple_plf (t, NECESSARY))
-- {
-- gimple_stmt_iterator gsi;
--
-- if (dump_file && (dump_flags & TDF_DETAILS))
-- {
-- fprintf (dump_file, "Removing unnecessary insertion:");
-- print_gimple_stmt (dump_file, t, 0, 0);
-- }
--
-- gsi = gsi_for_stmt (t);
-- if (gimple_code (t) == GIMPLE_PHI)
-- remove_phi_node (&gsi, true);
-- else
-- {
-- gsi_remove (&gsi, true);
-- release_defs (t);
-- }
-- }
-- }
-- BITMAP_FREE (worklist);
--}
--
--
- /* Initialize data structures used by PRE. */
-
- static void
-@@ -5142,8 +5050,7 @@ pass_pre::execute (function *fun)
- statistics_counter_event (fun, "Eliminated", pre_stats.eliminations);
-
- clear_expression_ids ();
-- remove_dead_inserted_code ();
--
-+
- scev_finalize ();
- fini_pre ();
- todo |= fini_eliminate ();
-diff -N -urp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
---- a/gcc/tree-vect-loop.c 2018-11-15 15:54:01.447039800 +0800
-+++ b/gcc/tree-vect-loop.c 2018-11-15 16:03:17.471054437 +0800
-@@ -2632,6 +2632,112 @@ vect_is_slp_reduction (loop_vec_info loo
- return true;
- }
-
-+/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and
-+ reduction operation CODE has a handled computation expression. */
-+
-+bool
-+check_reduction_path (location_t loc, loop_p loop, gphi *phi, tree loop_arg,
-+ enum tree_code code)
-+{
-+ auto_vec > path;
-+ auto_bitmap visited;
-+ tree lookfor = PHI_RESULT (phi);
-+ ssa_op_iter curri;
-+ use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE);
-+ while (USE_FROM_PTR (curr) != loop_arg)
-+ curr = op_iter_next_use (&curri);
-+ curri.i = curri.numops;
-+ do
-+ {
-+ path.safe_push (std::make_pair (curri, curr));
-+ tree use = USE_FROM_PTR (curr);
-+ if (use == lookfor)
-+ break;
-+ gimple *def = SSA_NAME_DEF_STMT (use);
-+ if (gimple_nop_p (def)
-+ || ! flow_bb_inside_loop_p (loop, gimple_bb (def)))
-+ {
-+pop:
-+ do
-+ {
-+ std::pair x = path.pop ();
-+ curri = x.first;
-+ curr = x.second;
-+ do
-+ curr = op_iter_next_use (&curri);
-+ /* Skip already visited or non-SSA operands (from iterating
-+ over PHI args). */
-+ while (curr != NULL_USE_OPERAND_P
-+ && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
-+ || ! bitmap_set_bit (visited,
-+ SSA_NAME_VERSION
-+ (USE_FROM_PTR (curr)))));
-+ }
-+ while (curr == NULL_USE_OPERAND_P && ! path.is_empty ());
-+ if (curr == NULL_USE_OPERAND_P)
-+ break;
-+ }
-+ else
-+ {
-+ if (gimple_code (def) == GIMPLE_PHI)
-+ curr = op_iter_init_phiuse (&curri, as_a (def), SSA_OP_USE);
-+ else
-+ curr = op_iter_init_use (&curri, def, SSA_OP_USE);
-+ while (curr != NULL_USE_OPERAND_P
-+ && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
-+ || ! bitmap_set_bit (visited,
-+ SSA_NAME_VERSION
-+ (USE_FROM_PTR (curr)))))
-+ curr = op_iter_next_use (&curri);
-+ if (curr == NULL_USE_OPERAND_P)
-+ goto pop;
-+ }
-+ }
-+ while (1);
-+ if (dump_file && (dump_flags & TDF_DETAILS))
-+ {
-+ dump_printf_loc (MSG_NOTE, loc, "reduction path: ");
-+ unsigned i;
-+ std::pair *x;
-+ FOR_EACH_VEC_ELT (path, i, x)
-+ {
-+ dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second));
-+ dump_printf (MSG_NOTE, " ");
-+ }
-+ dump_printf (MSG_NOTE, "\n");
-+ }
-+
-+ /* Check whether the reduction path detected is valid. */
-+ bool fail = path.length () == 0;
-+ bool neg = false;
-+ for (unsigned i = 1; i < path.length (); ++i)
-+ {
-+ gimple *use_stmt = USE_STMT (path[i].second);
-+ tree op = USE_FROM_PTR (path[i].second);
-+ if (! has_single_use (op)
-+ || ! is_gimple_assign (use_stmt))
-+ {
-+ fail = true;
-+ break;
-+ }
-+ if (gimple_assign_rhs_code (use_stmt) != code)
-+ {
-+ if (code == PLUS_EXPR
-+ && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
-+ {
-+ /* Track whether we negate the reduction value each iteration. */
-+ if (gimple_assign_rhs2 (use_stmt) == op)
-+ neg = ! neg;
-+ }
-+ else
-+ {
-+ fail = true;
-+ break;
-+ }
-+ }
-+ }
-+ return ! fail && ! neg;
-+}
-
- /* Function vect_is_simple_reduction_1
-
-diff -N -urp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h
---- a/gcc/tree-vectorizer.h 2018-11-15 15:54:01.451039800 +0800
-+++ b/gcc/tree-vectorizer.h 2018-11-15 16:03:17.475054437 +0800
-@@ -1166,6 +1166,9 @@ extern tree vect_create_addr_base_for_ve
- extern void destroy_loop_vec_info (loop_vec_info, bool);
- extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, bool,
- bool *, bool);
-+/* Used in gimple-loop-interchange.c. */
-+extern bool check_reduction_path (location_t, loop_p, gphi *, tree,
-+ enum tree_code);
- /* Drive for loop analysis stage. */
- extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info);
- extern tree vect_build_loop_niters (loop_vec_info);
diff --git a/gcc.spec b/gcc.spec
index c69f413ab23bd7aa899f8d1c032936b24e945233..cab523fe92ddec899e1038a07a5503aff721c802 100644
--- a/gcc.spec
+++ b/gcc.spec
@@ -41,7 +41,7 @@ Version: 7.3.0
# number 2020033101 meaning the openEuler 20.03 release date plus 01 to
# replace DATE and will never change it in the future.
%global openEulerDATE 2020033101
-Release: %{openEulerDATE}.53
+Release: %{openEulerDATE}.54
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
Group: Development/Languages
#Source0: hcc-aarch64-linux-release.tar.bz2
@@ -73,18 +73,12 @@ Patch2: gcc-adapt-to-isl.patch
Patch3: sanitizer-pr-85835.patch
Patch4: CVE-2018-12886.patch
Patch5: CVE-2019-15847.patch
-Patch6: option-mlong-calls.patch
Patch7: add-tsv110-pipeline-scheduling.patch
-Patch8: option-mfentry-and-mlong-calls-bugfix.patch
-Patch10: aarch64-ilp32-call-addr-dimode.patch
Patch12: aarch64-fix-tls-negative-offset.patch
Patch14: arm-fix-push-minipool.patch
Patch22: arm-bigendian-disable-interleaved-LS-vectorize.patch
Patch23: floop-unroll-and-jam.patch
-Patch24: floop-interchange.patch
Patch25: constructor-priority-bugfix.patch
-Patch26: arm-adjust-be-ldrd-strd.patch
-Patch28: try-unroll.patch
Patch29: Big-endian-union-bitfield-bugfix.patch
Patch31: fstack-clash-protection.patch
Patch34: mark-pattern-as-clobbering-CC-REGNUM.patch
@@ -569,18 +563,12 @@ package or when debugging this package.
%patch3 -p1
%patch4 -p1
%patch5 -p1
-%patch6 -p1
%patch7 -p1
-%patch8 -p1
-%patch10 -p1
%patch12 -p1
%patch14 -p1
%patch22 -p1
%patch23 -p1
-%patch24 -p1
%patch25 -p1
-%patch26 -p1
-%patch28 -p1
%patch29 -p1
%patch31 -p1
%patch34 -p1
@@ -3376,6 +3364,9 @@ fi
%changelog
+* Tue Nov 21 2023 eastb233 - 7.3.0-2020033101.54
+- Delete several patches which cause some failures.
+
* Tue Oct 10 2023 Xiong Zhou -7.3.0-2020033101.53
- Fix CVE-2023-4039. Delete abnormal rpaths in shared objects.
diff --git a/option-mfentry-and-mlong-calls-bugfix.patch b/option-mfentry-and-mlong-calls-bugfix.patch
deleted file mode 100644
index c242567f7a4ae29d94cb82236385a734a860ca2e..0000000000000000000000000000000000000000
--- a/option-mfentry-and-mlong-calls-bugfix.patch
+++ /dev/null
@@ -1,108 +0,0 @@
-diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
---- a/gcc/config/aarch64/aarch64.c 2018-09-19 17:11:42.583520820 +0800
-+++ b/gcc/config/aarch64/aarch64.c 2018-09-19 17:10:22.715520820 +0800
-@@ -1260,29 +1260,32 @@ aarch64_is_long_call_p (rtx sym)
- void
- aarch64_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
- {
-- if (!TARGET_LONG_CALLS)
-+ if (flag_fentry)
- {
-- fprintf (file, "\tmov\tx9, x30\n");
-- fprintf (file, "\tbl\t__fentry__\n");
-- fprintf (file, "\tmov\tx30, x9\n");
-- }
-- else
-- {
-- if (flag_pic)
-+ if (!TARGET_LONG_CALLS)
- {
- fprintf (file, "\tmov\tx9, x30\n");
-- fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
-- fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
-- fprintf (file, "\tblr\tx10\n");
-+ fprintf (file, "\tbl\t__fentry__\n");
- fprintf (file, "\tmov\tx30, x9\n");
- }
- else
- {
-- fprintf (file, "\tmov\tx9, x30\n");
-- fprintf (file, "\tadrp\tx10, __fentry__\n");
-- fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
-- fprintf (file, "\tblr\tx10\n");
-- fprintf (file, "\tmov\tx30, x9\n");
-+ if (flag_pic)
-+ {
-+ fprintf (file, "\tmov\tx9, x30\n");
-+ fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
-+ fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
-+ fprintf (file, "\tblr\tx10\n");
-+ fprintf (file, "\tmov\tx30, x9\n");
-+ }
-+ else
-+ {
-+ fprintf (file, "\tmov\tx9, x30\n");
-+ fprintf (file, "\tadrp\tx10, __fentry__\n");
-+ fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
-+ fprintf (file, "\tblr\tx10\n");
-+ fprintf (file, "\tmov\tx30, x9\n");
-+ }
- }
- }
- }
-@@ -12020,6 +12023,15 @@ aarch64_emit_unlikely_jump (rtx insn)
- add_int_reg_note (jump, REG_BR_PROB, very_unlikely);
- }
-
-+/* Return true, if profiling code should be emitted before
-+ prologue. Otherwise it returns false.
-+ Note: For x86 with "hotfix" it is sorried. */
-+static bool
-+aarch64_profile_before_prologue (void)
-+{
-+ return flag_fentry != 0;
-+}
-+
- /* Expand a compare and swap pattern. */
-
- void
-@@ -14952,6 +14964,9 @@ aarch64_run_selftests (void)
- #undef TARGET_ASM_ALIGNED_SI_OP
- #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t"
-
-+#undef TARGET_PROFILE_BEFORE_PROLOGUE
-+#define TARGET_PROFILE_BEFORE_PROLOGUE aarch64_profile_before_prologue
-+
- #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
- #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
- hook_bool_const_tree_hwi_hwi_const_tree_true
-diff -N -urp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
---- a/gcc/config/aarch64/aarch64.h 2018-09-19 17:11:42.587520820 +0800
-+++ b/gcc/config/aarch64/aarch64.h 2018-09-19 17:10:22.715520820 +0800
-@@ -850,9 +850,12 @@ typedef struct
- { \
- rtx fun, lr; \
- const rtx_insn* tmp = get_insns (); \
-- lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
-- fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
-- emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
-+ if (!flag_fentry) \
-+ { \
-+ lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
-+ fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
-+ emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
-+ } \
- if (TARGET_LONG_CALLS) \
- { \
- emit_insn (gen_blockage ()); \
-diff -N -urp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
---- a/gcc/config/aarch64/aarch64.opt 2018-09-19 17:11:42.587520820 +0800
-+++ b/gcc/config/aarch64/aarch64.opt 2018-09-19 17:10:22.715520820 +0800
-@@ -192,3 +192,7 @@ single precision and to 32 bits for doub
- mverbose-cost-dump
- Common Undocumented Var(flag_aarch64_verbose_cost)
- Enables verbose cost model dumping in the debug dump files.
-+
-+mfentry
-+Target Report Var(flag_fentry) Init(0)
-+Emit profiling counter call at function entry immediately after prologue.
diff --git a/option-mlong-calls.patch b/option-mlong-calls.patch
deleted file mode 100644
index 7aadfbe06b96a1319106194b115f7e1534fadc05..0000000000000000000000000000000000000000
--- a/option-mlong-calls.patch
+++ /dev/null
@@ -1,362 +0,0 @@
-diff -N -urp a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
---- a/gcc/config/aarch64/aarch64-protos.h 2018-11-06 10:43:27.862079389 +0800
-+++ b/gcc/config/aarch64/aarch64-protos.h 2018-11-06 10:44:34.930081154 +0800
-@@ -353,6 +353,10 @@ bool aarch64_use_return_insn_p (void);
- const char *aarch64_mangle_builtin_type (const_tree);
- const char *aarch64_output_casesi (rtx *);
-
-+extern void aarch64_pr_long_calls (struct cpp_reader *);
-+extern void aarch64_pr_no_long_calls (struct cpp_reader *);
-+extern void aarch64_pr_long_calls_off (struct cpp_reader *);
-+
- enum aarch64_symbol_type aarch64_classify_symbol (rtx, rtx);
- enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx);
- enum reg_class aarch64_regno_regclass (unsigned);
-@@ -384,6 +388,7 @@ void aarch64_expand_epilogue (bool);
- void aarch64_expand_mov_immediate (rtx, rtx);
- void aarch64_expand_prologue (void);
- void aarch64_expand_vector_init (rtx, rtx);
-+void aarch64_function_profiler (FILE *, int);
- void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx,
- const_tree, unsigned);
- void aarch64_init_expanders (void);
-diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
---- a/gcc/config/aarch64/aarch64.c 2018-11-06 10:43:27.870079389 +0800
-+++ b/gcc/config/aarch64/aarch64.c 2018-11-06 10:44:34.934081154 +0800
-@@ -70,6 +70,9 @@
- /* This file should be included last. */
- #include "target-def.h"
-
-+static void aarch64_set_default_type_attributes (tree);
-+static int aarch64_comp_type_attributes (const_tree, const_tree);
-+
- /* Defined for convenience. */
- #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT)
-
-@@ -1092,12 +1095,163 @@ aarch64_hard_regno_caller_save_mode (uns
- return choose_hard_reg_mode (regno, nregs, false);
- }
-
-+/* Table of machine attributes. */
-+static const struct attribute_spec aarch64_attribute_table[] =
-+{
-+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
-+ affects_type_identity }. */
-+ /* Function calls made to this symbol must be done indirectly, because
-+ it may lie outside of the 26 bit addressing range of a normal function
-+ call. */
-+ { "long_call", 0, 0, false, true, true, NULL, false },
-+ /* Whereas these functions are always known to reside within the 26 bit
-+ addressing range. */
-+ { "short_call", 0, 0, false, true, true, NULL, false },
-+ { NULL, 0, 0, false, false, false, NULL, false }
-+};
-+
-+/* Encode the current state of the #pragma[no_]long_calls. */
-+typedef enum
-+{
-+ OFF, /* No #pragma[no_]long_calls is in effect. */
-+ LONG, /* #pragma long_calls is in effect. */
-+ SHORT /* #pragma no_long_calls is in effect. */
-+} aarch64_pragma_enum;
-+
-+static aarch64_pragma_enum aarch64_pragma_long_calls = OFF;
-+
-+void
-+aarch64_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
-+{
-+ aarch64_pragma_long_calls = LONG;
-+}
-+
-+void
-+aarch64_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
-+{
-+ aarch64_pragma_long_calls = SHORT;
-+}
-+
-+void
-+aarch64_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
-+{
-+ aarch64_pragma_long_calls = OFF;
-+}
-+
-+/* Return 0 if the attributes for two types are incompatible, 1 if they
-+ are compatible. */
-+static int
-+aarch64_comp_type_attributes (const_tree type1, const_tree type2)
-+{
-+ int l1, l2, s1, s2;
-+
-+ /* Check for mismatch of non-default calling convention. */
-+ if (TREE_CODE (type1) != FUNCTION_TYPE)
-+ return 1;
-+
-+ /* Check for mismatched call attributes. */
-+ l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
-+ l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
-+ s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
-+ s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
-+
-+ /* Only bother to check if an attribute is defined. */
-+ if (l1 | l2 | s1 | s2)
-+ {
-+ /* If one type has an attribute, the other
-+ must have the same attribute. */
-+ if ((l1 != l2) || (s1 != s2))
-+ {
-+ return 0;
-+ }
-+
-+ /* Disallow mixed attributes. */
-+ if ((l1 && s2) || (l2 && s1))
-+ {
-+ return 0;
-+ }
-+ }
-+
-+ return 1;
-+}
-+
-+/* Assigns default attributes to newly defined type. This is used to
-+ set short_call/long_call attributes for function types of
-+ functions defined inside corresponding #pragma scopes. */
-+static void
-+aarch64_set_default_type_attributes (tree type)
-+{
-+ /* Add __attribute__ ((long_call)) to all functions, when
-+ inside #pragma long_calls or __attribute__ ((short_call)),
-+ when inside #pragma no_long_calls. */
-+ if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
-+ {
-+ tree type_attr_list = NULL;
-+ tree attr_name = NULL;
-+ type_attr_list = TYPE_ATTRIBUTES (type);
-+
-+ if (aarch64_pragma_long_calls == LONG)
-+ {
-+ attr_name = get_identifier ("long_call");
-+ }
-+ else if (aarch64_pragma_long_calls == SHORT)
-+ {
-+ attr_name = get_identifier ("short_call");
-+ }
-+ else
-+ {
-+ return;
-+ }
-+
-+ type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
-+ TYPE_ATTRIBUTES (type) = type_attr_list;
-+ }
-+}
-+
-+/* Return true if DECL is known to be linked into section SECTION. */
-+static bool
-+aarch64_function_in_section_p (tree decl, section *section)
-+{
-+ /* We can only be certain about the prevailing symbol definition. */
-+ if (!decl_binds_to_current_def_p (decl))
-+ return false;
-+
-+ /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
-+ if (!DECL_SECTION_NAME (decl))
-+ {
-+ /* Make sure that we will not create a unique section for DECL. */
-+ if (flag_function_sections || DECL_COMDAT_GROUP (decl))
-+ return false;
-+ }
-+
-+ return function_section (decl) == section;
-+}
-+
- /* Return true if calls to DECL should be treated as
- long-calls (ie called via a register). */
- static bool
--aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED)
-+aarch64_decl_is_long_call_p (tree decl)
- {
-- return false;
-+ tree attrs = NULL;
-+
-+ if (!decl)
-+ return TARGET_LONG_CALLS;
-+
-+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
-+ if (lookup_attribute ("short_call", attrs))
-+ return false;
-+
-+ /* For "f", be conservative, and only cater for cases in which the
-+ whole of the current function is placed in the same section. */
-+ if (!flag_reorder_blocks_and_partition
-+ && TREE_CODE (decl) == FUNCTION_DECL
-+ && aarch64_function_in_section_p (decl, current_function_section ()))
-+ return false;
-+
-+ if (lookup_attribute ("long_call", attrs))
-+ return true;
-+
-+ return TARGET_LONG_CALLS;
- }
-
- /* Return true if calls to symbol-ref SYM should be treated as
-@@ -1108,6 +1257,36 @@ aarch64_is_long_call_p (rtx sym)
- return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym));
- }
-
-+void
-+aarch64_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
-+{
-+ if (!TARGET_LONG_CALLS)
-+ {
-+ fprintf (file, "\tmov\tx9, x30\n");
-+ fprintf (file, "\tbl\t__fentry__\n");
-+ fprintf (file, "\tmov\tx30, x9\n");
-+ }
-+ else
-+ {
-+ if (flag_pic)
-+ {
-+ fprintf (file, "\tmov\tx9, x30\n");
-+ fprintf (file, "\tadrp\tx10, :got:__fentry__\n");
-+ fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n");
-+ fprintf (file, "\tblr\tx10\n");
-+ fprintf (file, "\tmov\tx30, x9\n");
-+ }
-+ else
-+ {
-+ fprintf (file, "\tmov\tx9, x30\n");
-+ fprintf (file, "\tadrp\tx10, __fentry__\n");
-+ fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n");
-+ fprintf (file, "\tblr\tx10\n");
-+ fprintf (file, "\tmov\tx30, x9\n");
-+ }
-+ }
-+}
-+
- /* Return true if calls to symbol-ref SYM should not go through
- plt stubs. */
-
-@@ -15099,6 +15278,15 @@ aarch64_libgcc_floating_mode_supported_p
- #undef TARGET_SCHED_CAN_SPECULATE_INSN
- #define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn
-
-+#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
-+#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES aarch64_set_default_type_attributes
-+
-+#undef TARGET_ATTRIBUTE_TABLE
-+#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table
-+
-+#undef TARGET_COMP_TYPE_ATTRIBUTES
-+#define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes
-+
- #undef TARGET_CAN_USE_DOLOOP_P
- #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
-
-diff -N -urp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
---- a/gcc/config/aarch64/aarch64.h 2018-11-06 10:43:27.870079389 +0800
-+++ b/gcc/config/aarch64/aarch64.h 2018-11-06 10:49:29.574088911 +0800
-@@ -28,7 +28,6 @@
-
-
-
--#define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas ()
-
- /* Target machine storage layout. */
-
-@@ -659,6 +658,14 @@ typedef struct
- } CUMULATIVE_ARGS;
- #endif
-
-+/* Handle pragmas for compatibility with Intel's compilers. */
-+#define REGISTER_TARGET_PRAGMAS() do { \
-+ c_register_pragma (0, "long_calls", aarch64_pr_long_calls); \
-+ c_register_pragma (0, "no_long_calls", aarch64_pr_no_long_calls); \
-+ c_register_pragma (0, "long_calls_off", aarch64_pr_long_calls_off); \
-+ aarch64_register_pragmas (); \
-+} while (0)
-+
- #define FUNCTION_ARG_PADDING(MODE, TYPE) \
- (aarch64_pad_arg_upward (MODE, TYPE) ? upward : downward)
-
-@@ -842,13 +849,20 @@ typedef struct
- #define PROFILE_HOOK(LABEL) \
- { \
- rtx fun, lr; \
-+ const rtx_insn* tmp = get_insns (); \
- lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \
- fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \
- emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \
-+ if (TARGET_LONG_CALLS) \
-+ { \
-+ emit_insn (gen_blockage ()); \
-+ emit_insn_after (gen_blockage (), NEXT_INSN (tmp)); \
-+ } \
- }
-
- /* All the work done in PROFILE_HOOK, but still required. */
--#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0)
-+#define FUNCTION_PROFILER(STREAM, LABELNO) \
-+ aarch64_function_profiler (STREAM, LABELNO)
-
- /* For some reason, the Linux headers think they know how to define
- these macros. They don't!!! */
-diff -N -urp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
---- a/gcc/config/aarch64/aarch64.md 2018-11-06 10:43:27.874079389 +0800
-+++ b/gcc/config/aarch64/aarch64.md 2018-11-06 10:44:34.934081154 +0800
-@@ -850,9 +850,10 @@
- {
- rtx pat;
- rtx callee = XEXP (operands[0], 0);
-- if (!REG_P (callee)
-- && ((GET_CODE (callee) != SYMBOL_REF)
-- || aarch64_is_noplt_call_p (callee)))
-+
-+ if (GET_CODE (callee) == SYMBOL_REF
-+ ? (aarch64_is_long_call_p (callee) || aarch64_is_noplt_call_p (callee))
-+ : !REG_P (callee))
- XEXP (operands[0], 0) = force_reg (Pmode, callee);
-
- if (operands[2] == NULL_RTX)
-@@ -881,9 +882,10 @@
- {
- rtx pat;
- rtx callee = XEXP (operands[1], 0);
-- if (!REG_P (callee)
-- && ((GET_CODE (callee) != SYMBOL_REF)
-- || aarch64_is_noplt_call_p (callee)))
-+
-+ if (GET_CODE (callee) == SYMBOL_REF
-+ ? (aarch64_is_long_call_p (callee) || aarch64_is_noplt_call_p (callee))
-+ : !REG_P (callee))
- XEXP (operands[1], 0) = force_reg (Pmode, callee);
-
- if (operands[3] == NULL_RTX)
-diff -N -urp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
---- a/gcc/config/aarch64/aarch64.opt 2018-11-06 10:43:27.874079389 +0800
-+++ b/gcc/config/aarch64/aarch64.opt 2018-11-06 10:44:34.934081154 +0800
-@@ -80,6 +80,10 @@ mlittle-endian
- Target Report RejectNegative InverseMask(BIG_END)
- Assume target CPU is configured as little endian.
-
-+mlong-calls
-+Target Report Mask(LONG_CALLS)
-+Generate call insns as indirect calls, if necessary.
-+
- mcmodel=
- Target RejectNegative Joined Enum(cmodel) Var(aarch64_cmodel_var) Init(AARCH64_CMODEL_SMALL) Save
- Specify the code model.
-diff -N -urp a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
---- a/gcc/config/aarch64/predicates.md 2018-11-06 10:43:27.878079389 +0800
-+++ b/gcc/config/aarch64/predicates.md 2018-11-06 10:44:34.938081154 +0800
-@@ -27,8 +27,9 @@
- )
-
- (define_predicate "aarch64_call_insn_operand"
-- (ior (match_code "symbol_ref")
-- (match_operand 0 "register_operand")))
-+ (ior (and (match_code "symbol_ref")
-+ (match_test "!aarch64_is_long_call_p (op)"))
-+ (match_operand 0 "register_operand")))
-
- ;; Return true if OP a (const_int 0) operand.
- (define_predicate "const0_operand"
diff --git a/try-unroll.patch b/try-unroll.patch
deleted file mode 100644
index 6f564f8054e00e95f0a031f785697257b6c3eac3..0000000000000000000000000000000000000000
--- a/try-unroll.patch
+++ /dev/null
@@ -1,11 +0,0 @@
---- a/gcc/tree-ssa-loop-ivcanon.c 2018-12-06 05:05:43.841181211 +0800
-+++ b/gcc/tree-ssa-loop-ivcanon.c 2018-12-06 05:03:17.545185153 +0800
-@@ -726,7 +726,7 @@ try_unroll_loop_completely (struct loop
- edge_to_cancel = NULL;
- }
-
-- if (!n_unroll_found)
-+ if (!n_unroll_found || SCEV_NOT_KNOWN == TREE_CODE (niter))
- return false;
-
- if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))