diff --git a/aarch64-ilp32-call-addr-dimode.patch b/aarch64-ilp32-call-addr-dimode.patch deleted file mode 100644 index 0a04debb803cf81faeb6867016e635083db10fb4..0000000000000000000000000000000000000000 --- a/aarch64-ilp32-call-addr-dimode.patch +++ /dev/null @@ -1,31 +0,0 @@ -diff -urpN a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md ---- a/gcc/config/aarch64/aarch64.md 2018-10-09 11:30:50.000000000 +0800 -+++ b/gcc/config/aarch64/aarch64.md 2018-10-09 11:52:54.000000000 +0800 -@@ -857,6 +857,13 @@ - : !REG_P (callee)) - XEXP (operands[0], 0) = force_reg (Pmode, callee); - -+ if (TARGET_ILP32 -+ && GET_CODE (XEXP (operands[0], 0)) == SYMBOL_REF -+ && GET_MODE (XEXP (operands[0], 0)) == SImode) -+ XEXP (operands[0], 0) = convert_memory_address (DImode, -+ XEXP (operands[0], 0)); -+ -+ - if (operands[2] == NULL_RTX) - operands[2] = const0_rtx; - -@@ -889,6 +896,13 @@ - : !REG_P (callee)) - XEXP (operands[1], 0) = force_reg (Pmode, callee); - -+ if (TARGET_ILP32 -+ && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF -+ && GET_MODE (XEXP (operands[1], 0)) == SImode) -+ XEXP (operands[1], 0) = convert_memory_address (DImode, -+ XEXP (operands[1], 0)); -+ -+ - if (operands[3] == NULL_RTX) - operands[3] = const0_rtx; - diff --git a/arm-adjust-be-ldrd-strd.patch b/arm-adjust-be-ldrd-strd.patch deleted file mode 100644 index 90278d3e6e8af0f9a66bb68c4f92222043098d10..0000000000000000000000000000000000000000 --- a/arm-adjust-be-ldrd-strd.patch +++ /dev/null @@ -1,60 +0,0 @@ -diff -urp a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c ---- a/gcc/config/arm/arm.c 2019-01-18 11:25:20.840179114 +0800 -+++ b/gcc/config/arm/arm.c 2019-01-18 11:25:47.548179817 +0800 -@@ -14306,18 +14306,36 @@ gen_movmem_ldrd_strd (rtx *operands) - emit_move_insn (reg0, src); - else - { -- emit_insn (gen_unaligned_loadsi (low_reg, src)); -- src = next_consecutive_mem (src); -- emit_insn (gen_unaligned_loadsi (hi_reg, src)); -+ if (flag_lsrd_be_adjust && BYTES_BIG_ENDIAN && WORDS_BIG_ENDIAN) -+ { -+ emit_insn (gen_unaligned_loadsi (hi_reg, src)); -+ src = next_consecutive_mem (src); -+ emit_insn (gen_unaligned_loadsi (low_reg, src)); -+ } -+ else -+ { -+ emit_insn (gen_unaligned_loadsi (low_reg, src)); -+ src = next_consecutive_mem (src); -+ emit_insn (gen_unaligned_loadsi (hi_reg, src)); -+ } - } - - if (dst_aligned) - emit_move_insn (dst, reg0); - else - { -- emit_insn (gen_unaligned_storesi (dst, low_reg)); -- dst = next_consecutive_mem (dst); -- emit_insn (gen_unaligned_storesi (dst, hi_reg)); -+ if (flag_lsrd_be_adjust && BYTES_BIG_ENDIAN && WORDS_BIG_ENDIAN) -+ { -+ emit_insn (gen_unaligned_storesi (dst, hi_reg)); -+ dst = next_consecutive_mem (dst); -+ emit_insn (gen_unaligned_storesi (dst, low_reg)); -+ } -+ else -+ { -+ emit_insn (gen_unaligned_storesi (dst, low_reg)); -+ dst = next_consecutive_mem (dst); -+ emit_insn (gen_unaligned_storesi (dst, hi_reg)); -+ } - } - - src = next_consecutive_mem (src); -diff -urp a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt ---- a/gcc/config/arm/arm.opt 2019-01-18 11:25:20.840179114 +0800 -+++ b/gcc/config/arm/arm.opt 2019-01-18 11:28:51.744184666 +0800 -@@ -274,6 +274,10 @@ masm-syntax-unified - Target Report Var(inline_asm_unified) Init(0) Save - Assume unified syntax for inline assembly code. - -+mlsrd-be-adjust -+Target Report Var(flag_lsrd_be_adjust) Init(1) -+Adjust ldrd/strd splitting order when it's big-endian. -+ - mpure-code - Target Report Var(target_pure_code) Init(0) - Do not allow constant data to be placed in code sections. diff --git a/floop-interchange.patch b/floop-interchange.patch deleted file mode 100644 index 6657eede161b6f1f3bdfe001e2e69ee70b15cb3d..0000000000000000000000000000000000000000 --- a/floop-interchange.patch +++ /dev/null @@ -1,2680 +0,0 @@ -diff -N -urp a/gcc/Makefile.in b/gcc/Makefile.in ---- a/gcc/Makefile.in 2018-11-15 15:59:30.435048460 +0800 -+++ b/gcc/Makefile.in 2018-11-15 16:04:16.735055997 +0800 -@@ -1293,6 +1293,7 @@ OBJS = \ - gimple-fold.o \ - gimple-laddress.o \ - gimple-loop-jam.o \ -+ gimple-loop-interchange.o \ - gimple-low.o \ - gimple-pretty-print.o \ - gimple-ssa-backprop.o \ -diff -N -urp a/gcc/cfgloop.h b/gcc/cfgloop.h ---- a/gcc/cfgloop.h 2018-11-15 15:59:30.439048461 +0800 -+++ b/gcc/cfgloop.h 2018-11-15 16:03:17.431054436 +0800 -@@ -225,6 +225,16 @@ struct GTY ((chain_next ("%h.next"))) lo - builtins. */ - tree simduid; - -+ /* In loop optimization, it's common to generate loops from the original -+ loop. This field records the index of the original loop which can be -+ used to track the original loop from newly generated loops. This can -+ be done by calling function get_loop (cfun, orig_loop_num). Note the -+ original loop could be destroyed for various reasons thus no longer -+ exists, as a result, function call to get_loop returns NULL pointer. -+ In this case, this field should not be used and needs to be cleared -+ whenever possible. */ -+ int orig_loop_num; -+ - /* Upper bound on number of iterations of a loop. */ - struct nb_iter_bound *bounds; - -diff -N -urp a/gcc/common.opt b/gcc/common.opt ---- a/gcc/common.opt 2018-11-15 15:59:30.447048461 +0800 -+++ b/gcc/common.opt 2018-11-15 16:03:17.431054436 +0800 -@@ -1488,8 +1488,8 @@ Common Alias(floop-nest-optimize) - Enable loop nest transforms. Same as -floop-nest-optimize. - - floop-interchange --Common Alias(floop-nest-optimize) --Enable loop nest transforms. Same as -floop-nest-optimize. -+Common Report Var(flag_loop_interchange) Optimization -+Enable loop interchange on trees. - - floop-block - Common Alias(floop-nest-optimize) -diff -N -urp a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi ---- a/gcc/doc/invoke.texi 2018-11-15 15:59:30.451048461 +0800 -+++ b/gcc/doc/invoke.texi 2018-11-15 16:05:06.803057315 +0800 -@@ -8224,11 +8224,9 @@ Perform loop optimizations on trees. Th - at @option{-O} and higher. - - @item -ftree-loop-linear --@itemx -floop-interchange - @itemx -floop-strip-mine - @itemx -floop-block - @opindex ftree-loop-linear --@opindex floop-interchange - @opindex floop-strip-mine - @opindex floop-block - Perform loop nest optimizations. Same as -@@ -8328,6 +8326,25 @@ Apply unroll and jam transformations on - nest this unrolls the outer loop by some factor and fuses the resulting - multiple inner loops. This flag is enabled by default at @option{-O3}. - -+@item -floop-interchange -+@opindex floop-interchange -+Perform loop interchange outside of graphite. This flag can improve cache -+performance on loop nest and allow further loop optimizations, like -+vectorization, to take place. For example, the loop -+@smallexample -+for (int i = 0; i < N; i++) -+ for (int j = 0; j < N; j++) -+ for (int k = 0; k < N; k++) -+ c[i][j] = c[i][j] + a[i][k]*b[k][j]; -+@end smallexample -+is transformed to -+@smallexample -+for (int i = 0; i < N; i++) -+ for (int k = 0; k < N; k++) -+ for (int j = 0; j < N; j++) -+ c[i][j] = c[i][j] + a[i][k]*b[k][j]; -+@end smallexample -+ - @item -ftree-loop-im - @opindex ftree-loop-im - Perform loop invariant motion on trees. This pass moves only invariants that -@@ -10203,6 +10220,12 @@ The size of L1 cache, in kilobytes. - @item l2-cache-size - The size of L2 cache, in kilobytes. - -+@item loop-interchange-max-num-stmts -+The maximum number of stmts in a loop to be interchanged. -+ -+@item loop-interchange-stride-ratio -+The minimum ratio between stride of two loops for interchange to be profitable. -+ - @item min-insn-to-prefetch-ratio - The minimum ratio between the number of instructions and the - number of prefetches to enable prefetching in a loop. -diff -N -urp a/gcc/gimple-loop-interchange.cc b/gcc/gimple-loop-interchange.cc ---- a/gcc/gimple-loop-interchange.cc 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/gimple-loop-interchange.cc 2018-11-15 16:03:17.443054436 +0800 -@@ -0,0 +1,2039 @@ -+/* Loop interchange. -+ Copyright (C) 2017 Free Software Foundation, Inc. -+ Contributed by ARM Ltd. -+ -+This file is part of GCC. -+ -+GCC is free software; you can redistribute it and/or modify it -+under the terms of the GNU General Public License as published by the -+Free Software Foundation; either version 3, or (at your option) any -+later version. -+ -+GCC is distributed in the hope that it will be useful, but WITHOUT -+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ -+ -+#include "config.h" -+#include "system.h" -+#include "coretypes.h" -+#include "backend.h" -+#include "is-a.h" -+#include "tree.h" -+#include "gimple.h" -+#include "tree-pass.h" -+#include "ssa.h" -+#include "gimple-pretty-print.h" -+#include "fold-const.h" -+#include "gimplify.h" -+#include "gimple-iterator.h" -+#include "gimplify-me.h" -+#include "cfgloop.h" -+#include "params.h" -+#include "tree-ssa.h" -+#include "tree-scalar-evolution.h" -+#include "tree-ssa-loop-manip.h" -+#include "tree-ssa-loop-niter.h" -+#include "tree-ssa-loop-ivopts.h" -+#include "tree-ssa-dce.h" -+#include "tree-data-ref.h" -+#include "tree-vectorizer.h" -+ -+/* This pass performs loop interchange: for example, the loop nest -+ -+ for (int j = 0; j < N; j++) -+ for (int k = 0; k < N; k++) -+ for (int i = 0; i < N; i++) -+ c[i][j] = c[i][j] + a[i][k]*b[k][j]; -+ -+ is transformed to -+ -+ for (int i = 0; i < N; i++) -+ for (int j = 0; j < N; j++) -+ for (int k = 0; k < N; k++) -+ c[i][j] = c[i][j] + a[i][k]*b[k][j]; -+ -+ This pass implements loop interchange in the following steps: -+ -+ 1) Find perfect loop nest for each innermost loop and compute data -+ dependence relations for it. For above example, loop nest is -+ . -+ 2) From innermost to outermost loop, this pass tries to interchange -+ each loop pair. For above case, it firstly tries to interchange -+ and loop nest becomes . -+ Then it tries to interchange and loop nest becomes -+ . The overall effect is to move innermost -+ loop to the outermost position. For loop pair -+ to be interchanged, we: -+ 3) Check if data dependence relations are valid for loop interchange. -+ 4) Check if both loops can be interchanged in terms of transformation. -+ 5) Check if interchanging the two loops is profitable. -+ 6) Interchange the two loops by mapping induction variables. -+ -+ This pass also handles reductions in loop nest. So far we only support -+ simple reduction of inner loop and double reduction of the loop nest. */ -+ -+/* Maximum number of stmts in each loop that should be interchanged. */ -+#define MAX_NUM_STMT (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS)) -+/* Maximum number of data references in loop nest. */ -+#define MAX_DATAREFS (PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS)) -+ -+/* Comparison ratio of access stride between inner/outer loops to be -+ interchanged. This is the minimum stride ratio for loop interchange -+ to be profitable. */ -+#define OUTER_STRIDE_RATIO (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO)) -+/* The same as above, but we require higher ratio for interchanging the -+ innermost two loops. */ -+#define INNER_STRIDE_RATIO ((OUTER_STRIDE_RATIO) + 1) -+ -+/* Vector of strides that DR accesses in each level loop of a loop nest. */ -+#define DR_ACCESS_STRIDE(dr) ((vec *) dr->aux) -+ -+/* Structure recording loop induction variable. */ -+typedef struct induction -+{ -+ /* IV itself. */ -+ tree var; -+ /* IV's initializing value, which is the init arg of the IV PHI node. */ -+ tree init_val; -+ /* IV's initializing expr, which is (the expanded result of) init_val. */ -+ tree init_expr; -+ /* IV's step. */ -+ tree step; -+} *induction_p; -+ -+/* Enum type for loop reduction variable. */ -+enum reduction_type -+{ -+ UNKNOWN_RTYPE = 0, -+ SIMPLE_RTYPE, -+ DOUBLE_RTYPE -+}; -+ -+/* Structure recording loop reduction variable. */ -+typedef struct reduction -+{ -+ /* Reduction itself. */ -+ tree var; -+ /* PHI node defining reduction variable. */ -+ gphi *phi; -+ /* Init and next variables of the reduction. */ -+ tree init; -+ tree next; -+ /* Lcssa PHI node if reduction is used outside of its definition loop. */ -+ gphi *lcssa_phi; -+ /* Stmts defining init and next. */ -+ gimple *producer; -+ gimple *consumer; -+ /* If init is loaded from memory, this is the loading memory reference. */ -+ tree init_ref; -+ /* If reduction is finally stored to memory, this is the stored memory -+ reference. */ -+ tree fini_ref; -+ enum reduction_type type; -+} *reduction_p; -+ -+ -+/* Dump reduction RE. */ -+ -+static void -+dump_reduction (reduction_p re) -+{ -+ if (re->type == SIMPLE_RTYPE) -+ fprintf (dump_file, " Simple reduction: "); -+ else if (re->type == DOUBLE_RTYPE) -+ fprintf (dump_file, " Double reduction: "); -+ else -+ fprintf (dump_file, " Unknown reduction: "); -+ -+ print_gimple_stmt (dump_file, re->phi, 0); -+} -+ -+/* Dump LOOP's induction IV. */ -+static void -+dump_induction (struct loop *loop, induction_p iv) -+{ -+ fprintf (dump_file, " Induction: "); -+ print_generic_expr (dump_file, iv->var, TDF_SLIM); -+ fprintf (dump_file, " = {"); -+ print_generic_expr (dump_file, iv->init_expr, TDF_SLIM); -+ fprintf (dump_file, ", "); -+ print_generic_expr (dump_file, iv->step, TDF_SLIM); -+ fprintf (dump_file, "}_%d\n", loop->num); -+} -+ -+/* Loop candidate for interchange. */ -+ -+struct loop_cand -+{ -+ loop_cand (struct loop *, struct loop *); -+ ~loop_cand (); -+ -+ reduction_p find_reduction_by_stmt (gimple *); -+ void classify_simple_reduction (reduction_p); -+ bool analyze_iloop_reduction_var (tree); -+ bool analyze_oloop_reduction_var (loop_cand *, tree); -+ bool analyze_induction_var (tree, tree); -+ bool analyze_carried_vars (loop_cand *); -+ bool analyze_lcssa_phis (void); -+ bool can_interchange_p (loop_cand *); -+ bool supported_operations (basic_block, loop_cand *, int *); -+ void undo_simple_reduction (reduction_p, bitmap); -+ -+ /* The loop itself. */ -+ struct loop *m_loop; -+ /* The outer loop for interchange. It equals to loop if this loop cand -+ itself represents the outer loop. */ -+ struct loop *m_outer; -+ /* Vector of induction variables in loop. */ -+ vec m_inductions; -+ /* Vector of reduction variables in loop. */ -+ vec m_reductions; -+ /* Lcssa PHI nodes of this loop. */ -+ vec m_lcssa_nodes; -+ /* Single exit edge of this loop. */ -+ edge m_exit; -+ /* Basic blocks of this loop. */ -+ basic_block *m_bbs; -+}; -+ -+/* Constructor. */ -+ -+loop_cand::loop_cand (struct loop *loop, struct loop *outer) -+ : m_loop (loop), m_outer (outer), -+ m_exit (single_exit (loop)), m_bbs (get_loop_body (loop)) -+{ -+ m_inductions.create (3); -+ m_reductions.create (3); -+ m_lcssa_nodes.create (3); -+} -+ -+/* Destructor. */ -+ -+loop_cand::~loop_cand () -+{ -+ induction_p iv; -+ for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i) -+ free (iv); -+ -+ reduction_p re; -+ for (unsigned i = 0; m_reductions.iterate (i, &re); ++i) -+ free (re); -+ -+ m_inductions.release (); -+ m_reductions.release (); -+ m_lcssa_nodes.release (); -+ free (m_bbs); -+} -+ -+/* Return single use stmt of VAR in LOOP, otherwise return NULL. */ -+ -+static gimple * -+single_use_in_loop (tree var, struct loop *loop) -+{ -+ gimple *stmt, *res = NULL; -+ use_operand_p use_p; -+ imm_use_iterator iterator; -+ -+ FOR_EACH_IMM_USE_FAST (use_p, iterator, var) -+ { -+ stmt = USE_STMT (use_p); -+ if (is_gimple_debug (stmt)) -+ continue; -+ -+ if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt))) -+ continue; -+ -+ if (res) -+ return NULL; -+ -+ res = stmt; -+ } -+ return res; -+} -+ -+/* Return true if E is unsupported in loop interchange, i.e, E is a complex -+ edge or part of irreducible loop. */ -+ -+static inline bool -+unsupported_edge (edge e) -+{ -+ return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP)); -+} -+ -+/* Return the reduction if STMT is one of its lcssa PHI, producer or consumer -+ stmt. */ -+ -+reduction_p -+loop_cand::find_reduction_by_stmt (gimple *stmt) -+{ -+ gphi *phi = dyn_cast (stmt); -+ reduction_p re; -+ -+ for (unsigned i = 0; m_reductions.iterate (i, &re); ++i) -+ if ((phi != NULL && phi == re->lcssa_phi) -+ || (stmt == re->producer || stmt == re->consumer)) -+ return re; -+ -+ return NULL; -+} -+ -+/* Return true if all stmts in BB can be supported by loop interchange, -+ otherwise return false. ILOOP is not NULL if this loop_cand is the -+ outer loop in loop nest. Add the number of supported statements to -+ NUM_STMTS. */ -+ -+bool -+loop_cand::supported_operations (basic_block bb, loop_cand *iloop, -+ int *num_stmts) -+{ -+ int bb_num_stmts = 0; -+ gphi_iterator psi; -+ gimple_stmt_iterator gsi; -+ -+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) -+ { -+ gimple *stmt = gsi_stmt (gsi); -+ if (is_gimple_debug (stmt)) -+ continue; -+ -+ if (gimple_has_side_effects (stmt)) -+ return false; -+ -+ bb_num_stmts++; -+ if (gcall *call = dyn_cast (stmt)) -+ { -+ /* In basic block of outer loop, the call should be cheap since -+ it will be moved to inner loop. */ -+ if (iloop != NULL -+ && !gimple_inexpensive_call_p (call)) -+ return false; -+ continue; -+ } -+ -+ if (!iloop || !gimple_vuse (stmt)) -+ continue; -+ -+ /* Support stmt accessing memory in outer loop only if it is for inner -+ loop's reduction. */ -+ if (iloop->find_reduction_by_stmt (stmt)) -+ continue; -+ -+ tree lhs; -+ /* Support loop invariant memory reference if it's only used once by -+ inner loop. */ -+ /* ??? How's this checking for invariantness? */ -+ if (gimple_assign_single_p (stmt) -+ && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE -+ && TREE_CODE (lhs) == SSA_NAME -+ && single_use_in_loop (lhs, iloop->m_loop)) -+ continue; -+ -+ return false; -+ } -+ *num_stmts += bb_num_stmts; -+ -+ /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer -+ loop's header, or PHI nodes in dest bb of inner loop's exit edge. */ -+ if (!iloop || bb == m_loop->header -+ || bb == iloop->m_exit->dest) -+ return true; -+ -+ /* Don't allow any other PHI nodes. */ -+ for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi)) -+ if (!virtual_operand_p (PHI_RESULT (psi.phi ()))) -+ return false; -+ -+ return true; -+} -+ -+/* Return true if current loop_cand be interchanged. ILOOP is not NULL if -+ current loop_cand is outer loop in loop nest. */ -+ -+bool -+loop_cand::can_interchange_p (loop_cand *iloop) -+{ -+ /* For now we only support at most one reduction. */ -+ unsigned allowed_reduction_num = 1; -+ -+ /* Only support reduction if the loop nest to be interchanged is the -+ innermostin two loops. */ -+ if ((iloop == NULL && m_loop->inner != NULL) -+ || (iloop != NULL && iloop->m_loop->inner != NULL)) -+ allowed_reduction_num = 0; -+ -+ if (m_reductions.length () > allowed_reduction_num -+ || (m_reductions.length () == 1 -+ && m_reductions[0]->type == UNKNOWN_RTYPE)) -+ return false; -+ -+ /* Only support lcssa PHI node which is for reduction. */ -+ if (m_lcssa_nodes.length () > allowed_reduction_num) -+ return false; -+ -+ int num_stmts = 0; -+ /* Check basic blocks other than loop header/exit. */ -+ for (unsigned i = 0; i < m_loop->num_nodes; i++) -+ { -+ basic_block bb = m_bbs[i]; -+ -+ /* Skip basic blocks of inner loops. */ -+ if (bb->loop_father != m_loop) -+ continue; -+ -+ /* Check if basic block has any unsupported operation. */ -+ if (!supported_operations (bb, iloop, &num_stmts)) -+ return false; -+ -+ /* Check if loop has too many stmts. */ -+ if (num_stmts > MAX_NUM_STMT) -+ return false; -+ } -+ -+ return true; -+} -+ -+/* Programmers and optimizers (like loop store motion) may optimize code: -+ -+ for (int i = 0; i < N; i++) -+ for (int j = 0; j < N; j++) -+ a[i] += b[j][i] * c[j][i]; -+ -+ into reduction: -+ -+ for (int i = 0; i < N; i++) -+ { -+ // producer. Note sum can be intitialized to a constant. -+ int sum = a[i]; -+ for (int j = 0; j < N; j++) -+ { -+ sum += b[j][i] * c[j][i]; -+ } -+ // consumer. -+ a[i] = sum; -+ } -+ -+ The result code can't be interchanged without undoing the optimization. -+ This function classifies this kind reduction and records information so -+ that we can undo the store motion during interchange. */ -+ -+void -+loop_cand::classify_simple_reduction (reduction_p re) -+{ -+ gimple *producer, *consumer; -+ -+ /* Check init variable of reduction and how it is initialized. */ -+ if (TREE_CODE (re->init) == SSA_NAME) -+ { -+ producer = SSA_NAME_DEF_STMT (re->init); -+ re->producer = producer; -+ basic_block bb = gimple_bb (producer); -+ if (!bb || bb->loop_father != m_outer) -+ return; -+ -+ if (!gimple_assign_load_p (producer)) -+ return; -+ -+ re->init_ref = gimple_assign_rhs1 (producer); -+ } -+ else if (!CONSTANT_CLASS_P (re->init)) -+ return; -+ -+ /* Check how reduction variable is used. */ -+ consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer); -+ if (!consumer -+ || !gimple_store_p (consumer)) -+ return; -+ -+ re->fini_ref = gimple_get_lhs (consumer); -+ re->consumer = consumer; -+ -+ /* Simple reduction with constant initializer. */ -+ if (!re->init_ref) -+ { -+ gcc_assert (CONSTANT_CLASS_P (re->init)); -+ re->init_ref = unshare_expr (re->fini_ref); -+ } -+ -+ /* Require memory references in producer and consumer are the same so -+ that we can undo reduction during interchange. */ -+ if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0)) -+ return; -+ -+ re->type = SIMPLE_RTYPE; -+} -+ -+/* Analyze reduction variable VAR for inner loop of the loop nest to be -+ interchanged. Return true if analysis succeeds. */ -+ -+bool -+loop_cand::analyze_iloop_reduction_var (tree var) -+{ -+ gphi *phi = as_a (SSA_NAME_DEF_STMT (var)); -+ gphi *lcssa_phi = NULL, *use_phi; -+ tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop)); -+ tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop)); -+ reduction_p re; -+ gimple *stmt, *next_def, *single_use = NULL; -+ use_operand_p use_p; -+ imm_use_iterator iterator; -+ -+ if (TREE_CODE (next) != SSA_NAME) -+ return false; -+ -+ next_def = SSA_NAME_DEF_STMT (next); -+ basic_block bb = gimple_bb (next_def); -+ if (!bb || !flow_bb_inside_loop_p (m_loop, bb)) -+ return false; -+ -+ /* In restricted reduction, the var is (and must be) used in defining -+ the updated var. The process can be depicted as below: -+ -+ var ;; = PHI -+ | -+ | -+ v -+ +---------------------+ -+ | reduction operators | <-- other operands -+ +---------------------+ -+ | -+ | -+ v -+ next -+ -+ In terms loop interchange, we don't change how NEXT is computed based -+ on VAR and OTHER OPERANDS. In case of double reduction in loop nest -+ to be interchanged, we don't changed it at all. In the case of simple -+ reduction in inner loop, we only make change how VAR/NEXT is loaded or -+ stored. With these conditions, we can relax restrictions on reduction -+ in a way that reduction operation is seen as black box. In general, -+ we can ignore reassociation of reduction operator; we can handle fake -+ reductions in which VAR is not even used to compute NEXT. */ -+ if (! single_imm_use (var, &use_p, &single_use) -+ || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use))) -+ return false; -+ -+ /* Check the reduction operation. We require a left-associative operation. -+ For FP math we also need to be allowed to associate operations. */ -+ if (gassign *ass = dyn_cast (single_use)) -+ { -+ enum tree_code code = gimple_assign_rhs_code (ass); -+ if (! (associative_tree_code (code) -+ || (code == MINUS_EXPR -+ && use_p->use == gimple_assign_rhs1_ptr (ass))) -+ || (FLOAT_TYPE_P (TREE_TYPE (var)) -+ && ! flag_associative_math)) -+ return false; -+ } -+ else -+ return false; -+ -+ /* Handle and verify a series of stmts feeding the reduction op. */ -+ if (single_use != next_def -+ && !check_reduction_path (UNKNOWN_LOCATION, m_loop, phi, next, -+ gimple_assign_rhs_code (single_use))) -+ return false; -+ -+ /* Only support cases in which INIT is used in inner loop. */ -+ if (TREE_CODE (init) == SSA_NAME) -+ FOR_EACH_IMM_USE_FAST (use_p, iterator, init) -+ { -+ stmt = USE_STMT (use_p); -+ if (is_gimple_debug (stmt)) -+ continue; -+ -+ if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt))) -+ return false; -+ } -+ -+ FOR_EACH_IMM_USE_FAST (use_p, iterator, next) -+ { -+ stmt = USE_STMT (use_p); -+ if (is_gimple_debug (stmt)) -+ continue; -+ -+ /* Or else it's used in PHI itself. */ -+ use_phi = dyn_cast (stmt); -+ if (use_phi == phi) -+ continue; -+ -+ if (use_phi != NULL -+ && lcssa_phi == NULL -+ && gimple_bb (stmt) == m_exit->dest -+ && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next) -+ lcssa_phi = use_phi; -+ else -+ return false; -+ } -+ if (!lcssa_phi) -+ return false; -+ -+ re = XCNEW (struct reduction); -+ re->var = var; -+ re->init = init; -+ re->next = next; -+ re->phi = phi; -+ re->lcssa_phi = lcssa_phi; -+ -+ classify_simple_reduction (re); -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ dump_reduction (re); -+ -+ m_reductions.safe_push (re); -+ return true; -+} -+ -+/* Analyze reduction variable VAR for outer loop of the loop nest to be -+ interchanged. ILOOP is not NULL and points to inner loop. For the -+ moment, we only support double reduction for outer loop, like: -+ -+ for (int i = 0; i < n; i++) -+ { -+ int sum = 0; -+ -+ for (int j = 0; j < n; j++) // outer loop -+ for (int k = 0; k < n; k++) // inner loop -+ sum += a[i][k]*b[k][j]; -+ -+ s[i] = sum; -+ } -+ -+ Note the innermost two loops are the loop nest to be interchanged. -+ Return true if analysis succeeds. */ -+ -+bool -+loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var) -+{ -+ gphi *phi = as_a (SSA_NAME_DEF_STMT (var)); -+ gphi *lcssa_phi = NULL, *use_phi; -+ tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop)); -+ tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop)); -+ reduction_p re; -+ gimple *stmt, *next_def; -+ use_operand_p use_p; -+ imm_use_iterator iterator; -+ -+ if (TREE_CODE (next) != SSA_NAME) -+ return false; -+ -+ next_def = SSA_NAME_DEF_STMT (next); -+ basic_block bb = gimple_bb (next_def); -+ if (!bb || !flow_bb_inside_loop_p (m_loop, bb)) -+ return false; -+ -+ /* Find inner loop's simple reduction that uses var as initializer. */ -+ reduction_p inner_re = NULL; -+ for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i) -+ if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0)) -+ break; -+ -+ if (inner_re == NULL -+ || inner_re->type != UNKNOWN_RTYPE -+ || inner_re->producer != phi) -+ return false; -+ -+ /* In case of double reduction, outer loop's reduction should be updated -+ by inner loop's simple reduction. */ -+ if (next_def != inner_re->lcssa_phi) -+ return false; -+ -+ /* Outer loop's reduction should only be used to initialize inner loop's -+ simple reduction. */ -+ if (! single_imm_use (var, &use_p, &stmt) -+ || stmt != inner_re->phi) -+ return false; -+ -+ /* Check this reduction is correctly used outside of loop via lcssa phi. */ -+ FOR_EACH_IMM_USE_FAST (use_p, iterator, next) -+ { -+ stmt = USE_STMT (use_p); -+ if (is_gimple_debug (stmt)) -+ continue; -+ -+ /* Or else it's used in PHI itself. */ -+ use_phi = dyn_cast (stmt); -+ if (use_phi == phi) -+ continue; -+ -+ if (lcssa_phi == NULL -+ && use_phi != NULL -+ && gimple_bb (stmt) == m_exit->dest -+ && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next) -+ lcssa_phi = use_phi; -+ else -+ return false; -+ } -+ if (!lcssa_phi) -+ return false; -+ -+ re = XCNEW (struct reduction); -+ re->var = var; -+ re->init = init; -+ re->next = next; -+ re->phi = phi; -+ re->lcssa_phi = lcssa_phi; -+ re->type = DOUBLE_RTYPE; -+ inner_re->type = DOUBLE_RTYPE; -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ dump_reduction (re); -+ -+ m_reductions.safe_push (re); -+ return true; -+} -+ -+/* Return true if VAR is induction variable of current loop whose scev is -+ specified by CHREC. */ -+ -+bool -+loop_cand::analyze_induction_var (tree var, tree chrec) -+{ -+ gphi *phi = as_a (SSA_NAME_DEF_STMT (var)); -+ tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop)); -+ -+ /* Var is loop invariant, though it's unlikely to happen. */ -+ if (tree_does_not_contain_chrecs (chrec)) -+ { -+ struct induction *iv = XCNEW (struct induction); -+ iv->var = var; -+ iv->init_val = init; -+ iv->init_expr = chrec; -+ iv->step = build_int_cst (TREE_TYPE (chrec), 0); -+ m_inductions.safe_push (iv); -+ return true; -+ } -+ -+ if (TREE_CODE (chrec) != POLYNOMIAL_CHREC -+ || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num -+ || tree_contains_chrecs (CHREC_LEFT (chrec), NULL) -+ || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL)) -+ return false; -+ -+ struct induction *iv = XCNEW (struct induction); -+ iv->var = var; -+ iv->init_val = init; -+ iv->init_expr = CHREC_LEFT (chrec); -+ iv->step = CHREC_RIGHT (chrec); -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ dump_induction (m_loop, iv); -+ -+ m_inductions.safe_push (iv); -+ return true; -+} -+ -+/* Return true if all loop carried variables defined in loop header can -+ be successfully analyzed. */ -+ -+bool -+loop_cand::analyze_carried_vars (loop_cand *iloop) -+{ -+ edge e = loop_preheader_edge (m_outer); -+ gphi_iterator gsi; -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num); -+ -+ for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi)) -+ { -+ gphi *phi = gsi.phi (); -+ -+ tree var = PHI_RESULT (phi); -+ if (virtual_operand_p (var)) -+ continue; -+ -+ tree chrec = analyze_scalar_evolution (m_loop, var); -+ chrec = instantiate_scev (e, m_loop, chrec); -+ -+ /* Analyze var as reduction variable. */ -+ if (chrec_contains_undetermined (chrec) -+ || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num)) -+ { -+ if (iloop && !analyze_oloop_reduction_var (iloop, var)) -+ return false; -+ if (!iloop && !analyze_iloop_reduction_var (var)) -+ return false; -+ } -+ /* Analyze var as induction variable. */ -+ else if (!analyze_induction_var (var, chrec)) -+ return false; -+ } -+ -+ return true; -+} -+ -+/* Return TRUE if loop closed PHI nodes can be analyzed successfully. */ -+ -+bool -+loop_cand::analyze_lcssa_phis (void) -+{ -+ gphi_iterator gsi; -+ for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi)) -+ { -+ gphi *phi = gsi.phi (); -+ -+ if (virtual_operand_p (PHI_RESULT (phi))) -+ continue; -+ -+ /* TODO: We only support lcssa phi for reduction for now. */ -+ if (!find_reduction_by_stmt (phi)) -+ return false; -+ } -+ -+ return true; -+} -+ -+/* CONSUMER is a stmt in BB storing reduction result into memory object. -+ When the reduction is intialized from constant value, we need to add -+ a stmt loading from the memory object to target basic block in inner -+ loop during undoing the reduction. Problem is that memory reference -+ may use ssa variables not dominating the target basic block. This -+ function finds all stmts on which CONSUMER depends in basic block BB, -+ records and returns them via STMTS. */ -+ -+static void -+find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer) -+{ -+ auto_vec worklist; -+ use_operand_p use_p; -+ ssa_op_iter iter; -+ gimple *stmt, *def_stmt; -+ gimple_stmt_iterator gsi; -+ -+ /* First clear flag for stmts in bb. */ -+ for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) -+ gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false); -+ -+ /* DFS search all depended stmts in bb and mark flag for these stmts. */ -+ worklist.safe_push (consumer); -+ while (!worklist.is_empty ()) -+ { -+ stmt = worklist.pop (); -+ FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE) -+ { -+ def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p)); -+ -+ if (is_a (def_stmt) -+ || gimple_bb (def_stmt) != bb -+ || gimple_plf (def_stmt, GF_PLF_1)) -+ continue; -+ -+ worklist.safe_push (def_stmt); -+ } -+ gimple_set_plf (stmt, GF_PLF_1, true); -+ } -+ for (gsi = gsi_start_bb_nondebug (bb); -+ !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;) -+ { -+ /* Move dep stmts to sequence STMTS. */ -+ if (gimple_plf (stmt, GF_PLF_1)) -+ { -+ gsi_remove (&gsi, false); -+ gimple_seq_add_stmt_without_update (stmts, stmt); -+ } -+ else -+ gsi_next_nondebug (&gsi); -+ } -+} -+ -+/* User can write, optimizers can generate simple reduction RE for inner -+ loop. In order to make interchange valid, we have to undo reduction by -+ moving producer and consumer stmts into the inner loop. For example, -+ below code: -+ -+ init = MEM_REF[idx]; //producer -+ loop: -+ var = phi -+ next = var op ... -+ reduc_sum = phi -+ MEM_REF[idx] = reduc_sum //consumer -+ -+ is transformed into: -+ -+ loop: -+ new_var = MEM_REF[idx]; //producer after moving -+ next = new_var op ... -+ MEM_REF[idx] = next; //consumer after moving -+ -+ Note if the reduction variable is initialized to constant, like: -+ -+ var = phi<0.0, next> -+ -+ we compute new_var as below: -+ -+ loop: -+ tmp = MEM_REF[idx]; -+ new_var = !first_iteration ? tmp : 0.0; -+ -+ so that the initial const is used in the first iteration of loop. Also -+ record ssa variables for dead code elimination in DCE_SEEDS. */ -+ -+void -+loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds) -+{ -+ gimple *stmt; -+ gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header); -+ gimple_seq stmts = NULL; -+ tree new_var; -+ -+ /* Prepare the initialization stmts and insert it to inner loop. */ -+ if (re->producer != NULL) -+ { -+ gimple_set_vuse (re->producer, NULL_TREE); -+ from = gsi_for_stmt (re->producer); -+ gsi_remove (&from, false); -+ gimple_seq_add_stmt_without_update (&stmts, re->producer); -+ new_var = re->init; -+ } -+ else -+ { -+ /* Find all stmts on which expression "MEM_REF[idx]" depends. */ -+ find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer); -+ /* Because we generate new stmt loading from the MEM_REF to TMP. */ -+ tree cond, tmp = copy_ssa_name (re->var); -+ stmt = gimple_build_assign (tmp, re->init_ref); -+ gimple_seq_add_stmt_without_update (&stmts, stmt); -+ -+ /* Init new_var to MEM_REF or CONST depending on if it is the first -+ iteration. */ -+ induction_p iv = m_inductions[0]; -+ cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init_val); -+ new_var = copy_ssa_name (re->var); -+ stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init); -+ gimple_seq_add_stmt_without_update (&stmts, stmt); -+ } -+ gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT); -+ -+ /* Replace all uses of reduction var with new variable. */ -+ use_operand_p use_p; -+ imm_use_iterator iterator; -+ FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var) -+ { -+ FOR_EACH_IMM_USE_ON_STMT (use_p, iterator) -+ SET_USE (use_p, new_var); -+ -+ update_stmt (stmt); -+ } -+ -+ /* Move consumer stmt into inner loop, just after reduction next's def. */ -+ unlink_stmt_vdef (re->consumer); -+ release_ssa_name (gimple_vdef (re->consumer)); -+ gimple_set_vdef (re->consumer, NULL_TREE); -+ gimple_set_vuse (re->consumer, NULL_TREE); -+ gimple_assign_set_rhs1 (re->consumer, re->next); -+ from = gsi_for_stmt (re->consumer); -+ to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next)); -+ gsi_move_after (&from, &to); -+ -+ /* Mark the reduction variables for DCE. */ -+ bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var)); -+ bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi))); -+} -+ -+/* Free DATAREFS and its auxiliary memory. */ -+ -+static void -+free_data_refs_with_aux (vec datarefs) -+{ -+ data_reference_p dr; -+ for (unsigned i = 0; datarefs.iterate (i, &dr); ++i) -+ if (dr->aux != NULL) -+ { -+ DR_ACCESS_STRIDE (dr)->release (); -+ free (dr->aux); -+ } -+ -+ free_data_refs (datarefs); -+} -+ -+/* Class for loop interchange transformation. */ -+ -+class tree_loop_interchange -+{ -+public: -+ tree_loop_interchange (vec loop_nest) -+ : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE), -+ m_dce_seeds (BITMAP_ALLOC (NULL)) { } -+ ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); } -+ bool interchange (vec, vec); -+ -+private: -+ void update_data_info (unsigned, unsigned, vec, vec); -+ bool valid_data_dependences (unsigned, unsigned, vec); -+ void interchange_loops (loop_cand &, loop_cand &); -+ void map_inductions_to_loop (loop_cand &, loop_cand &); -+ void move_code_to_inner_loop (struct loop *, struct loop *, basic_block *); -+ -+ /* The whole loop nest in which interchange is ongoing. */ -+ vec m_loop_nest; -+ /* We create new IV which is only used in loop's exit condition check. -+ In case of 3-level loop nest interchange, when we interchange the -+ innermost two loops, new IV created in the middle level loop does -+ not need to be preserved in interchanging the outermost two loops -+ later. We record the IV so that it can be skipped. */ -+ tree m_niters_iv_var; -+ /* Bitmap of seed variables for dead code elimination after interchange. */ -+ bitmap m_dce_seeds; -+}; -+ -+/* Update data refs' access stride and dependence information after loop -+ interchange. I_IDX/O_IDX gives indices of interchanged loops in loop -+ nest. DATAREFS are data references. DDRS are data dependences. */ -+ -+void -+tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx, -+ vec datarefs, -+ vec ddrs) -+{ -+ struct data_reference *dr; -+ struct data_dependence_relation *ddr; -+ -+ /* Update strides of data references. */ -+ for (unsigned i = 0; datarefs.iterate (i, &dr); ++i) -+ { -+ vec *stride = DR_ACCESS_STRIDE (dr); -+ gcc_assert (stride->length () > i_idx); -+ std::swap ((*stride)[i_idx], (*stride)[o_idx]); -+ } -+ /* Update data dependences. */ -+ for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i) -+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known) -+ { -+ for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j) -+ { -+ lambda_vector dist_vect = DDR_DIST_VECT (ddr, j); -+ std::swap (dist_vect[i_idx], dist_vect[o_idx]); -+ } -+ } -+} -+ -+/* Check data dependence relations, return TRUE if it's valid to interchange -+ two loops specified by I_IDX/O_IDX. Theoretically, interchanging the two -+ loops is valid only if dist vector, after interchanging, doesn't have '>' -+ as the leftmost non-'=' direction. Practically, this function have been -+ conservative here by not checking some valid cases. */ -+ -+bool -+tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx, -+ vec ddrs) -+{ -+ struct data_dependence_relation *ddr; -+ -+ for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i) -+ { -+ /* Skip no-dependence case. */ -+ if (DDR_ARE_DEPENDENT (ddr) == chrec_known) -+ continue; -+ -+ for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j) -+ { -+ lambda_vector dist_vect = DDR_DIST_VECT (ddr, j); -+ unsigned level = dependence_level (dist_vect, m_loop_nest.length ()); -+ -+ /* If there is no carried dependence. */ -+ if (level == 0) -+ continue; -+ -+ level --; -+ -+ /* If dependence is not carried by any loop in between the two -+ loops [oloop, iloop] to interchange. */ -+ if (level < o_idx || level > i_idx) -+ continue; -+ -+ /* Be conservative, skip case if either direction at i_idx/o_idx -+ levels is not '=' or '<'. */ -+ if (dist_vect[i_idx] < 0 || dist_vect[o_idx] < 0) -+ return false; -+ } -+ } -+ -+ return true; -+} -+ -+/* Interchange two loops specified by ILOOP and OLOOP. */ -+ -+void -+tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop) -+{ -+ reduction_p re; -+ gimple_stmt_iterator gsi; -+ tree i_niters, o_niters, var_after; -+ -+ /* Undo inner loop's simple reduction. */ -+ for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i) -+ if (re->type != DOUBLE_RTYPE) -+ { -+ if (re->producer) -+ reset_debug_uses (re->producer); -+ -+ iloop.undo_simple_reduction (re, m_dce_seeds); -+ } -+ -+ /* Only need to reset debug uses for double reduction. */ -+ for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i) -+ { -+ gcc_assert (re->type == DOUBLE_RTYPE); -+ reset_debug_uses (SSA_NAME_DEF_STMT (re->var)); -+ reset_debug_uses (SSA_NAME_DEF_STMT (re->next)); -+ } -+ -+ /* Prepare niters for both loops. */ -+ struct loop *loop_nest = m_loop_nest[0]; -+ edge instantiate_below = loop_preheader_edge (loop_nest); -+ gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src); -+ i_niters = number_of_latch_executions (iloop.m_loop); -+ i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters); -+ i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop), -+ i_niters); -+ i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true, -+ NULL_TREE, false, GSI_CONTINUE_LINKING); -+ o_niters = number_of_latch_executions (oloop.m_loop); -+ if (oloop.m_loop != loop_nest) -+ { -+ o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters); -+ o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop), -+ o_niters); -+ } -+ o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true, -+ NULL_TREE, false, GSI_CONTINUE_LINKING); -+ -+ /* Move src's code to tgt loop. This is necessary when src is the outer -+ loop and tgt is the inner loop. */ -+ move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs); -+ -+ /* Map outer loop's IV to inner loop, and vice versa. */ -+ map_inductions_to_loop (oloop, iloop); -+ map_inductions_to_loop (iloop, oloop); -+ -+ /* Create canonical IV for both loops. Note canonical IV for outer/inner -+ loop is actually from inner/outer loop. Also we record the new IV -+ created for the outer loop so that it can be skipped in later loop -+ interchange. */ -+ create_canonical_iv (oloop.m_loop, oloop.m_exit, -+ i_niters, &m_niters_iv_var, &var_after); -+ bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after)); -+ create_canonical_iv (iloop.m_loop, iloop.m_exit, -+ o_niters, NULL, &var_after); -+ bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after)); -+ -+ /* Scrap niters estimation of interchanged loops. */ -+ iloop.m_loop->any_upper_bound = false; -+ iloop.m_loop->any_likely_upper_bound = false; -+ free_numbers_of_iterations_estimates_loop (iloop.m_loop); -+ oloop.m_loop->any_upper_bound = false; -+ oloop.m_loop->any_likely_upper_bound = false; -+ free_numbers_of_iterations_estimates_loop (oloop.m_loop); -+ -+ /* ??? The association between the loop data structure and the -+ CFG changed, so what was loop N at the source level is now -+ loop M. We should think of retaining the association or breaking -+ it fully by creating a new loop instead of re-using the "wrong" one. */ -+} -+ -+/* Map induction variables of SRC loop to TGT loop. The function firstly -+ creates the same IV of SRC loop in TGT loop, then deletes the original -+ IV and re-initialize it using the newly created IV. For example, loop -+ nest: -+ -+ for (i = 0; i < N; i++) -+ for (j = 0; j < M; j++) -+ { -+ //use of i; -+ //use of j; -+ } -+ -+ will be transformed into: -+ -+ for (jj = 0; jj < M; jj++) -+ for (ii = 0; ii < N; ii++) -+ { -+ //use of ii; -+ //use of jj; -+ } -+ -+ after loop interchange. */ -+ -+void -+tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt) -+{ -+ induction_p iv; -+ edge e = tgt.m_exit; -+ gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi; -+ -+ /* Map source loop's IV to target loop. */ -+ for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i) -+ { -+ gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var); -+ gcc_assert (is_a (stmt)); -+ -+ use_operand_p use_p; -+ /* Only map original IV to target loop. */ -+ if (m_niters_iv_var != iv->var) -+ { -+ /* Map the IV by creating the same one in target loop. */ -+ tree var_before, var_after; -+ tree base = unshare_expr (iv->init_expr); -+ tree step = unshare_expr (iv->step); -+ create_iv (base, step, SSA_NAME_VAR (iv->var), -+ tgt.m_loop, &incr_pos, false, &var_before, &var_after); -+ bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before)); -+ bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after)); -+ -+ /* Replace uses of the original IV var with newly created IV var. */ -+ imm_use_iterator imm_iter; -+ FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var) -+ { -+ FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) -+ SET_USE (use_p, var_before); -+ -+ update_stmt (use_stmt); -+ } -+ } -+ -+ /* Mark all uses for DCE. */ -+ ssa_op_iter op_iter; -+ FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE) -+ { -+ tree use = USE_FROM_PTR (use_p); -+ if (TREE_CODE (use) == SSA_NAME -+ && ! SSA_NAME_IS_DEFAULT_DEF (use)) -+ bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use)); -+ } -+ -+ /* Delete definition of the original IV in the source loop. */ -+ gsi = gsi_for_stmt (stmt); -+ remove_phi_node (&gsi, true); -+ } -+} -+ -+/* Move stmts of outer loop to inner loop. */ -+ -+void -+tree_loop_interchange::move_code_to_inner_loop (struct loop *outer, -+ struct loop *inner, -+ basic_block *outer_bbs) -+{ -+ basic_block oloop_exit_bb = single_exit (outer)->src; -+ gimple_stmt_iterator gsi, to; -+ -+ for (unsigned i = 0; i < outer->num_nodes; i++) -+ { -+ basic_block bb = outer_bbs[i]; -+ -+ /* Skip basic blocks of inner loop. */ -+ if (flow_bb_inside_loop_p (inner, bb)) -+ continue; -+ -+ /* Move code from header/latch to header/latch. */ -+ if (bb == outer->header) -+ to = gsi_after_labels (inner->header); -+ else if (bb == outer->latch) -+ to = gsi_after_labels (inner->latch); -+ else -+ /* Otherwise, simply move to exit->src. */ -+ to = gsi_last_bb (single_exit (inner)->src); -+ -+ for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);) -+ { -+ gimple *stmt = gsi_stmt (gsi); -+ -+ if (oloop_exit_bb == bb -+ && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb))) -+ { -+ gsi_next (&gsi); -+ continue; -+ } -+ -+ if (gimple_vuse (stmt)) -+ gimple_set_vuse (stmt, NULL_TREE); -+ if (gimple_vdef (stmt)) -+ { -+ unlink_stmt_vdef (stmt); -+ release_ssa_name (gimple_vdef (stmt)); -+ gimple_set_vdef (stmt, NULL_TREE); -+ } -+ -+ reset_debug_uses (stmt); -+ gsi_move_before (&gsi, &to); -+ } -+ } -+} -+ -+/* Given data reference DR in LOOP_NEST, the function computes DR's access -+ stride at each level of loop from innermost LOOP to outer. On success, -+ it saves access stride at each level loop in a vector which is pointed -+ by DR->aux. For example: -+ -+ int arr[100][100][100]; -+ for (i = 0; i < 100; i++) ;(DR->aux)strides[0] = 40000 -+ for (j = 100; j > 0; j--) ;(DR->aux)strides[1] = 400 -+ for (k = 0; k < 100; k++) ;(DR->aux)strides[2] = 4 -+ arr[i][j - 1][k] = 0; */ -+ -+static void -+compute_access_stride (struct loop *loop_nest, struct loop *loop, -+ data_reference_p dr) -+{ -+ vec *strides = new vec (); -+ basic_block bb = gimple_bb (DR_STMT (dr)); -+ -+ while (!flow_bb_inside_loop_p (loop, bb)) -+ { -+ strides->safe_push (build_int_cst (sizetype, 0)); -+ loop = loop_outer (loop); -+ } -+ gcc_assert (loop == bb->loop_father); -+ -+ tree ref = DR_REF (dr); -+ tree scev_base = build_fold_addr_expr (ref); -+ tree scev = analyze_scalar_evolution (loop, scev_base); -+ scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev); -+ if (! chrec_contains_undetermined (scev)) -+ { -+ tree sl = scev; -+ struct loop *expected = loop; -+ while (TREE_CODE (sl) == POLYNOMIAL_CHREC) -+ { -+ struct loop *sl_loop = get_chrec_loop (sl); -+ while (sl_loop != expected) -+ { -+ strides->safe_push (size_int (0)); -+ expected = loop_outer (expected); -+ } -+ strides->safe_push (CHREC_RIGHT (sl)); -+ sl = CHREC_LEFT (sl); -+ expected = loop_outer (expected); -+ } -+ if (! tree_contains_chrecs (sl, NULL)) -+ while (expected != loop_outer (loop_nest)) -+ { -+ strides->safe_push (size_int (0)); -+ expected = loop_outer (expected); -+ } -+ } -+ -+ dr->aux = strides; -+} -+ -+/* Given loop nest LOOP_NEST with innermost LOOP, the function computes -+ access strides with respect to each level loop for all data refs in -+ DATAREFS from inner loop to outer loop. On success, it returns the -+ outermost loop that access strides can be computed successfully for -+ all data references. If access strides cannot be computed at least -+ for two levels of loop for any data reference, it returns NULL. */ -+ -+static struct loop * -+compute_access_strides (struct loop *loop_nest, struct loop *loop, -+ vec datarefs) -+{ -+ unsigned i, j, num_loops = (unsigned) -1; -+ data_reference_p dr; -+ vec *stride; -+ -+ for (i = 0; datarefs.iterate (i, &dr); ++i) -+ { -+ compute_access_stride (loop_nest, loop, dr); -+ stride = DR_ACCESS_STRIDE (dr); -+ if (stride->length () < num_loops) -+ { -+ num_loops = stride->length (); -+ if (num_loops < 2) -+ return NULL; -+ } -+ } -+ -+ for (i = 0; datarefs.iterate (i, &dr); ++i) -+ { -+ stride = DR_ACCESS_STRIDE (dr); -+ if (stride->length () > num_loops) -+ stride->truncate (num_loops); -+ -+ for (j = 0; j < (num_loops >> 1); ++j) -+ std::swap ((*stride)[j], (*stride)[num_loops - j - 1]); -+ } -+ -+ loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops); -+ gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop)); -+ return loop; -+} -+ -+/* Prune access strides for data references in DATAREFS by removing strides -+ of loops that isn't in current LOOP_NEST. */ -+ -+static void -+prune_access_strides_not_in_loop (struct loop *loop_nest, -+ struct loop *innermost, -+ vec datarefs) -+{ -+ data_reference_p dr; -+ unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1; -+ gcc_assert (num_loops > 1); -+ -+ /* Block remove strides of loops that is not in current loop nest. */ -+ for (unsigned i = 0; datarefs.iterate (i, &dr); ++i) -+ { -+ vec *stride = DR_ACCESS_STRIDE (dr); -+ if (stride->length () > num_loops) -+ stride->block_remove (0, stride->length () - num_loops); -+ } -+} -+ -+/* Dump access strides for all DATAREFS. */ -+ -+static void -+dump_access_strides (vec datarefs) -+{ -+ data_reference_p dr; -+ fprintf (dump_file, "Access Strides for DRs:\n"); -+ for (unsigned i = 0; datarefs.iterate (i, &dr); ++i) -+ { -+ fprintf (dump_file, " "); -+ print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM); -+ fprintf (dump_file, ":\t\t<"); -+ -+ vec *stride = DR_ACCESS_STRIDE (dr); -+ unsigned num_loops = stride->length (); -+ for (unsigned j = 0; j < num_loops; ++j) -+ { -+ print_generic_expr (dump_file, (*stride)[j], TDF_SLIM); -+ fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n"); -+ } -+ } -+} -+ -+/* Return true if it's profitable to interchange two loops whose index -+ in whole loop nest vector are I_IDX/O_IDX respectively. The function -+ computes and compares three types information from all DATAREFS: -+ 1) Access stride for loop I_IDX and O_IDX. -+ 2) Number of invariant memory references with respect to I_IDX before -+ and after loop interchange. -+ 3) Flags indicating if all memory references access sequential memory -+ in ILOOP, before and after loop interchange. -+ If INNMOST_LOOP_P is true, the two loops for interchanging are the two -+ innermost loops in loop nest. This function also dumps information if -+ DUMP_INFO_P is true. */ -+ -+static bool -+should_interchange_loops (unsigned i_idx, unsigned o_idx, -+ vec datarefs, -+ bool innermost_loops_p, bool dump_info_p = true) -+{ -+ unsigned HOST_WIDE_INT ratio; -+ unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0; -+ struct data_reference *dr; -+ bool all_seq_dr_before_p = true, all_seq_dr_after_p = true; -+ widest_int iloop_strides = 0, oloop_strides = 0; -+ unsigned num_unresolved_drs = 0; -+ unsigned num_resolved_ok_drs = 0; -+ unsigned num_resolved_not_ok_drs = 0; -+ -+ if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n"); -+ -+ for (i = 0; datarefs.iterate (i, &dr); ++i) -+ { -+ vec *stride = DR_ACCESS_STRIDE (dr); -+ tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx]; -+ -+ bool subloop_stride_p = false; -+ /* Data ref can't be invariant or sequential access at current loop if -+ its address changes with respect to any subloops. */ -+ for (j = i_idx + 1; j < stride->length (); ++j) -+ if (!integer_zerop ((*stride)[j])) -+ { -+ subloop_stride_p = true; -+ break; -+ } -+ -+ if (integer_zerop (iloop_stride)) -+ { -+ if (!subloop_stride_p) -+ num_old_inv_drs++; -+ } -+ if (integer_zerop (oloop_stride)) -+ { -+ if (!subloop_stride_p) -+ num_new_inv_drs++; -+ } -+ -+ if (TREE_CODE (iloop_stride) == INTEGER_CST -+ && TREE_CODE (oloop_stride) == INTEGER_CST) -+ { -+ iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride)); -+ oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride)); -+ } -+ else if (multiple_of_p (TREE_TYPE (iloop_stride), -+ iloop_stride, oloop_stride)) -+ num_resolved_ok_drs++; -+ else if (multiple_of_p (TREE_TYPE (iloop_stride), -+ oloop_stride, iloop_stride)) -+ num_resolved_not_ok_drs++; -+ else -+ num_unresolved_drs++; -+ -+ /* Data ref can't be sequential access if its address changes in sub -+ loop. */ -+ if (subloop_stride_p) -+ { -+ all_seq_dr_before_p = false; -+ all_seq_dr_after_p = false; -+ continue; -+ } -+ /* Track if all data references are sequential accesses before/after loop -+ interchange. Note invariant is considered sequential here. */ -+ tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))); -+ if (all_seq_dr_before_p -+ && ! (integer_zerop (iloop_stride) -+ || operand_equal_p (access_size, iloop_stride, 0))) -+ all_seq_dr_before_p = false; -+ if (all_seq_dr_after_p -+ && ! (integer_zerop (oloop_stride) -+ || operand_equal_p (access_size, oloop_stride, 0))) -+ all_seq_dr_after_p = false; -+ } -+ -+ if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "\toverall:\t\t"); -+ print_decu (iloop_strides, dump_file); -+ fprintf (dump_file, "\t"); -+ print_decu (oloop_strides, dump_file); -+ fprintf (dump_file, "\n"); -+ -+ fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n", -+ num_old_inv_drs, num_new_inv_drs); -+ fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n", -+ all_seq_dr_before_p ? "true" : "false", -+ all_seq_dr_after_p ? "true" : "false"); -+ fprintf (dump_file, "OK to interchage with variable strides: %d\n", -+ num_resolved_ok_drs); -+ fprintf (dump_file, "Not OK to interchage with variable strides: %d\n", -+ num_resolved_not_ok_drs); -+ fprintf (dump_file, "Variable strides we cannot decide: %d\n", -+ num_unresolved_drs); -+ } -+ -+ if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0) -+ return false; -+ -+ /* We use different stride comparison ratio for interchanging innermost -+ two loops or not. The idea is to be conservative in interchange for -+ the innermost loops. */ -+ ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO; -+ /* Do interchange if it gives better data locality behavior. */ -+ if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio))) -+ return true; -+ if (wi::gtu_p (iloop_strides, oloop_strides)) -+ { -+ /* Or it creates more invariant memory references. */ -+ if ((!all_seq_dr_before_p || all_seq_dr_after_p) -+ && num_new_inv_drs > num_old_inv_drs) -+ return true; -+ /* Or it makes all memory references sequential. */ -+ if (num_new_inv_drs >= num_old_inv_drs -+ && !all_seq_dr_before_p && all_seq_dr_after_p) -+ return true; -+ } -+ -+ return false; -+} -+ -+/* Try to interchange inner loop of a loop nest to outer level. */ -+ -+bool -+tree_loop_interchange::interchange (vec datarefs, -+ vec ddrs) -+{ -+ bool changed_p = false; -+ /* In each iteration we try to interchange I-th loop with (I+1)-th loop. -+ The overall effect is to push inner loop to outermost level in whole -+ loop nest. */ -+ for (unsigned i = m_loop_nest.length (); i > 1; --i) -+ { -+ unsigned i_idx = i - 1, o_idx = i - 2; -+ -+ /* Check validity for loop interchange. */ -+ if (!valid_data_dependences (i_idx, o_idx, ddrs)) -+ break; -+ -+ loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]); -+ loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]); -+ -+ /* Check if we can do transformation for loop interchange. */ -+ if (!iloop.analyze_carried_vars (NULL) -+ || !iloop.analyze_lcssa_phis () -+ || !oloop.analyze_carried_vars (&iloop) -+ || !oloop.analyze_lcssa_phis () -+ || !iloop.can_interchange_p (NULL) -+ || !oloop.can_interchange_p (&iloop)) -+ break; -+ -+ /* Check profitability for loop interchange. */ -+ if (should_interchange_loops (i_idx, o_idx, datarefs, -+ iloop.m_loop->inner == NULL)) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, -+ "Loop_pair is interchanged\n\n", -+ oloop.m_loop->num, iloop.m_loop->num); -+ -+ changed_p = true; -+ interchange_loops (iloop, oloop); -+ /* No need to update if there is no further loop interchange. */ -+ if (o_idx > 0) -+ update_data_info (i_idx, o_idx, datarefs, ddrs); -+ } -+ else -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, -+ "Loop_pair is not interchanged\n\n", -+ oloop.m_loop->num, iloop.m_loop->num); -+ } -+ } -+ -+ simple_dce_from_worklist (m_dce_seeds); -+ return changed_p; -+} -+ -+ -+/* Loop interchange pass. */ -+ -+namespace { -+ -+const pass_data pass_data_linterchange = -+{ -+ GIMPLE_PASS, /* type */ -+ "linterchange", /* name */ -+ OPTGROUP_LOOP, /* optinfo_flags */ -+ TV_LINTERCHANGE, /* tv_id */ -+ PROP_cfg, /* properties_required */ -+ 0, /* properties_provided */ -+ 0, /* properties_destroyed */ -+ 0, /* todo_flags_start */ -+ 0, /* todo_flags_finish */ -+}; -+ -+class pass_linterchange : public gimple_opt_pass -+{ -+public: -+ pass_linterchange (gcc::context *ctxt) -+ : gimple_opt_pass (pass_data_linterchange, ctxt) -+ {} -+ -+ /* opt_pass methods: */ -+ opt_pass * clone () { return new pass_linterchange (m_ctxt); } -+ virtual bool gate (function *) { return flag_loop_interchange; } -+ virtual unsigned int execute (function *); -+ -+}; // class pass_linterchange -+ -+ -+/* Return true if LOOP has proper form for interchange. We check three -+ conditions in the function: -+ 1) In general, a loop can be interchanged only if it doesn't have -+ basic blocks other than header, exit and latch besides possible -+ inner loop nest. This basically restricts loop interchange to -+ below form loop nests: -+ -+ header<---+ -+ | | -+ v | -+ INNER_LOOP | -+ | | -+ v | -+ exit--->latch -+ -+ 2) Data reference in basic block that executes in different times -+ than loop head/exit is not allowed. -+ 3) Record the innermost outer loop that doesn't form rectangle loop -+ nest with LOOP. */ -+ -+static bool -+proper_loop_form_for_interchange (struct loop *loop, struct loop **min_outer) -+{ -+ edge e0, e1, exit; -+ -+ /* Don't interchange if loop has unsupported information for the moment. */ -+ if (loop->safelen > 0 -+ || loop->constraints != 0 -+ || loop->can_be_parallel -+ || loop->dont_vectorize -+ || loop->force_vectorize -+ || loop->in_oacc_kernels_region -+ || loop->orig_loop_num != 0 -+ || loop->simduid != NULL_TREE) -+ return false; -+ -+ /* Don't interchange if outer loop has basic block other than header, exit -+ and latch. */ -+ if (loop->inner != NULL -+ && loop->num_nodes != loop->inner->num_nodes + 3) -+ return false; -+ -+ if ((exit = single_dom_exit (loop)) == NULL) -+ return false; -+ -+ /* Check control flow on loop header/exit blocks. */ -+ if (loop->header == exit->src -+ && (EDGE_COUNT (loop->header->preds) != 2 -+ || EDGE_COUNT (loop->header->succs) != 2)) -+ return false; -+ else if (loop->header != exit->src -+ && (EDGE_COUNT (loop->header->preds) != 2 -+ || !single_succ_p (loop->header) -+ || unsupported_edge (single_succ_edge (loop->header)) -+ || EDGE_COUNT (exit->src->succs) != 2 -+ || !single_pred_p (exit->src) -+ || unsupported_edge (single_pred_edge (exit->src)))) -+ return false; -+ -+ e0 = EDGE_PRED (loop->header, 0); -+ e1 = EDGE_PRED (loop->header, 1); -+ if (unsupported_edge (e0) || unsupported_edge (e1) -+ || (e0->src != loop->latch && e1->src != loop->latch) -+ || (e0->src->loop_father == loop && e1->src->loop_father == loop)) -+ return false; -+ -+ e0 = EDGE_SUCC (exit->src, 0); -+ e1 = EDGE_SUCC (exit->src, 1); -+ if (unsupported_edge (e0) || unsupported_edge (e1) -+ || (e0->dest != loop->latch && e1->dest != loop->latch) -+ || (e0->dest->loop_father == loop && e1->dest->loop_father == loop)) -+ return false; -+ -+ /* Don't interchange if any reference is in basic block that doesn't -+ dominate exit block. */ -+ basic_block *bbs = get_loop_body (loop); -+ for (unsigned i = 0; i < loop->num_nodes; i++) -+ { -+ basic_block bb = bbs[i]; -+ -+ if (bb->loop_father != loop -+ || bb == loop->header || bb == exit->src -+ || dominated_by_p (CDI_DOMINATORS, exit->src, bb)) -+ continue; -+ -+ for (gimple_stmt_iterator gsi = gsi_start_bb_nondebug (bb); -+ !gsi_end_p (gsi); gsi_next_nondebug (&gsi)) -+ if (gimple_vuse (gsi_stmt (gsi))) -+ { -+ free (bbs); -+ return false; -+ } -+ } -+ free (bbs); -+ -+ tree niters = number_of_latch_executions (loop); -+ niters = analyze_scalar_evolution (loop_outer (loop), niters); -+ if (!niters || chrec_contains_undetermined (niters)) -+ return false; -+ -+ /* Record the innermost outer loop that doesn't form rectangle loop nest. */ -+ for (loop_p loop2 = loop_outer (loop); -+ loop2 && flow_loop_nested_p (*min_outer, loop2); -+ loop2 = loop_outer (loop2)) -+ { -+ niters = instantiate_scev (loop_preheader_edge (loop2), -+ loop_outer (loop), niters); -+ if (!evolution_function_is_invariant_p (niters, loop2->num)) -+ { -+ *min_outer = loop2; -+ break; -+ } -+ } -+ return true; -+} -+ -+/* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST] -+ should be interchanged by looking into all DATAREFS. */ -+ -+static bool -+should_interchange_loop_nest (struct loop *loop_nest, struct loop *innermost, -+ vec datarefs) -+{ -+ unsigned idx = loop_depth (innermost) - loop_depth (loop_nest); -+ gcc_assert (idx > 0); -+ -+ /* Check if any two adjacent loops should be interchanged. */ -+ for (struct loop *loop = innermost; -+ loop != loop_nest; loop = loop_outer (loop), idx--) -+ if (should_interchange_loops (idx, idx - 1, datarefs, -+ loop == innermost, false)) -+ return true; -+ -+ return false; -+} -+ -+/* Given loop nest LOOP_NEST and data references DATAREFS, compute data -+ dependences for loop interchange and store it in DDRS. Note we compute -+ dependences directly rather than call generic interface so that we can -+ return on unknown dependence instantly. */ -+ -+static bool -+tree_loop_interchange_compute_ddrs (vec loop_nest, -+ vec datarefs, -+ vec *ddrs) -+{ -+ struct data_reference *a, *b; -+ struct loop *innermost = loop_nest.last (); -+ -+ for (unsigned i = 0; datarefs.iterate (i, &a); ++i) -+ { -+ bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost; -+ for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j) -+ if (DR_IS_WRITE (a) || DR_IS_WRITE (b)) -+ { -+ bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost; -+ /* Don't support multiple write references in outer loop. */ -+ if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b)) -+ return false; -+ -+ ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest); -+ ddrs->safe_push (ddr); -+ compute_affine_dependence (ddr, loop_nest[0]); -+ -+ /* Give up if ddr is unknown dependence or classic direct vector -+ is not available. */ -+ if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know -+ || (DDR_ARE_DEPENDENT (ddr) == NULL_TREE -+ && DDR_NUM_DIR_VECTS (ddr) == 0)) -+ return false; -+ -+ /* If either data references is in outer loop of nest, we require -+ no dependence here because the data reference need to be moved -+ into inner loop during interchange. */ -+ if (a_outer_p && b_outer_p -+ && operand_equal_p (DR_REF (a), DR_REF (b), 0)) -+ continue; -+ if (DDR_ARE_DEPENDENT (ddr) != chrec_known -+ && (a_outer_p || b_outer_p)) -+ return false; -+ } -+ } -+ -+ return true; -+} -+ -+/* Prune DATAREFS by removing any data reference not inside of LOOP. */ -+ -+static inline void -+prune_datarefs_not_in_loop (struct loop *loop, vec datarefs) -+{ -+ unsigned i, j; -+ struct data_reference *dr; -+ -+ for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i) -+ { -+ if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr)))) -+ datarefs[j++] = dr; -+ else -+ { -+ if (dr->aux) -+ { -+ DR_ACCESS_STRIDE (dr)->release (); -+ free (dr->aux); -+ } -+ free_data_ref (dr); -+ } -+ } -+ datarefs.truncate (j); -+} -+ -+/* Find and store data references in DATAREFS for LOOP nest. If there's -+ difficult data reference in a basic block, we shrink the loop nest to -+ inner loop of that basic block's father loop. On success, return the -+ outer loop of the result loop nest. */ -+ -+static struct loop * -+prepare_data_references (struct loop *loop, vec *datarefs) -+{ -+ struct loop *loop_nest = loop; -+ vec *bb_refs; -+ basic_block bb, *bbs = get_loop_body_in_dom_order (loop); -+ -+ for (unsigned i = 0; i < loop->num_nodes; i++) -+ bbs[i]->aux = NULL; -+ -+ /* Find data references for all basic blocks. Shrink loop nest on difficult -+ data reference. */ -+ for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i) -+ { -+ bb = bbs[i]; -+ if (!flow_bb_inside_loop_p (loop_nest, bb)) -+ continue; -+ -+ bb_refs = new vec (); -+ if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know) -+ { -+ loop_nest = bb->loop_father->inner; -+ if (loop_nest && !loop_nest->inner) -+ loop_nest = NULL; -+ -+ free_data_refs (*bb_refs); -+ delete bb_refs; -+ } -+ else if (bb_refs->is_empty ()) -+ delete bb_refs; -+ else -+ bb->aux = bb_refs; -+ } -+ -+ /* Collect all data references in loop nest. */ -+ for (unsigned i = 0; i < loop->num_nodes; i++) -+ { -+ bb = bbs[i]; -+ if (!bb->aux) -+ continue; -+ -+ bb_refs = (vec *) bb->aux; -+ if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb)) -+ datarefs->safe_splice (*bb_refs); -+ else -+ free_data_refs (*bb_refs); -+ -+ delete bb_refs; -+ bb->aux = NULL; -+ } -+ free (bbs); -+ -+ return loop_nest; -+} -+ -+/* Given innermost LOOP, return true if perfect loop nest can be found and -+ data dependences can be computed. If succeed, record the perfect loop -+ nest in LOOP_NEST; record all data references in DATAREFS and record all -+ data dependence relations in DDRS. -+ -+ We do support a restricted form of imperfect loop nest, i.e, loop nest -+ with load/store in outer loop initializing/finalizing simple reduction -+ of the innermost loop. For such outer loop reference, we require that -+ it has no dependence with others sinve it will be moved to inner loop -+ in interchange. */ -+ -+static bool -+prepare_perfect_loop_nest (struct loop *loop, vec *loop_nest, -+ vec *datarefs, vec *ddrs) -+{ -+ struct loop *start_loop = NULL, *innermost = loop; -+ struct loop *outermost = loops_for_fn (cfun)->tree_root; -+ -+ /* Find loop nest from the innermost loop. The outermost is the innermost -+ outer*/ -+ while (loop->num != 0 && loop->inner == start_loop -+ && flow_loop_nested_p (outermost, loop)) -+ { -+ if (!proper_loop_form_for_interchange (loop, &outermost)) -+ break; -+ -+ start_loop = loop; -+ /* If this loop has sibling loop, the father loop won't be in perfect -+ loop nest. */ -+ if (loop->next != NULL) -+ break; -+ -+ loop = loop_outer (loop); -+ } -+ if (!start_loop || !start_loop->inner) -+ return false; -+ -+ /* Prepare the data reference vector for the loop nest, pruning outer -+ loops we cannot handle. */ -+ start_loop = prepare_data_references (start_loop, datarefs); -+ if (!start_loop -+ /* Check if there is no data reference. */ -+ || datarefs->is_empty () -+ /* Check if there are too many of data references. */ -+ || (int) datarefs->length () > MAX_DATAREFS) -+ return false; -+ -+ /* Compute access strides for all data references, pruning outer -+ loops we cannot analyze refs in. */ -+ start_loop = compute_access_strides (start_loop, innermost, *datarefs); -+ if (!start_loop) -+ return false; -+ -+ /* Check if any interchange is profitable in the loop nest. */ -+ if (!should_interchange_loop_nest (start_loop, innermost, *datarefs)) -+ return false; -+ -+ /* Check if data dependences can be computed for loop nest starting from -+ start_loop. */ -+ loop = start_loop; -+ do { -+ loop_nest->truncate (0); -+ -+ if (loop != start_loop) -+ prune_datarefs_not_in_loop (start_loop, *datarefs); -+ -+ if (find_loop_nest (start_loop, loop_nest) -+ && tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs)) -+ { -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ fprintf (dump_file, -+ "\nConsider loop interchange for loop_nest<%d - %d>\n", -+ start_loop->num, innermost->num); -+ -+ if (loop != start_loop) -+ prune_access_strides_not_in_loop (start_loop, innermost, *datarefs); -+ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ dump_access_strides (*datarefs); -+ -+ return true; -+ } -+ -+ free_dependence_relations (*ddrs); -+ *ddrs = vNULL; -+ /* Try to compute data dependences with the outermost loop stripped. */ -+ loop = start_loop; -+ start_loop = start_loop->inner; -+ } while (start_loop && start_loop->inner); -+ -+ return false; -+} -+ -+/* Main entry for loop interchange pass. */ -+ -+unsigned int -+pass_linterchange::execute (function *fun) -+{ -+ if (number_of_loops (fun) <= 2) -+ return 0; -+ -+ bool changed_p = false; -+ struct loop *loop; -+ FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) -+ { -+ vec loop_nest = vNULL; -+ vec datarefs = vNULL; -+ vec ddrs = vNULL; -+ if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs)) -+ { -+ tree_loop_interchange loop_interchange (loop_nest); -+ changed_p |= loop_interchange.interchange (datarefs, ddrs); -+ } -+ free_dependence_relations (ddrs); -+ free_data_refs_with_aux (datarefs); -+ loop_nest.release (); -+ } -+ -+ if (changed_p) -+ scev_reset_htab (); -+ -+ return changed_p ? (TODO_update_ssa_only_virtuals) : 0; -+} -+ -+} // anon namespace -+ -+gimple_opt_pass * -+make_pass_linterchange (gcc::context *ctxt) -+{ -+ return new pass_linterchange (ctxt); -+} -diff -N -urp a/gcc/gimple-pretty-print.h b/gcc/gimple-pretty-print.h ---- a/gcc/gimple-pretty-print.h 2018-11-15 15:54:01.223039794 +0800 -+++ b/gcc/gimple-pretty-print.h 2018-11-15 16:03:17.447054436 +0800 -@@ -27,10 +27,10 @@ along with GCC; see the file COPYING3. - extern void debug_gimple_stmt (gimple *); - extern void debug_gimple_seq (gimple_seq); - extern void print_gimple_seq (FILE *, gimple_seq, int, int); --extern void print_gimple_stmt (FILE *, gimple *, int, int); -+extern void print_gimple_stmt (FILE *, gimple *, int, int = 0); - extern void debug (gimple &ref); - extern void debug (gimple *ptr); --extern void print_gimple_expr (FILE *, gimple *, int, int); -+extern void print_gimple_expr (FILE *, gimple *, int, int = 0); - extern void pp_gimple_stmt_1 (pretty_printer *, gimple *, int, int); - extern void gimple_dump_bb (FILE *, basic_block, int, int); - extern void gimple_dump_bb_for_graph (pretty_printer *, basic_block); -diff -N -urp a/gcc/opts.c b/gcc/opts.c ---- a/gcc/opts.c 2018-11-15 15:59:30.459048461 +0800 -+++ b/gcc/opts.c 2018-11-15 16:03:17.447054436 +0800 -@@ -538,6 +538,7 @@ static const struct default_options defa - { OPT_LEVELS_3_PLUS, OPT_fgcse_after_reload, NULL, 1 }, - { OPT_LEVELS_3_PLUS, OPT_ftree_loop_vectorize, NULL, 1 }, - { OPT_LEVELS_3_PLUS, OPT_ftree_slp_vectorize, NULL, 1 }, -+ { OPT_LEVELS_3_PLUS, OPT_floop_interchange, NULL, 1 }, - { OPT_LEVELS_3_PLUS, OPT_fvect_cost_model_, NULL, VECT_COST_MODEL_DYNAMIC }, - { OPT_LEVELS_3_PLUS, OPT_fipa_cp_clone, NULL, 1 }, - { OPT_LEVELS_3_PLUS, OPT_ftree_partial_pre, NULL, 1 }, -diff -N -urp a/gcc/params.def b/gcc/params.def ---- a/gcc/params.def 2018-11-15 15:59:30.459048461 +0800 -+++ b/gcc/params.def 2018-11-15 16:03:17.451054437 +0800 -@@ -780,6 +780,20 @@ DEFPARAM (PARAM_L2_CACHE_SIZE, - "The size of L2 cache.", - 512, 0, 0) - -+/* Maximum number of statements in loop nest for loop interchange. */ -+ -+DEFPARAM (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS, -+ "loop-interchange-max-num-stmts", -+ "The maximum number of stmts in loop nest for loop interchange.", -+ 64, 0, 0) -+ -+/* Minimum stride ratio for loop interchange to be profitiable. */ -+ -+DEFPARAM (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO, -+ "loop-interchange-stride-ratio", -+ "The minimum stride ratio for loop interchange to be profitable", -+ 2, 0, 0) -+ - /* Whether we should use canonical types rather than deep "structural" - type checking. Setting this value to 1 (the default) improves - compilation performance in the C++ and Objective-C++ front end; -diff -N -urp a/gcc/passes.def b/gcc/passes.def ---- a/gcc/passes.def 2018-11-15 15:59:30.463048461 +0800 -+++ b/gcc/passes.def 2018-11-15 16:03:17.451054437 +0800 -@@ -278,6 +278,7 @@ along with GCC; see the file COPYING3. - NEXT_PASS (pass_cd_dce); - NEXT_PASS (pass_record_bounds); - NEXT_PASS (pass_loop_distribution); -+ NEXT_PASS (pass_linterchange); - NEXT_PASS (pass_copy_prop); - NEXT_PASS (pass_graphite); - PUSH_INSERT_PASSES_WITHIN (pass_graphite) -diff -N -urp a/gcc/timevar.def b/gcc/timevar.def ---- a/gcc/timevar.def 2018-11-15 15:59:30.463048461 +0800 -+++ b/gcc/timevar.def 2018-11-15 16:03:17.455054437 +0800 -@@ -182,6 +182,7 @@ DEFTIMEVAR (TV_TREE_LOOP , "tree lo - DEFTIMEVAR (TV_TREE_NOLOOP , "loopless fn") - DEFTIMEVAR (TV_TREE_LOOP_BOUNDS , "tree loop bounds") - DEFTIMEVAR (TV_LIM , "tree loop invariant motion") -+DEFTIMEVAR (TV_LINTERCHANGE , "tree loop interchange") - DEFTIMEVAR (TV_TREE_LOOP_IVCANON , "tree canonical iv") - DEFTIMEVAR (TV_SCEV_CONST , "scev constant prop") - DEFTIMEVAR (TV_TREE_LOOP_UNSWITCH , "tree loop unswitching") -diff -N -urp a/gcc/tree-pass.h b/gcc/tree-pass.h ---- a/gcc/tree-pass.h 2018-11-15 15:59:30.467048461 +0800 -+++ b/gcc/tree-pass.h 2018-11-15 16:03:17.455054437 +0800 -@@ -367,6 +367,7 @@ extern gimple_opt_pass *make_pass_tree_l - extern gimple_opt_pass *make_pass_tree_no_loop (gcc::context *ctxt); - extern gimple_opt_pass *make_pass_tree_loop_init (gcc::context *ctxt); - extern gimple_opt_pass *make_pass_lim (gcc::context *ctxt); -+extern gimple_opt_pass *make_pass_linterchange (gcc::context *ctxt); - extern gimple_opt_pass *make_pass_tree_unswitch (gcc::context *ctxt); - extern gimple_opt_pass *make_pass_loop_split (gcc::context *ctxt); - extern gimple_opt_pass *make_pass_loop_jam (gcc::context *ctxt); -diff -N -urp a/gcc/tree-pretty-print.h b/gcc/tree-pretty-print.h ---- a/gcc/tree-pretty-print.h 2018-11-15 15:54:01.439039800 +0800 -+++ b/gcc/tree-pretty-print.h 2018-11-15 16:03:17.455054437 +0800 -@@ -37,7 +37,7 @@ extern void debug_tree_chain (tree); - extern void print_generic_decl (FILE *, tree, int); - extern void print_generic_stmt (FILE *, tree, int); - extern void print_generic_stmt_indented (FILE *, tree, int, int); --extern void print_generic_expr (FILE *, tree, int); -+extern void print_generic_expr (FILE *, tree, int = 0); - extern void dump_omp_clauses (pretty_printer *, tree, int, int); - extern int dump_generic_node (pretty_printer *, tree, int, int, bool); - extern void print_declaration (pretty_printer *, tree, int, int); -diff -N -urp a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c ---- a/gcc/tree-scalar-evolution.c 2018-11-15 15:54:01.443039800 +0800 -+++ b/gcc/tree-scalar-evolution.c 2018-11-15 16:03:17.459054437 +0800 -@@ -3000,6 +3000,50 @@ instantiate_scev (basic_block instantiat - return res; - } - -+tree -+instantiate_scev (edge instantiate_below, struct loop *evolution_loop, -+ tree chrec) -+{ -+ tree res; -+ -+ if (dump_file && (dump_flags & TDF_SCEV)) -+ { -+ fprintf (dump_file, "(instantiate_scev \n"); -+ fprintf (dump_file, " (instantiate_below = %d -> %d)\n", -+ instantiate_below->src->index, instantiate_below->dest->index); -+ if (evolution_loop) -+ fprintf (dump_file, " (evolution_loop = %d)\n", evolution_loop->num); -+ fprintf (dump_file, " (chrec = "); -+ print_generic_expr (dump_file, chrec); -+ fprintf (dump_file, ")\n"); -+ } -+ -+ bool destr = false; -+ if (!global_cache) -+ { -+ global_cache = new instantiate_cache_type; -+ destr = true; -+ } -+ -+ res = instantiate_scev_r (instantiate_below->src, evolution_loop, -+ NULL, chrec, NULL, 0); -+ -+ if (destr) -+ { -+ delete global_cache; -+ global_cache = NULL; -+ } -+ -+ if (dump_file && (dump_flags & TDF_SCEV)) -+ { -+ fprintf (dump_file, " (res = "); -+ print_generic_expr (dump_file, res); -+ fprintf (dump_file, "))\n"); -+ } -+ -+ return res; -+} -+ - /* Similar to instantiate_parameters, but does not introduce the - evolutions in outer loops for LOOP invariants in CHREC, and does not - care about causing overflows, as long as they do not affect value -diff -N -urp a/gcc/tree-scalar-evolution.h b/gcc/tree-scalar-evolution.h ---- a/gcc/tree-scalar-evolution.h 2018-11-15 15:54:01.443039800 +0800 -+++ b/gcc/tree-scalar-evolution.h 2018-11-15 16:03:17.459054437 +0800 -@@ -31,6 +31,7 @@ extern void scev_reset_htab (void); - extern void scev_finalize (void); - extern tree analyze_scalar_evolution (struct loop *, tree); - extern tree instantiate_scev (basic_block, struct loop *, tree); -+extern tree instantiate_scev (edge, struct loop *, tree); - extern tree resolve_mixers (struct loop *, tree, bool *); - extern void gather_stats_on_scev_database (void); - extern void final_value_replacement_loop (struct loop *); -diff -N -urp a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c ---- a/gcc/tree-ssa-dce.c 2018-11-15 15:54:01.443039800 +0800 -+++ b/gcc/tree-ssa-dce.c 2018-11-15 16:03:17.463054437 +0800 -@@ -1729,3 +1729,55 @@ make_pass_cd_dce (gcc::context *ctxt) - { - return new pass_cd_dce (ctxt); - } -+ -+ -+/* A cheap DCE interface. WORKLIST is a list of possibly dead stmts and -+ is consumed by this function. The function has linear complexity in -+ the number of dead stmts with a constant factor like the average SSA -+ use operands number. */ -+ -+void -+simple_dce_from_worklist (bitmap worklist) -+{ -+ while (! bitmap_empty_p (worklist)) -+ { -+ /* Pop item. */ -+ unsigned i = bitmap_first_set_bit (worklist); -+ bitmap_clear_bit (worklist, i); -+ -+ tree def = ssa_name (i); -+ /* Removed by somebody else or still in use. */ -+ if (! def || ! has_zero_uses (def)) -+ continue; -+ -+ gimple *t = SSA_NAME_DEF_STMT (def); -+ if (gimple_has_side_effects (t)) -+ continue; -+ -+ /* Add uses to the worklist. */ -+ ssa_op_iter iter; -+ use_operand_p use_p; -+ FOR_EACH_PHI_OR_STMT_USE (use_p, t, iter, SSA_OP_USE) -+ { -+ tree use = USE_FROM_PTR (use_p); -+ if (TREE_CODE (use) == SSA_NAME -+ && ! SSA_NAME_IS_DEFAULT_DEF (use)) -+ bitmap_set_bit (worklist, SSA_NAME_VERSION (use)); -+ } -+ -+ /* Remove stmt. */ -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ fprintf (dump_file, "Removing dead stmt:"); -+ print_gimple_stmt (dump_file, t, 0); -+ } -+ gimple_stmt_iterator gsi = gsi_for_stmt (t); -+ if (gimple_code (t) == GIMPLE_PHI) -+ remove_phi_node (&gsi, true); -+ else -+ { -+ gsi_remove (&gsi, true); -+ release_defs (t); -+ } -+ } -+} -diff -N -urp a/gcc/tree-ssa-dce.h b/gcc/tree-ssa-dce.h ---- a/gcc/tree-ssa-dce.h 1970-01-01 08:00:00.000000000 +0800 -+++ b/gcc/tree-ssa-dce.h 2018-11-15 16:03:17.463054437 +0800 -@@ -0,0 +1,22 @@ -+/* Copyright (C) 2017 Free Software Foundation, Inc. -+ -+This file is part of GCC. -+ -+GCC is free software; you can redistribute it and/or modify it -+under the terms of the GNU General Public License as published by the -+Free Software Foundation; either version 3, or (at your option) any -+later version. -+ -+GCC is distributed in the hope that it will be useful, but WITHOUT -+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+for more details. -+ -+You should have received a copy of the GNU General Public License -+along with GCC; see the file COPYING3. If not see -+. */ -+ -+#ifndef TREE_SSA_DCE_H -+#define TREE_SSA_DCE_H -+extern void simple_dce_from_worklist (bitmap); -+#endif -diff -N -urp a/gcc/tree-ssa-loop-ivcanon.c b/gcc/tree-ssa-loop-ivcanon.c ---- a/gcc/tree-ssa-loop-ivcanon.c 2018-11-15 15:54:01.447039800 +0800 -+++ b/gcc/tree-ssa-loop-ivcanon.c 2018-11-15 16:03:17.467054437 +0800 -@@ -76,10 +76,13 @@ enum unroll_level - }; - - /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT -- is the exit edge whose condition is replaced. */ -- --static void --create_canonical_iv (struct loop *loop, edge exit, tree niter) -+ is the exit edge whose condition is replaced. The ssa versions of the new -+ IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER -+ if they are not NULL. */ -+ -+void -+create_canonical_iv (struct loop *loop, edge exit, tree niter, -+ tree *var_before = NULL, tree *var_after = NULL) - { - edge in; - tree type, var; -@@ -112,7 +115,9 @@ create_canonical_iv (struct loop *loop, - create_iv (niter, - build_int_cst (type, -1), - NULL_TREE, loop, -- &incr_at, false, NULL, &var); -+ &incr_at, false, var_before, &var); -+ if (var_after) -+ *var_after = var; - - cmp = (exit->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR; - gimple_cond_set_code (cond, cmp); -diff -N -urp a/gcc/tree-ssa-loop-ivopts.h b/gcc/tree-ssa-loop-ivopts.h ---- a/gcc/tree-ssa-loop-ivopts.h 2018-11-15 15:54:01.447039800 +0800 -+++ b/gcc/tree-ssa-loop-ivopts.h 2018-11-15 16:03:17.467054437 +0800 -@@ -33,4 +33,6 @@ bool multiplier_allowed_in_address_p (HO - addr_space_t); - void tree_ssa_iv_optimize (void); - -+void create_canonical_iv (struct loop *, edge, tree, -+ tree * = NULL, tree * = NULL); - #endif /* GCC_TREE_SSA_LOOP_IVOPTS_H */ -diff -N -urp a/gcc/tree-ssa-pre.c b/gcc/tree-ssa-pre.c ---- a/gcc/tree-ssa-pre.c 2018-11-15 15:54:01.447039800 +0800 -+++ b/gcc/tree-ssa-pre.c 2018-11-15 16:03:17.471054437 +0800 -@@ -39,6 +39,7 @@ along with GCC; see the file COPYING3. - #include "gimplify.h" - #include "gimple-iterator.h" - #include "tree-cfg.h" -+#include "tree-ssa-dce.h" - #include "tree-ssa-loop.h" - #include "tree-into-ssa.h" - #include "tree-dfa.h" -@@ -4908,99 +4909,6 @@ mark_operand_necessary (tree op) - return stmt; - } - --/* Because we don't follow exactly the standard PRE algorithm, and decide not -- to insert PHI nodes sometimes, and because value numbering of casts isn't -- perfect, we sometimes end up inserting dead code. This simple DCE-like -- pass removes any insertions we made that weren't actually used. */ -- --static void --remove_dead_inserted_code (void) --{ -- bitmap worklist; -- unsigned i; -- bitmap_iterator bi; -- gimple *t; -- -- worklist = BITMAP_ALLOC (NULL); -- EXECUTE_IF_SET_IN_BITMAP (inserted_exprs, 0, i, bi) -- { -- t = SSA_NAME_DEF_STMT (ssa_name (i)); -- if (gimple_plf (t, NECESSARY)) -- bitmap_set_bit (worklist, i); -- } -- while (!bitmap_empty_p (worklist)) -- { -- i = bitmap_first_set_bit (worklist); -- bitmap_clear_bit (worklist, i); -- t = SSA_NAME_DEF_STMT (ssa_name (i)); -- -- /* PHI nodes are somewhat special in that each PHI alternative has -- data and control dependencies. All the statements feeding the -- PHI node's arguments are always necessary. */ -- if (gimple_code (t) == GIMPLE_PHI) -- { -- unsigned k; -- -- for (k = 0; k < gimple_phi_num_args (t); k++) -- { -- tree arg = PHI_ARG_DEF (t, k); -- if (TREE_CODE (arg) == SSA_NAME) -- { -- gimple *n = mark_operand_necessary (arg); -- if (n) -- bitmap_set_bit (worklist, SSA_NAME_VERSION (arg)); -- } -- } -- } -- else -- { -- /* Propagate through the operands. Examine all the USE, VUSE and -- VDEF operands in this statement. Mark all the statements -- which feed this statement's uses as necessary. */ -- ssa_op_iter iter; -- tree use; -- -- /* The operands of VDEF expressions are also needed as they -- represent potential definitions that may reach this -- statement (VDEF operands allow us to follow def-def -- links). */ -- -- FOR_EACH_SSA_TREE_OPERAND (use, t, iter, SSA_OP_ALL_USES) -- { -- gimple *n = mark_operand_necessary (use); -- if (n) -- bitmap_set_bit (worklist, SSA_NAME_VERSION (use)); -- } -- } -- } -- -- EXECUTE_IF_SET_IN_BITMAP (inserted_exprs, 0, i, bi) -- { -- t = SSA_NAME_DEF_STMT (ssa_name (i)); -- if (!gimple_plf (t, NECESSARY)) -- { -- gimple_stmt_iterator gsi; -- -- if (dump_file && (dump_flags & TDF_DETAILS)) -- { -- fprintf (dump_file, "Removing unnecessary insertion:"); -- print_gimple_stmt (dump_file, t, 0, 0); -- } -- -- gsi = gsi_for_stmt (t); -- if (gimple_code (t) == GIMPLE_PHI) -- remove_phi_node (&gsi, true); -- else -- { -- gsi_remove (&gsi, true); -- release_defs (t); -- } -- } -- } -- BITMAP_FREE (worklist); --} -- -- - /* Initialize data structures used by PRE. */ - - static void -@@ -5142,8 +5050,7 @@ pass_pre::execute (function *fun) - statistics_counter_event (fun, "Eliminated", pre_stats.eliminations); - - clear_expression_ids (); -- remove_dead_inserted_code (); -- -+ - scev_finalize (); - fini_pre (); - todo |= fini_eliminate (); -diff -N -urp a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c ---- a/gcc/tree-vect-loop.c 2018-11-15 15:54:01.447039800 +0800 -+++ b/gcc/tree-vect-loop.c 2018-11-15 16:03:17.471054437 +0800 -@@ -2632,6 +2632,112 @@ vect_is_slp_reduction (loop_vec_info loo - return true; - } - -+/* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and -+ reduction operation CODE has a handled computation expression. */ -+ -+bool -+check_reduction_path (location_t loc, loop_p loop, gphi *phi, tree loop_arg, -+ enum tree_code code) -+{ -+ auto_vec > path; -+ auto_bitmap visited; -+ tree lookfor = PHI_RESULT (phi); -+ ssa_op_iter curri; -+ use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE); -+ while (USE_FROM_PTR (curr) != loop_arg) -+ curr = op_iter_next_use (&curri); -+ curri.i = curri.numops; -+ do -+ { -+ path.safe_push (std::make_pair (curri, curr)); -+ tree use = USE_FROM_PTR (curr); -+ if (use == lookfor) -+ break; -+ gimple *def = SSA_NAME_DEF_STMT (use); -+ if (gimple_nop_p (def) -+ || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) -+ { -+pop: -+ do -+ { -+ std::pair x = path.pop (); -+ curri = x.first; -+ curr = x.second; -+ do -+ curr = op_iter_next_use (&curri); -+ /* Skip already visited or non-SSA operands (from iterating -+ over PHI args). */ -+ while (curr != NULL_USE_OPERAND_P -+ && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME -+ || ! bitmap_set_bit (visited, -+ SSA_NAME_VERSION -+ (USE_FROM_PTR (curr))))); -+ } -+ while (curr == NULL_USE_OPERAND_P && ! path.is_empty ()); -+ if (curr == NULL_USE_OPERAND_P) -+ break; -+ } -+ else -+ { -+ if (gimple_code (def) == GIMPLE_PHI) -+ curr = op_iter_init_phiuse (&curri, as_a (def), SSA_OP_USE); -+ else -+ curr = op_iter_init_use (&curri, def, SSA_OP_USE); -+ while (curr != NULL_USE_OPERAND_P -+ && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME -+ || ! bitmap_set_bit (visited, -+ SSA_NAME_VERSION -+ (USE_FROM_PTR (curr))))) -+ curr = op_iter_next_use (&curri); -+ if (curr == NULL_USE_OPERAND_P) -+ goto pop; -+ } -+ } -+ while (1); -+ if (dump_file && (dump_flags & TDF_DETAILS)) -+ { -+ dump_printf_loc (MSG_NOTE, loc, "reduction path: "); -+ unsigned i; -+ std::pair *x; -+ FOR_EACH_VEC_ELT (path, i, x) -+ { -+ dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second)); -+ dump_printf (MSG_NOTE, " "); -+ } -+ dump_printf (MSG_NOTE, "\n"); -+ } -+ -+ /* Check whether the reduction path detected is valid. */ -+ bool fail = path.length () == 0; -+ bool neg = false; -+ for (unsigned i = 1; i < path.length (); ++i) -+ { -+ gimple *use_stmt = USE_STMT (path[i].second); -+ tree op = USE_FROM_PTR (path[i].second); -+ if (! has_single_use (op) -+ || ! is_gimple_assign (use_stmt)) -+ { -+ fail = true; -+ break; -+ } -+ if (gimple_assign_rhs_code (use_stmt) != code) -+ { -+ if (code == PLUS_EXPR -+ && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) -+ { -+ /* Track whether we negate the reduction value each iteration. */ -+ if (gimple_assign_rhs2 (use_stmt) == op) -+ neg = ! neg; -+ } -+ else -+ { -+ fail = true; -+ break; -+ } -+ } -+ } -+ return ! fail && ! neg; -+} - - /* Function vect_is_simple_reduction_1 - -diff -N -urp a/gcc/tree-vectorizer.h b/gcc/tree-vectorizer.h ---- a/gcc/tree-vectorizer.h 2018-11-15 15:54:01.451039800 +0800 -+++ b/gcc/tree-vectorizer.h 2018-11-15 16:03:17.475054437 +0800 -@@ -1166,6 +1166,9 @@ extern tree vect_create_addr_base_for_ve - extern void destroy_loop_vec_info (loop_vec_info, bool); - extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, bool, - bool *, bool); -+/* Used in gimple-loop-interchange.c. */ -+extern bool check_reduction_path (location_t, loop_p, gphi *, tree, -+ enum tree_code); - /* Drive for loop analysis stage. */ - extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info); - extern tree vect_build_loop_niters (loop_vec_info); diff --git a/gcc.spec b/gcc.spec index c69f413ab23bd7aa899f8d1c032936b24e945233..cab523fe92ddec899e1038a07a5503aff721c802 100644 --- a/gcc.spec +++ b/gcc.spec @@ -41,7 +41,7 @@ Version: 7.3.0 # number 2020033101 meaning the openEuler 20.03 release date plus 01 to # replace DATE and will never change it in the future. %global openEulerDATE 2020033101 -Release: %{openEulerDATE}.53 +Release: %{openEulerDATE}.54 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD Group: Development/Languages #Source0: hcc-aarch64-linux-release.tar.bz2 @@ -73,18 +73,12 @@ Patch2: gcc-adapt-to-isl.patch Patch3: sanitizer-pr-85835.patch Patch4: CVE-2018-12886.patch Patch5: CVE-2019-15847.patch -Patch6: option-mlong-calls.patch Patch7: add-tsv110-pipeline-scheduling.patch -Patch8: option-mfentry-and-mlong-calls-bugfix.patch -Patch10: aarch64-ilp32-call-addr-dimode.patch Patch12: aarch64-fix-tls-negative-offset.patch Patch14: arm-fix-push-minipool.patch Patch22: arm-bigendian-disable-interleaved-LS-vectorize.patch Patch23: floop-unroll-and-jam.patch -Patch24: floop-interchange.patch Patch25: constructor-priority-bugfix.patch -Patch26: arm-adjust-be-ldrd-strd.patch -Patch28: try-unroll.patch Patch29: Big-endian-union-bitfield-bugfix.patch Patch31: fstack-clash-protection.patch Patch34: mark-pattern-as-clobbering-CC-REGNUM.patch @@ -569,18 +563,12 @@ package or when debugging this package. %patch3 -p1 %patch4 -p1 %patch5 -p1 -%patch6 -p1 %patch7 -p1 -%patch8 -p1 -%patch10 -p1 %patch12 -p1 %patch14 -p1 %patch22 -p1 %patch23 -p1 -%patch24 -p1 %patch25 -p1 -%patch26 -p1 -%patch28 -p1 %patch29 -p1 %patch31 -p1 %patch34 -p1 @@ -3376,6 +3364,9 @@ fi %changelog +* Tue Nov 21 2023 eastb233 - 7.3.0-2020033101.54 +- Delete several patches which cause some failures. + * Tue Oct 10 2023 Xiong Zhou -7.3.0-2020033101.53 - Fix CVE-2023-4039. Delete abnormal rpaths in shared objects. diff --git a/option-mfentry-and-mlong-calls-bugfix.patch b/option-mfentry-and-mlong-calls-bugfix.patch deleted file mode 100644 index c242567f7a4ae29d94cb82236385a734a860ca2e..0000000000000000000000000000000000000000 --- a/option-mfentry-and-mlong-calls-bugfix.patch +++ /dev/null @@ -1,108 +0,0 @@ -diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c ---- a/gcc/config/aarch64/aarch64.c 2018-09-19 17:11:42.583520820 +0800 -+++ b/gcc/config/aarch64/aarch64.c 2018-09-19 17:10:22.715520820 +0800 -@@ -1260,29 +1260,32 @@ aarch64_is_long_call_p (rtx sym) - void - aarch64_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) - { -- if (!TARGET_LONG_CALLS) -+ if (flag_fentry) - { -- fprintf (file, "\tmov\tx9, x30\n"); -- fprintf (file, "\tbl\t__fentry__\n"); -- fprintf (file, "\tmov\tx30, x9\n"); -- } -- else -- { -- if (flag_pic) -+ if (!TARGET_LONG_CALLS) - { - fprintf (file, "\tmov\tx9, x30\n"); -- fprintf (file, "\tadrp\tx10, :got:__fentry__\n"); -- fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n"); -- fprintf (file, "\tblr\tx10\n"); -+ fprintf (file, "\tbl\t__fentry__\n"); - fprintf (file, "\tmov\tx30, x9\n"); - } - else - { -- fprintf (file, "\tmov\tx9, x30\n"); -- fprintf (file, "\tadrp\tx10, __fentry__\n"); -- fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n"); -- fprintf (file, "\tblr\tx10\n"); -- fprintf (file, "\tmov\tx30, x9\n"); -+ if (flag_pic) -+ { -+ fprintf (file, "\tmov\tx9, x30\n"); -+ fprintf (file, "\tadrp\tx10, :got:__fentry__\n"); -+ fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n"); -+ fprintf (file, "\tblr\tx10\n"); -+ fprintf (file, "\tmov\tx30, x9\n"); -+ } -+ else -+ { -+ fprintf (file, "\tmov\tx9, x30\n"); -+ fprintf (file, "\tadrp\tx10, __fentry__\n"); -+ fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n"); -+ fprintf (file, "\tblr\tx10\n"); -+ fprintf (file, "\tmov\tx30, x9\n"); -+ } - } - } - } -@@ -12020,6 +12023,15 @@ aarch64_emit_unlikely_jump (rtx insn) - add_int_reg_note (jump, REG_BR_PROB, very_unlikely); - } - -+/* Return true, if profiling code should be emitted before -+ prologue. Otherwise it returns false. -+ Note: For x86 with "hotfix" it is sorried. */ -+static bool -+aarch64_profile_before_prologue (void) -+{ -+ return flag_fentry != 0; -+} -+ - /* Expand a compare and swap pattern. */ - - void -@@ -14952,6 +14964,9 @@ aarch64_run_selftests (void) - #undef TARGET_ASM_ALIGNED_SI_OP - #define TARGET_ASM_ALIGNED_SI_OP "\t.word\t" - -+#undef TARGET_PROFILE_BEFORE_PROLOGUE -+#define TARGET_PROFILE_BEFORE_PROLOGUE aarch64_profile_before_prologue -+ - #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK - #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \ - hook_bool_const_tree_hwi_hwi_const_tree_true -diff -N -urp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h ---- a/gcc/config/aarch64/aarch64.h 2018-09-19 17:11:42.587520820 +0800 -+++ b/gcc/config/aarch64/aarch64.h 2018-09-19 17:10:22.715520820 +0800 -@@ -850,9 +850,12 @@ typedef struct - { \ - rtx fun, lr; \ - const rtx_insn* tmp = get_insns (); \ -- lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \ -- fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \ -- emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \ -+ if (!flag_fentry) \ -+ { \ -+ lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \ -+ fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \ -+ emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \ -+ } \ - if (TARGET_LONG_CALLS) \ - { \ - emit_insn (gen_blockage ()); \ -diff -N -urp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt ---- a/gcc/config/aarch64/aarch64.opt 2018-09-19 17:11:42.587520820 +0800 -+++ b/gcc/config/aarch64/aarch64.opt 2018-09-19 17:10:22.715520820 +0800 -@@ -192,3 +192,7 @@ single precision and to 32 bits for doub - mverbose-cost-dump - Common Undocumented Var(flag_aarch64_verbose_cost) - Enables verbose cost model dumping in the debug dump files. -+ -+mfentry -+Target Report Var(flag_fentry) Init(0) -+Emit profiling counter call at function entry immediately after prologue. diff --git a/option-mlong-calls.patch b/option-mlong-calls.patch deleted file mode 100644 index 7aadfbe06b96a1319106194b115f7e1534fadc05..0000000000000000000000000000000000000000 --- a/option-mlong-calls.patch +++ /dev/null @@ -1,362 +0,0 @@ -diff -N -urp a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h ---- a/gcc/config/aarch64/aarch64-protos.h 2018-11-06 10:43:27.862079389 +0800 -+++ b/gcc/config/aarch64/aarch64-protos.h 2018-11-06 10:44:34.930081154 +0800 -@@ -353,6 +353,10 @@ bool aarch64_use_return_insn_p (void); - const char *aarch64_mangle_builtin_type (const_tree); - const char *aarch64_output_casesi (rtx *); - -+extern void aarch64_pr_long_calls (struct cpp_reader *); -+extern void aarch64_pr_no_long_calls (struct cpp_reader *); -+extern void aarch64_pr_long_calls_off (struct cpp_reader *); -+ - enum aarch64_symbol_type aarch64_classify_symbol (rtx, rtx); - enum aarch64_symbol_type aarch64_classify_tls_symbol (rtx); - enum reg_class aarch64_regno_regclass (unsigned); -@@ -384,6 +388,7 @@ void aarch64_expand_epilogue (bool); - void aarch64_expand_mov_immediate (rtx, rtx); - void aarch64_expand_prologue (void); - void aarch64_expand_vector_init (rtx, rtx); -+void aarch64_function_profiler (FILE *, int); - void aarch64_init_cumulative_args (CUMULATIVE_ARGS *, const_tree, rtx, - const_tree, unsigned); - void aarch64_init_expanders (void); -diff -N -urp a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c ---- a/gcc/config/aarch64/aarch64.c 2018-11-06 10:43:27.870079389 +0800 -+++ b/gcc/config/aarch64/aarch64.c 2018-11-06 10:44:34.934081154 +0800 -@@ -70,6 +70,9 @@ - /* This file should be included last. */ - #include "target-def.h" - -+static void aarch64_set_default_type_attributes (tree); -+static int aarch64_comp_type_attributes (const_tree, const_tree); -+ - /* Defined for convenience. */ - #define POINTER_BYTES (POINTER_SIZE / BITS_PER_UNIT) - -@@ -1092,12 +1095,163 @@ aarch64_hard_regno_caller_save_mode (uns - return choose_hard_reg_mode (regno, nregs, false); - } - -+/* Table of machine attributes. */ -+static const struct attribute_spec aarch64_attribute_table[] = -+{ -+ /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler, -+ affects_type_identity }. */ -+ /* Function calls made to this symbol must be done indirectly, because -+ it may lie outside of the 26 bit addressing range of a normal function -+ call. */ -+ { "long_call", 0, 0, false, true, true, NULL, false }, -+ /* Whereas these functions are always known to reside within the 26 bit -+ addressing range. */ -+ { "short_call", 0, 0, false, true, true, NULL, false }, -+ { NULL, 0, 0, false, false, false, NULL, false } -+}; -+ -+/* Encode the current state of the #pragma[no_]long_calls. */ -+typedef enum -+{ -+ OFF, /* No #pragma[no_]long_calls is in effect. */ -+ LONG, /* #pragma long_calls is in effect. */ -+ SHORT /* #pragma no_long_calls is in effect. */ -+} aarch64_pragma_enum; -+ -+static aarch64_pragma_enum aarch64_pragma_long_calls = OFF; -+ -+void -+aarch64_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED) -+{ -+ aarch64_pragma_long_calls = LONG; -+} -+ -+void -+aarch64_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED) -+{ -+ aarch64_pragma_long_calls = SHORT; -+} -+ -+void -+aarch64_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED) -+{ -+ aarch64_pragma_long_calls = OFF; -+} -+ -+/* Return 0 if the attributes for two types are incompatible, 1 if they -+ are compatible. */ -+static int -+aarch64_comp_type_attributes (const_tree type1, const_tree type2) -+{ -+ int l1, l2, s1, s2; -+ -+ /* Check for mismatch of non-default calling convention. */ -+ if (TREE_CODE (type1) != FUNCTION_TYPE) -+ return 1; -+ -+ /* Check for mismatched call attributes. */ -+ l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL; -+ l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL; -+ s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL; -+ s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL; -+ -+ /* Only bother to check if an attribute is defined. */ -+ if (l1 | l2 | s1 | s2) -+ { -+ /* If one type has an attribute, the other -+ must have the same attribute. */ -+ if ((l1 != l2) || (s1 != s2)) -+ { -+ return 0; -+ } -+ -+ /* Disallow mixed attributes. */ -+ if ((l1 && s2) || (l2 && s1)) -+ { -+ return 0; -+ } -+ } -+ -+ return 1; -+} -+ -+/* Assigns default attributes to newly defined type. This is used to -+ set short_call/long_call attributes for function types of -+ functions defined inside corresponding #pragma scopes. */ -+static void -+aarch64_set_default_type_attributes (tree type) -+{ -+ /* Add __attribute__ ((long_call)) to all functions, when -+ inside #pragma long_calls or __attribute__ ((short_call)), -+ when inside #pragma no_long_calls. */ -+ if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE) -+ { -+ tree type_attr_list = NULL; -+ tree attr_name = NULL; -+ type_attr_list = TYPE_ATTRIBUTES (type); -+ -+ if (aarch64_pragma_long_calls == LONG) -+ { -+ attr_name = get_identifier ("long_call"); -+ } -+ else if (aarch64_pragma_long_calls == SHORT) -+ { -+ attr_name = get_identifier ("short_call"); -+ } -+ else -+ { -+ return; -+ } -+ -+ type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list); -+ TYPE_ATTRIBUTES (type) = type_attr_list; -+ } -+} -+ -+/* Return true if DECL is known to be linked into section SECTION. */ -+static bool -+aarch64_function_in_section_p (tree decl, section *section) -+{ -+ /* We can only be certain about the prevailing symbol definition. */ -+ if (!decl_binds_to_current_def_p (decl)) -+ return false; -+ -+ /* If DECL_SECTION_NAME is set, assume it is trustworthy. */ -+ if (!DECL_SECTION_NAME (decl)) -+ { -+ /* Make sure that we will not create a unique section for DECL. */ -+ if (flag_function_sections || DECL_COMDAT_GROUP (decl)) -+ return false; -+ } -+ -+ return function_section (decl) == section; -+} -+ - /* Return true if calls to DECL should be treated as - long-calls (ie called via a register). */ - static bool --aarch64_decl_is_long_call_p (const_tree decl ATTRIBUTE_UNUSED) -+aarch64_decl_is_long_call_p (tree decl) - { -- return false; -+ tree attrs = NULL; -+ -+ if (!decl) -+ return TARGET_LONG_CALLS; -+ -+ attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl)); -+ if (lookup_attribute ("short_call", attrs)) -+ return false; -+ -+ /* For "f", be conservative, and only cater for cases in which the -+ whole of the current function is placed in the same section. */ -+ if (!flag_reorder_blocks_and_partition -+ && TREE_CODE (decl) == FUNCTION_DECL -+ && aarch64_function_in_section_p (decl, current_function_section ())) -+ return false; -+ -+ if (lookup_attribute ("long_call", attrs)) -+ return true; -+ -+ return TARGET_LONG_CALLS; - } - - /* Return true if calls to symbol-ref SYM should be treated as -@@ -1108,6 +1257,36 @@ aarch64_is_long_call_p (rtx sym) - return aarch64_decl_is_long_call_p (SYMBOL_REF_DECL (sym)); - } - -+void -+aarch64_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) -+{ -+ if (!TARGET_LONG_CALLS) -+ { -+ fprintf (file, "\tmov\tx9, x30\n"); -+ fprintf (file, "\tbl\t__fentry__\n"); -+ fprintf (file, "\tmov\tx30, x9\n"); -+ } -+ else -+ { -+ if (flag_pic) -+ { -+ fprintf (file, "\tmov\tx9, x30\n"); -+ fprintf (file, "\tadrp\tx10, :got:__fentry__\n"); -+ fprintf (file, "\tldr\tx10, [x10, #:got_lo12:__fentry__]\n"); -+ fprintf (file, "\tblr\tx10\n"); -+ fprintf (file, "\tmov\tx30, x9\n"); -+ } -+ else -+ { -+ fprintf (file, "\tmov\tx9, x30\n"); -+ fprintf (file, "\tadrp\tx10, __fentry__\n"); -+ fprintf (file, "\tadd\tx10, x10, :lo12:__fentry__\n"); -+ fprintf (file, "\tblr\tx10\n"); -+ fprintf (file, "\tmov\tx30, x9\n"); -+ } -+ } -+} -+ - /* Return true if calls to symbol-ref SYM should not go through - plt stubs. */ - -@@ -15099,6 +15278,15 @@ aarch64_libgcc_floating_mode_supported_p - #undef TARGET_SCHED_CAN_SPECULATE_INSN - #define TARGET_SCHED_CAN_SPECULATE_INSN aarch64_sched_can_speculate_insn - -+#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES -+#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES aarch64_set_default_type_attributes -+ -+#undef TARGET_ATTRIBUTE_TABLE -+#define TARGET_ATTRIBUTE_TABLE aarch64_attribute_table -+ -+#undef TARGET_COMP_TYPE_ATTRIBUTES -+#define TARGET_COMP_TYPE_ATTRIBUTES aarch64_comp_type_attributes -+ - #undef TARGET_CAN_USE_DOLOOP_P - #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost - -diff -N -urp a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h ---- a/gcc/config/aarch64/aarch64.h 2018-11-06 10:43:27.870079389 +0800 -+++ b/gcc/config/aarch64/aarch64.h 2018-11-06 10:49:29.574088911 +0800 -@@ -28,7 +28,6 @@ - - - --#define REGISTER_TARGET_PRAGMAS() aarch64_register_pragmas () - - /* Target machine storage layout. */ - -@@ -659,6 +658,14 @@ typedef struct - } CUMULATIVE_ARGS; - #endif - -+/* Handle pragmas for compatibility with Intel's compilers. */ -+#define REGISTER_TARGET_PRAGMAS() do { \ -+ c_register_pragma (0, "long_calls", aarch64_pr_long_calls); \ -+ c_register_pragma (0, "no_long_calls", aarch64_pr_no_long_calls); \ -+ c_register_pragma (0, "long_calls_off", aarch64_pr_long_calls_off); \ -+ aarch64_register_pragmas (); \ -+} while (0) -+ - #define FUNCTION_ARG_PADDING(MODE, TYPE) \ - (aarch64_pad_arg_upward (MODE, TYPE) ? upward : downward) - -@@ -842,13 +849,20 @@ typedef struct - #define PROFILE_HOOK(LABEL) \ - { \ - rtx fun, lr; \ -+ const rtx_insn* tmp = get_insns (); \ - lr = get_hard_reg_initial_val (Pmode, LR_REGNUM); \ - fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_NAME); \ - emit_library_call (fun, LCT_NORMAL, VOIDmode, 1, lr, Pmode); \ -+ if (TARGET_LONG_CALLS) \ -+ { \ -+ emit_insn (gen_blockage ()); \ -+ emit_insn_after (gen_blockage (), NEXT_INSN (tmp)); \ -+ } \ - } - - /* All the work done in PROFILE_HOOK, but still required. */ --#define FUNCTION_PROFILER(STREAM, LABELNO) do { } while (0) -+#define FUNCTION_PROFILER(STREAM, LABELNO) \ -+ aarch64_function_profiler (STREAM, LABELNO) - - /* For some reason, the Linux headers think they know how to define - these macros. They don't!!! */ -diff -N -urp a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md ---- a/gcc/config/aarch64/aarch64.md 2018-11-06 10:43:27.874079389 +0800 -+++ b/gcc/config/aarch64/aarch64.md 2018-11-06 10:44:34.934081154 +0800 -@@ -850,9 +850,10 @@ - { - rtx pat; - rtx callee = XEXP (operands[0], 0); -- if (!REG_P (callee) -- && ((GET_CODE (callee) != SYMBOL_REF) -- || aarch64_is_noplt_call_p (callee))) -+ -+ if (GET_CODE (callee) == SYMBOL_REF -+ ? (aarch64_is_long_call_p (callee) || aarch64_is_noplt_call_p (callee)) -+ : !REG_P (callee)) - XEXP (operands[0], 0) = force_reg (Pmode, callee); - - if (operands[2] == NULL_RTX) -@@ -881,9 +882,10 @@ - { - rtx pat; - rtx callee = XEXP (operands[1], 0); -- if (!REG_P (callee) -- && ((GET_CODE (callee) != SYMBOL_REF) -- || aarch64_is_noplt_call_p (callee))) -+ -+ if (GET_CODE (callee) == SYMBOL_REF -+ ? (aarch64_is_long_call_p (callee) || aarch64_is_noplt_call_p (callee)) -+ : !REG_P (callee)) - XEXP (operands[1], 0) = force_reg (Pmode, callee); - - if (operands[3] == NULL_RTX) -diff -N -urp a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt ---- a/gcc/config/aarch64/aarch64.opt 2018-11-06 10:43:27.874079389 +0800 -+++ b/gcc/config/aarch64/aarch64.opt 2018-11-06 10:44:34.934081154 +0800 -@@ -80,6 +80,10 @@ mlittle-endian - Target Report RejectNegative InverseMask(BIG_END) - Assume target CPU is configured as little endian. - -+mlong-calls -+Target Report Mask(LONG_CALLS) -+Generate call insns as indirect calls, if necessary. -+ - mcmodel= - Target RejectNegative Joined Enum(cmodel) Var(aarch64_cmodel_var) Init(AARCH64_CMODEL_SMALL) Save - Specify the code model. -diff -N -urp a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md ---- a/gcc/config/aarch64/predicates.md 2018-11-06 10:43:27.878079389 +0800 -+++ b/gcc/config/aarch64/predicates.md 2018-11-06 10:44:34.938081154 +0800 -@@ -27,8 +27,9 @@ - ) - - (define_predicate "aarch64_call_insn_operand" -- (ior (match_code "symbol_ref") -- (match_operand 0 "register_operand"))) -+ (ior (and (match_code "symbol_ref") -+ (match_test "!aarch64_is_long_call_p (op)")) -+ (match_operand 0 "register_operand"))) - - ;; Return true if OP a (const_int 0) operand. - (define_predicate "const0_operand" diff --git a/try-unroll.patch b/try-unroll.patch deleted file mode 100644 index 6f564f8054e00e95f0a031f785697257b6c3eac3..0000000000000000000000000000000000000000 --- a/try-unroll.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- a/gcc/tree-ssa-loop-ivcanon.c 2018-12-06 05:05:43.841181211 +0800 -+++ b/gcc/tree-ssa-loop-ivcanon.c 2018-12-06 05:03:17.545185153 +0800 -@@ -726,7 +726,7 @@ try_unroll_loop_completely (struct loop - edge_to_cancel = NULL; - } - -- if (!n_unroll_found) -+ if (!n_unroll_found || SCEV_NOT_KNOWN == TREE_CODE (niter)) - return false; - - if (n_unroll > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES))