diff --git a/gcc/common.opt b/gcc/common.opt index e6ffa1c581851831e5ce555c06064bc382508d7a..8ae6dd3493976a36b6494e1d7e875394ee1368c3 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1357,6 +1357,10 @@ fipa-ic Common Var(flag_ipa_ic) Optimization Init(0) Perform interprocedural analysis of indirect calls. +fipa-alias +Common Var(flag_ipa_alias) Optimization Init(0) +Perform interprocedural alias analysis for variables and types. + ficp Common Var(flag_icp) Optimization Init(0) Try to promote indirect calls to direct ones. @@ -1911,6 +1915,10 @@ ftree-loop-if-convert-stores Common Ignore Does nothing. Preserved for backward compatibility. +ftree-loop-partial-if-convert +Common Var(flag_tree_partial_if_convert) Init(0) Optimization +Partial convert conditional jumps in innermost loops to branchless equivalents. + ; -finhibit-size-directive inhibits output of .size for ELF. ; This is used only for compiling crtstuff.c, ; and it may be extended to other effects @@ -3244,6 +3252,10 @@ funroll-completely-grow-size Undocumented Var(flag_cunroll_grow_size) Optimization ; Internal undocumented flag, allow size growth during complete unrolling +funroll-loops-replicative +Common Var(flag_cunroll_replicative) Init(0) Optimization +Perform loop unrolling by instruction replication. + ; Nonzero means that loop optimizer may assume that the induction variables ; that control loops do not overflow and that the loops with nontrivial ; exit condition are not infinite diff --git a/gcc/ipa-devirt.cc b/gcc/ipa-devirt.cc index 60d4b2a0b1fe2d15abc8831a42edb653a9f12ec0..e147f56ac2d2d8045bdae77a0f242a118ad65163 100644 --- a/gcc/ipa-devirt.cc +++ b/gcc/ipa-devirt.cc @@ -140,6 +140,7 @@ along with GCC; see the file COPYING3. If not see #include "data-streamer.h" #include "lto-streamer.h" #include "streamer-hooks.h" +#include "tree-cfg.h" /* Hash based set of pairs of types. */ struct type_pair @@ -4411,11 +4412,29 @@ make_pass_ipa_odr (gcc::context *ctxt) the given function type. 
*/ typedef std::set type_set; typedef std::set decl_set; +typedef std::set stmt_set; +typedef tree pointee_object; + +typedef struct object_reference { + object_reference (gimple *s, gimple *src, gimple *e, function* f): + stmt (s), source (src), escape (e), fn (f) + { }; + gimple *stmt; + gimple *source; + gimple *escape; + function *fn; +} objref; + typedef std::map type_alias_map; typedef std::map type_decl_map; typedef std::map uid_to_type_map; typedef std::map type_map; +typedef std::map> field_access_map; +typedef std::map record_access_map; +typedef std::map> offset_map; +typedef std::map global_map; + static bool has_address_taken_functions_with_varargs = false; static type_set *unsafe_types = NULL; static type_alias_map *fta_map = NULL; @@ -4425,6 +4444,17 @@ static type_alias_map *cbase_to_ptype = NULL; static type_decl_map *fs_map = NULL; static uid_to_type_map *type_uid_map = NULL; +static record_access_map *record_accesses = NULL; +static global_map *global_pointer_vals = NULL; + +/* Set of global variables (pointers or structures with pointers) which + addresses are taken. */ + +static decl_set *address_taken_globals = NULL; + +/* Set of address taken pointer types or types with address taken fields. */ +static decl_set *address_taken_types = NULL; + static void print_type_set (unsigned ftype_uid, type_alias_map *map) { @@ -4831,6 +4861,175 @@ compare_block_and_init_type (tree block, tree t1) compare_type_lists (tlist1, tlist2); } +static void collect_address_taken_inits (tree t); +static bool get_addr_base_etc (tree mem, tree &base, HOST_WIDE_INT &offset, + tree &type); + +static bool +need_to_collect_object (tree base, tree var_type) +{ + if (VAR_OR_FUNCTION_DECL_P (base) && is_global_var (base) + && (!var_type || (POINTER_TYPE_P (var_type) + || TREE_CODE (var_type) == RECORD_TYPE))) + return true; + return false; +} + +/* Save object and type address taken info, return base object of t. 
*/ + +static tree +mark_address_taken (tree t, bool for_types) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "mark_address_taken: {%s}: ", + get_tree_code_name (TREE_CODE (t))); + print_generic_expr (dump_file, t, TDF_NONE); + fprintf (dump_file, "\n"); + } + + tree base = t, var_type = TREE_TYPE (t); + HOST_WIDE_INT offset = -1; + bool found = true; + if (TREE_CODE (t) == MEM_REF || TREE_CODE (t) == ARRAY_REF) + { + found = get_addr_base_etc (t, base, offset, var_type); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "mark_address_taken: get_addr_base_etc base="); + print_generic_expr (dump_file, base, TDF_NONE); + fprintf (dump_file, " offset=%ld type=", offset); + print_generic_expr (dump_file, var_type, TDF_NONE); + fprintf (dump_file, "\n"); + } + } + if (!base) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "mark_address_taken: FAIL!\n"); + return NULL_TREE; + } + + if (need_to_collect_object (base, var_type)) + address_taken_globals->insert (base); + else if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "mark_address_taken: Don't know what to do with it" + " (found=%d)\n", found); + + if (!for_types || !var_type) + return base; + /* TODO: type-based addr taken. 
*/ + if (POINTER_TYPE_P (var_type) || TREE_CODE (var_type) == RECORD_TYPE) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "mark_address_taken: FOR_TYPE {%s}: ", + get_tree_code_name (TREE_CODE (var_type))); + print_generic_expr (dump_file, var_type, TDF_NONE); + fprintf (dump_file, "\n"); + } + address_taken_types->insert (var_type); + } + return base; +} + +static void +add_global_pointer_val (tree global, unsigned offset, tree val, bool is_safe) +{ + if (dump_file) + { + fprintf (dump_file, "add_global_pointer_val: "); + print_generic_expr (dump_file, global); + fprintf (dump_file, " offset=%d: safe=%d, value=", offset, is_safe); + print_generic_expr (dump_file, val); + fprintf (dump_file, "\n"); + } + offset_map *om; + if (global_pointer_vals->count (global)) + om = (*global_pointer_vals)[global]; + else + { + om = new offset_map; + (*global_pointer_vals)[global] = om; + } + (*om)[offset].safe_push (is_safe ? val : NULL_TREE); +} + +static void +collect_global_vals_and_address_taken (tree var, unsigned offset, tree t, + bool is_safe) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "collect_global_vals_and_address_taken {%s}: ", + get_tree_code_name (TREE_CODE (t))); + print_generic_expr (dump_file, t, TDF_NONE); + fprintf (dump_file, "\n"); + } + + if (CONSTANT_CLASS_P (t) || TREE_CODE (t) == STRING_CST) + return; + if (TREE_CODE (t) == ADDR_EXPR) + { + tree op0 = TREE_OPERAND (t, 0); + tree base = mark_address_taken (op0, false); + add_global_pointer_val (var, offset, base, is_safe); + } + /* TODO: support record fields initialization. 
*/ + else if (TREE_CODE (t) == CONSTRUCTOR) + { + unsigned HOST_WIDE_INT ix; + tree value, fld; + FOR_EACH_CONSTRUCTOR_ELT (CONSTRUCTOR_ELTS (t), ix, fld, value) + { + if (initializer_zerop (value) || fld == NULL_TREE) + continue; + tree base = var; + tree off = NULL_TREE; + gcc_assert (value && fld); + if ((dump_file && (dump_flags & TDF_DETAILS))) + { + fprintf (dump_file, "collect_global_vals_and_address_taken{%s}: ", + fld ? get_tree_code_name (TREE_CODE (fld)) : "?"); + print_generic_expr (dump_file, fld, TDF_NONE); + fprintf (dump_file, " type="); + print_generic_expr (dump_file, TREE_TYPE (fld), TDF_NONE); + fprintf (dump_file, "\n"); + } + if (TREE_CODE (fld) == FIELD_DECL && POINTER_TYPE_P (TREE_TYPE (fld)) + && tree_fits_shwi_p (DECL_FIELD_OFFSET (fld))) + { + off = DECL_FIELD_OFFSET (fld); + unsigned foff = tree_fits_shwi_p (off) ? tree_to_shwi (off) : 0; + collect_global_vals_and_address_taken (base, offset + foff, + value, is_safe); + } + else + collect_global_vals_and_address_taken (NULL_TREE, 0, value, false); + } + } + else if (TREE_CODE (t) == BLOCK) + for (tree bvar = BLOCK_VARS (t); bvar; bvar = DECL_CHAIN (bvar)) + { + gcc_assert (bvar); + collect_global_vals_and_address_taken (var, offset, bvar, false); + } + else if (TREE_CODE (t) == NOP_EXPR || TREE_CODE (t) == POINTER_PLUS_EXPR) + collect_global_vals_and_address_taken (var, offset, TREE_OPERAND (t, 0), + false); + else + { + if ((dump_file && (dump_flags & TDF_DETAILS))) + { + fprintf (dump_file, "collect_global_vals_and_address_taken: " + "UNEXPECTED t{%s}: ", get_tree_code_name (TREE_CODE (t))); + print_generic_expr (dump_file, t, TDF_NONE); + fprintf (dump_file, "\n"); + } + gcc_unreachable (); + } +} + /* Analyze global var to find type aliases comparing types of var and initializer elements. 
*/ @@ -4838,21 +5037,27 @@ static void analyze_global_var (varpool_node *var) { tree decl = var->decl; - if (decl || !DECL_INITIAL (decl)) + if (!var->definition || !decl || !DECL_INITIAL (decl)) return; + var->get_constructor (); - if (TREE_CODE (decl) == SSA_NAME || integer_zerop (DECL_INITIAL (decl)) - || TREE_CODE (DECL_INITIAL (decl)) == ERROR_MARK) + tree init = DECL_INITIAL (decl); + if (TREE_CODE (decl) == SSA_NAME || integer_zerop (init) + || TREE_CODE (init) == ERROR_MARK) return; if (dump_file && (dump_flags & TDF_DETAILS)) dump_global_var (decl); tree var_type = TREE_TYPE (decl); - tree init_type = TREE_TYPE (DECL_INITIAL (decl)); + tree init_type = TREE_TYPE (init); gcc_assert (var_type && init_type); - if (RECORD_OR_UNION_TYPE_P (init_type) - && !initializer_zerop (DECL_INITIAL (decl))) - compare_block_and_init_type (DECL_INITIAL (decl), init_type); + /* We cannot trust var->address_taken info so we need to collect it. */ + unsigned offset = 0; + if (flag_ipa_alias && init) + collect_global_vals_and_address_taken (decl, offset, init, true); + + if (RECORD_OR_UNION_TYPE_P (init_type) && !initializer_zerop (init)) + compare_block_and_init_type (init, init_type); else if (dump_file && (dump_flags & TDF_DETAILS)) fprintf (dump_file, "Is not a record with nonzero init\n"); @@ -4883,6 +5088,11 @@ dump_function_node_info (struct cgraph_node *n) dump_type_with_uid ("", atype); } fprintf (dump_file, "\n"); + if (dump_file) + { + fprintf (dump_file, "Inside ICP: "); + dump_function_to_file (n->decl, dump_file, dump_flags); + } } static void @@ -4928,6 +5138,14 @@ analyze_cgraph_edge (cgraph_edge *e) tree fntype = get_call_fntype (stmt); if (dump_file && (dump_flags & TDF_DETAILS)) dump_call_stmt_info (stmt, fntype); + tree fndecl = gimple_call_fndecl (stmt); + if (fndecl && (DECL_IS_OPERATOR_DELETE_P (fndecl) + || DECL_IS_OPERATOR_NEW_P (fndecl))) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Don't check types of known 
functions.\n"); + return; + } if (gimple_has_lhs (stmt)) { tree t1 = TREE_TYPE (gimple_call_lhs (stmt)); @@ -4992,6 +5210,831 @@ dump_assign_info (gimple *stmt, tree rhs, tree lhs_type, tree rhs_type) fprintf (dump_file, "\n"); } +static void +add_record_access (tree record_type, unsigned offset, function *fn, + gimple* stmt, gimple *src, gimple* esc) +{ + if (dump_file) + { + fprintf (dump_file, "add_record_access: "); + print_generic_expr (dump_file, record_type); + fprintf (dump_file, " offset=%d fn=", offset); + print_generic_expr (dump_file, fn->decl); + if (stmt) + { + fprintf (dump_file, " stmt "); + print_gimple_stmt (dump_file, stmt, 3, TDF_DETAILS); + } + if (esc) + { + fprintf (dump_file, " esc "); + print_gimple_stmt (dump_file, esc, 3, TDF_DETAILS); + } + } + field_access_map *fa; + if (record_accesses->count (record_type)) + fa = (*record_accesses)[record_type]; + else + { + fa = new field_access_map; + (*record_accesses)[record_type] = fa; + } + objref *obj = new objref (stmt, src, esc, fn); + (*fa)[offset].safe_push (obj); +} + +static void +dump_record_accesses () +{ + fprintf (dump_file, "\nList of record accesses:\n"); + for (record_access_map::iterator it = record_accesses->begin (); + it != record_accesses->end (); it++) + { + fprintf (dump_file, "Record type: "); + print_generic_expr (dump_file, it->first); + if (it->first) + fprintf (dump_file, "(%d)\n", TYPE_UID (it->first)); + field_access_map *m2 = it->second; + if (m2 == NULL) + continue; + for (field_access_map::iterator it2 = m2->begin (); + it2 != m2->end (); it2++) + { + fprintf (dump_file, "\toffset %d:\n", it2->first); + for (unsigned int i = 0; i < it2->second.length (); i++) + { + function *fn = (it2->second)[i]->fn; + gimple *stmt = (it2->second)[i]->stmt; + gimple *src = (it2->second)[i]->source; + gimple *esc = (it2->second)[i]->escape; + fprintf (dump_file, "\t\tfn "); + print_generic_expr (dump_file, fn->decl, TDF_SLIM); + fprintf (dump_file, "(%d)\n", DECL_UID (fn->decl)); + 
if (stmt) + { + fprintf (dump_file, "\t\t\tstmt\t"); + print_gimple_stmt (dump_file, stmt, 0); + } + if (esc) + { + fprintf (dump_file, "\t\t\tesc\t"); + if (is_gimple_assign (esc)) + { + enum tree_code c = TREE_CODE (gimple_assign_rhs1 (esc)); + fprintf (dump_file, "{%s} ", get_tree_code_name (c)); + } + print_gimple_stmt (dump_file, esc, 0); + } + if (src) + { + fprintf (dump_file, "\t\t\tsrc\t"); + print_gimple_stmt (dump_file, src, 0); + } + } + } + } + fprintf (dump_file, "\nList of global pointer vars:\n"); + for (global_map::iterator it = global_pointer_vals->begin (); + it != global_pointer_vals->end (); it++) + { + fprintf (dump_file, "Global ptr: "); + print_generic_expr (dump_file, it->first); + if (it->first) + fprintf (dump_file, "(%d)\n", DECL_UID (it->first)); + offset_map *m2 = it->second; + for (offset_map::iterator it2 = m2->begin (); + it2 != m2->end (); it2++) + { + fprintf (dump_file, "\toffset %d:\n", it2->first); + for (unsigned int i = 0; i < it2->second.length (); i++) + { + tree val = (it2->second)[i]; + fprintf (dump_file, "\t\t\tval\t"); + if (val) + print_generic_expr (dump_file, val); + fprintf (dump_file, "%s\n", val ? "" : "?"); + } + } + } +} + +#include "tree-dfa.h" +#include "ssa.h" + +static void find_final_uses (tree lhs, vec &uses, vec &ops); +static void dump_stmt_list (const char *msg, vec &stmts, + vec *ops); +static bool is_escape_addr_value (gimple *stmt, tree val); + +static bool +is_assign_or_phi (gimple *stmt) +{ + return is_gimple_assign (stmt) || gimple_code (stmt) == GIMPLE_PHI + || (is_gimple_call (stmt) && gimple_call_lhs (stmt) != NULL_TREE); +} + +static bool is_builtin_call_no_escape (tree fndecl) +{ + if (!fndecl_built_in_p (fndecl, BUILT_IN_NORMAL)) + return false; + + switch (DECL_FUNCTION_CODE (fndecl)) + { + /* May escape memory itself. */ + case BUILT_IN_MEMCPY: + case BUILT_IN_MEMMOVE: + /* Do not escape. 
*/ + case BUILT_IN_MEMSET: + case BUILT_IN_MEMCMP: + case BUILT_IN_STRLEN: + case BUILT_IN_STRCHR: + case BUILT_IN_STRSTR: + return true; + default: + break; + } + return false; +} + +/* Check if the function's arg value escapes function fn. */ + +static std::map> funcs_in_processing; +static std::map> func_arg_escape; + +static bool +evaluate_fn_arg (tree decl, unsigned argn) +{ + struct cgraph_node *n = cgraph_node::get (decl); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "evaluate_fn_arg has_body=%d ", + n->has_gimple_body_p ()); + print_generic_expr (dump_file, decl); + fprintf (dump_file, " #%d\n", argn); + } + if (!n->has_gimple_body_p ()) + return true; + n->get_body (); + function *fn = DECL_STRUCT_FUNCTION (decl); + gcc_assert (fn); + push_cfun (fn); + + /* Walk over formal arguments and find our arg. */ + unsigned i = 0; + tree name = NULL_TREE; + for (tree p = DECL_ARGUMENTS (current_function_decl); p; p = TREE_CHAIN (p)) + { + if (argn == i) + { + name = ssa_default_def (fn, p); + break; + } + i++; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Found: "); + print_generic_expr (dump_file, name); + fprintf (dump_file, "\n"); + } + if (name == NULL_TREE) + { + pop_cfun (); + return true; + } + auto_vec use_list; + auto_vec ops; + find_final_uses (name, use_list, ops); + for (unsigned i = 0; i < use_list.length (); ++i) + if (is_escape_addr_value (use_list[i], ops[i])) + { + pop_cfun (); + return true; + } + pop_cfun (); + return false; +} + +static bool +is_call_escape_addr_value (gimple *stmt, tree val) +{ + /* At this point there are no call or arg flags set by IPA, so we can + use only basic ones. 
*/ + tree fn = gimple_call_fndecl (stmt); + const gcall *call = as_a (stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "is_call_escape_addr_value in call %x %x %x %x: ", + gimple_call_flags (stmt), gimple_call_retslot_flags (call), + gimple_call_static_chain_flags (call), + gimple_call_return_flags (call)); + print_gimple_stmt (dump_file, stmt, 0); + } + if (fn == NULL_TREE) + { + /* It's indirect call, check the callee pointer. Address value val + is not escaped itself by calling. */ + tree callee = gimple_call_fn (stmt); + if (callee && val && operand_equal_p (callee, val)) + return false; + return true; + } + if (DECL_IS_OPERATOR_NEW_P (fn) || DECL_IS_OPERATOR_DELETE_P (fn) + || DECL_CXX_CONSTRUCTOR_P (fn) || DECL_CXX_DESTRUCTOR_P (fn)) + return false; + unsigned argn = UINT_MAX; + for (unsigned int i = 0; i < gimple_call_num_args (call); ++i) + if (operand_equal_p (val, gimple_call_arg (call, i))) + { + argn = i; + break; + } + if (argn == UINT_MAX) + return false; + unsigned darg = fndecl_dealloc_argno (fn); + if (dump_file && (dump_flags & TDF_DETAILS) && darg != UINT_MAX) + { + fprintf (dump_file, "fndecl_dealloc_argno: %d %d: ", argn, darg); + print_generic_expr (dump_file, val); + print_generic_expr (dump_file, gimple_call_arg (call, darg)); + fprintf (dump_file, "\n"); + } + if (darg != UINT_MAX && operand_equal_p (val, gimple_call_arg (call, darg))) + return false; + if (is_builtin_call_no_escape (fn)) + return false; + + /* Check escape by fn's argument N. Check cache. */ + if (func_arg_escape.count (fn) && func_arg_escape[fn].count (argn)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "is_call_escape_addr_value: "); + print_generic_expr (dump_file, fn); + fprintf (dump_file, " #%d => %d\n", + argn, func_arg_escape[fn][argn]); + } + return func_arg_escape[fn][argn]; + } + + bool res; + /* Break recursion. 
*/ + if (funcs_in_processing.count (fn) && funcs_in_processing[fn].count (argn)) + { + func_arg_escape[fn][argn] = false; + res = false; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "is_call_escape_addr_value: "); + print_generic_expr (dump_file, fn); + fprintf (dump_file, " #%d recursion\n", argn); + } + } + else + { + funcs_in_processing[fn].insert (argn); + res = evaluate_fn_arg (fn, argn); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "is_call_escape_addr_value: "); + print_generic_expr (dump_file, fn); + fprintf (dump_file, " #%d evaluated %d\n", argn, res); + } + funcs_in_processing[fn].erase (argn); + func_arg_escape[fn][argn] = res; + } + return res; +} + +static bool +is_escape_addr_value (gimple *stmt, tree val) +{ + if (is_gimple_call (stmt)) + return is_call_escape_addr_value (stmt, val); + /* TODO: arith ops, stores, addrs, clobbers. */ + if (gimple_code (stmt) == GIMPLE_COND) + return false; + if (is_gimple_assign (stmt)) + { + enum tree_code c = gimple_assign_rhs_code (stmt); + if (c == PLUS_EXPR || c == MINUS_EXPR || c == MULT_EXPR || c == MEM_REF + || c == COMPONENT_REF) + return false; + if (c == ADDR_EXPR) + { + tree lhs1 = gimple_get_lhs (stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "is_escape_addr_value addr_expr\n"); + + if (TREE_CODE (lhs1) != SSA_NAME) + return true; + auto_vec use_list; + auto_vec ops; + find_final_uses (lhs1, use_list, ops); + if (dump_file && (dump_flags & TDF_DETAILS) && use_list.length ()) + dump_stmt_list ("Addr_expr use-stmt list:\n", use_list, &ops); + for (unsigned i = 0; i < use_list.length (); ++i) + if (is_escape_addr_value (use_list[i], ops[i])) + return true; + return false; + } + } + if (gimple_store_p (stmt)) + { + tree rhs = gimple_assign_rhs1 (stmt); + return operand_equal_p (rhs, val); + } + return true; +} + +static bool +is_pass_trought_stmt (gimple *stmt) +{ + if (gimple_code (stmt) == GIMPLE_PHI) + return true; + if 
(!is_gimple_assign (stmt)) + return false; + enum tree_code c = gimple_assign_rhs_code (stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "is_pass_trought_stmt c={%s}\n", + get_tree_code_name (c)); + } + if (c == SSA_NAME || c == NOP_EXPR || c == POINTER_PLUS_EXPR) + return true; + return false; +} + +static void +dump_call_with_attrs (gimple *stmt) +{ + gcc_assert (is_gimple_call (stmt)); + tree fn = gimple_call_fndecl (stmt); + fprintf (dump_file, "call ("); + if (fn) + { + if (DECL_CXX_CONSTRUCTOR_P (fn)) + fprintf (dump_file, "ctor "); + if (DECL_CXX_DESTRUCTOR_P (fn)) + fprintf (dump_file, "dtor "); + if (DECL_IS_OPERATOR_NEW_P (fn)) + fprintf (dump_file, "new "); + if (DECL_IS_OPERATOR_DELETE_P (fn)) + fprintf (dump_file, "del "); + unsigned dealloc_arg = fndecl_dealloc_argno (fn); + if (dealloc_arg != UINT_MAX) + fprintf (dump_file, "dealloc %d ", dealloc_arg); + } + fprintf (dump_file, "): "); +} + +static void +dump_stmt_list (const char *msg, vec &stmts, vec *ops) +{ + fprintf (dump_file, msg); + gcc_assert (!ops || stmts.length () == ops->length ()); + for (unsigned i = 0; i < stmts.length (); ++i) + { + gimple *stmt = stmts[i]; + fprintf (dump_file, "\t"); + if (gimple_assign_load_p (stmt) || gimple_store_p (stmt)) + fprintf (dump_file, gimple_assign_load_p (stmt) ? 
"ld: " : "st: "); + else if (is_gimple_call (stmt)) + dump_call_with_attrs (stmt); + else + fprintf (dump_file, "{%s}: ", gimple_code_name[gimple_code (stmt)]); + if (ops) + { + print_generic_expr (dump_file, (*ops)[i]); + fprintf (dump_file, " => "); + } + print_gimple_stmt (dump_file, stmt, 0); + } +} + +static void +find_final_uses (tree lhs, vec &uses, vec &ops) +{ + use_operand_p use_p; + imm_use_iterator imm_iter; + auto_vec worklist; + hash_set visited; + gimple *stmt = NULL; + while (lhs) + { + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) + { + stmt = USE_STMT (use_p); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Looking thrue stmt ld/st/call (%d/%d/%d): ", + gimple_assign_load_p (stmt), gimple_store_p (stmt), + is_gimple_call (stmt)); + print_gimple_stmt (dump_file, stmt, 0); + } + if (visited.add (stmt)) + continue; + if (is_pass_trought_stmt (stmt)) + { + tree lhs1 = gimple_get_lhs (stmt); + if (TREE_CODE (lhs1) == SSA_NAME) + { + worklist.safe_push (stmt); + continue; + } + } + uses.safe_push (stmt); + ops.safe_push (lhs); + } + if (!worklist.length ()) + break; + stmt = worklist.pop (); + gcc_assert (is_gimple_assign (stmt) || gimple_code (stmt) == GIMPLE_PHI); + lhs = gimple_get_lhs (stmt); + } + if (dump_file && (dump_flags & TDF_DETAILS) && uses.length ()) + dump_stmt_list ("Final use-stmt list:\n", uses, &ops); +} + +static void +insert_work_or_final_list (tree rhs, gimple *stmt, vec &worklist, + vec &list) +{ + gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); + if (def_stmt && gimple_code (def_stmt) != GIMPLE_NOP) + worklist.safe_push (def_stmt); + else + list.safe_push (stmt); +} + +static bool +find_initial_defs (gimple *stmt, vec &def_list) +{ + gcc_assert (is_assign_or_phi (stmt)); + if (is_gimple_call (stmt)) + { + def_list.safe_push (stmt); + return true; + } + tree rhs = gimple_assign_rhs1 (stmt); + gcc_assert (rhs); + if (TREE_CODE (rhs) == CONSTRUCTOR || TREE_CODE (rhs) == ADDR_EXPR + || TREE_CODE (rhs) == 
COMPONENT_REF) + def_list.safe_push (stmt); + if (TREE_CODE (rhs) == CONSTRUCTOR || CONSTANT_CLASS_P (rhs) + || TREE_CODE (rhs) == ADDR_EXPR || TREE_CODE (rhs) == COMPONENT_REF) + return true; + + if (TREE_CODE (rhs) != SSA_NAME) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "find_initial_defs todo rhs{%s} in stmt: ", + get_tree_code_name (TREE_CODE (rhs))); + print_gimple_stmt (dump_file, stmt, 0); + } + return false; + } + auto_vec worklist; + hash_set visited; + insert_work_or_final_list (rhs, stmt, worklist, def_list); + + while (worklist.length ()) + { + gimple *def_stmt = worklist.pop (); + if (!def_stmt) + continue; + if (visited.add (def_stmt)) + continue; + + if (!is_assign_or_phi (def_stmt)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "find_initial_defs todo 2 stmt{%s}: ", + gimple_code_name[gimple_code (def_stmt)]); + print_gimple_stmt (dump_file, def_stmt, 0); + } + def_list.safe_push (def_stmt); + continue; + } + if (!is_pass_trought_stmt (def_stmt)) + { + def_list.safe_push (def_stmt); + continue; + } + if (gimple_code (def_stmt) == GIMPLE_PHI) + { + for (size_t i = 0; i < gimple_phi_num_args (def_stmt); i++) + { + tree arg = PHI_ARG_DEF (def_stmt, i); + if (CONSTANT_CLASS_P (arg)) + continue; + if (TREE_CODE (arg) == SSA_NAME) + insert_work_or_final_list (arg, def_stmt, worklist, def_list); + else + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "find_initial_defs todo 3 arg{%s}: ", + get_tree_code_name (TREE_CODE (arg))); + print_generic_expr (dump_file, ssa_name (i)); + fprintf (dump_file, "\n"); + } + def_list.safe_push (def_stmt); + } + } + continue; + } + enum tree_code c = gimple_assign_rhs_code (def_stmt); + if (c == SSA_NAME || c == NOP_EXPR || c == POINTER_PLUS_EXPR) + { + tree rhs1 = gimple_assign_rhs1 (def_stmt); + if (TREE_CODE (rhs1) == SSA_NAME) + insert_work_or_final_list (rhs1, def_stmt, worklist, def_list); + else + { + if (dump_file && 
(dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "find_initial_defs todo 4 rhs1{%s}: ", + get_tree_code_name (TREE_CODE (rhs1))); + print_generic_expr (dump_file, rhs1); + fprintf (dump_file, "\n"); + } + def_list.safe_push (def_stmt); + } + continue; + } + def_list.safe_push (def_stmt); + } + if (dump_file && (dump_flags & TDF_DETAILS) && def_list.length ()) + dump_stmt_list ("Final def-stmt list:\n", def_list, NULL); + + return def_list.length (); +} + +static tree +get_record_base_type (tree mem_type) +{ + tree rtype = NULL_TREE; + if (mem_type == NULL_TREE) + return NULL_TREE; + + if (TREE_CODE (mem_type) == RECORD_TYPE) + rtype = mem_type; + else if (POINTER_TYPE_P (mem_type) + && TREE_CODE (TREE_TYPE (mem_type)) == RECORD_TYPE) + rtype = TREE_TYPE (mem_type); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "From type{%s} ", + get_tree_code_name (TREE_CODE (mem_type))); + print_generic_expr (dump_file, mem_type); + fprintf (dump_file, " to{%s} ", + rtype ? 
get_tree_code_name (TREE_CODE (rtype)) : "?"); + print_generic_expr (dump_file, rtype); + fprintf (dump_file, "\n"); + } + return rtype; +} + +static bool +get_addr_base_etc (tree mem, tree &base, HOST_WIDE_INT &offset, tree &type) +{ + if (mem == NULL_TREE) + return false; + poly_int64 base_offset; + offset = -1; + base = get_addr_base_and_unit_offset (mem, &base_offset); + + if (base == NULL_TREE) + { + poly_int64 size, max_size; + bool reverse; + base = get_ref_base_and_extent (mem, &base_offset, &size, + &max_size, &reverse); + if (base == NULL_TREE) + return false; + } + + if (!base_offset.is_constant (&offset)) + return false; + + type = TREE_TYPE (base); + if (TREE_CODE (base) != MEM_REF) + return true; + + tree new_offset = TREE_OPERAND (base, 1); + tree new_base = TREE_OPERAND (base, 0); + tree ptr_type = TREE_TYPE (new_base); + gcc_assert (POINTER_TYPE_P (ptr_type)); + + base = new_base; + if (new_offset && TREE_CODE (new_offset) == INTEGER_CST) + { + gcc_assert (tree_fits_uhwi_p (new_offset)); + offset += tree_to_uhwi (new_offset); + } + return true; +} + +static void +analyse_mem_assigns (gimple *stmt) +{ + if (dump_file) + { + fprintf (dump_file, "----- Start analyse_mem_assigns for stmt: "); + print_gimple_stmt (dump_file, stmt, 3, TDF_DETAILS); + } + if (!is_gimple_assign (stmt) && dump_file) + fprintf (dump_file, "stmt is not assign {%s}\n", + gimple_code_name[gimple_code (stmt)]); + + tree lhs = gimple_get_lhs (stmt); + if (dump_file && (dump_flags & TDF_DETAILS) && is_gimple_call (stmt)) + { + dump_call_with_attrs (stmt); + fprintf (dump_file, "\n"); + } + /* TODO: check escape over params. 
*/ + if (lhs == NULL && is_gimple_call (stmt)) + return; + if (lhs == NULL) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "STMT WITH NO LHS!!!"); + return; + } + bool is_store = gimple_store_p (stmt); + bool is_load = gimple_assign_load_p (stmt); + tree lhs_type = TREE_TYPE (lhs); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + if (lhs_type && POINTER_TYPE_P (lhs_type)) + fprintf (dump_file, "Ptr assign {%s}: lhs_type=", + get_tree_code_name (TREE_CODE (TREE_TYPE (lhs_type)))); + else + fprintf (dump_file, "Non-ptr assign {%s}: lhs_type=", + lhs_type ? get_tree_code_name (TREE_CODE (lhs_type)) : "?"); + print_generic_expr (dump_file, lhs_type); + fprintf (dump_file, "\n"); + } + + if (!lhs_type || !POINTER_TYPE_P (lhs_type)) + return; + + tree rhs = is_gimple_assign (stmt) ? gimple_assign_rhs1 (stmt) : NULL_TREE; + tree record_type = NULL_TREE; + HOST_WIDE_INT loff = -1; + tree lbase = NULL_TREE, ltype = NULL_TREE; + if (get_addr_base_etc (lhs, lbase, loff, ltype)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "lhs: base="); + print_generic_expr (dump_file, lbase); + fprintf (dump_file, " off=%ld ltype=", loff); + print_generic_expr (dump_file, ltype); + fprintf (dump_file, "\n"); + } + ltype = get_record_base_type (ltype); + } + else + { + fprintf (dump_file, "lhs: HAVE NOT FOUND all components (b/o/t=" + "%p/%ld/%p) for ", (void *) lbase, loff, (void *) ltype); + print_generic_expr (dump_file, lbase); + fprintf (dump_file, "\n"); + } + + if (rhs && dump_file) + { + fprintf (dump_file, "stmt, rhs=%s: ", + get_tree_code_name (TREE_CODE (rhs))); + print_generic_expr (dump_file, rhs); + fprintf (dump_file, " Type: "); + print_generic_expr (dump_file, TREE_TYPE (rhs)); + fprintf (dump_file, "\n"); + } + + if (is_store && lbase == NULL_TREE) + { + if (dump_file) + fprintf (dump_file, "Broken analysis in stmt.\n"); + return; + } + + tree lhs_mem_type = TREE_TYPE (lbase); + if (TREE_CODE (lbase) == MEM_REF) + 
{ + tree new_offset = TREE_OPERAND (lbase, 1); + tree new_base = TREE_OPERAND (lbase, 0); + tree ptr_type = TREE_TYPE (new_base); + gcc_assert (POINTER_TYPE_P (ptr_type)); + lbase = new_base; + if (new_offset && TREE_CODE (new_offset) == INTEGER_CST) + { + gcc_assert (tree_fits_uhwi_p (new_offset)); + loff += tree_to_uhwi (new_offset); + } + if (lbase && dump_file) + { + fprintf (dump_file, "New: "); + print_generic_expr (dump_file, new_base); + fprintf (dump_file, " "); + print_generic_expr (dump_file, new_offset); + fprintf (dump_file, " off=%ld\n", loff); + } + } + + if (dump_file) + { + lhs_mem_type = TREE_TYPE (lbase); + fprintf (dump_file, "lhs_mem_type: {%s}: ", + get_tree_code_name (TREE_CODE ((lhs_mem_type)))); + print_generic_expr (dump_file, (lhs_mem_type)); + fprintf (dump_file, "\n"); + } + + tree rbase = NULL_TREE, rtype = NULL_TREE; + HOST_WIDE_INT roff = -1; + if (lhs && (is_load || is_gimple_call (stmt))) + { + if (get_addr_base_etc (rhs, rbase, roff, rtype)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "rhs: base="); + print_generic_expr (dump_file, rbase); + fprintf (dump_file, " off=%ld rtype=", roff); + print_generic_expr (dump_file, rtype); + fprintf (dump_file, "\n"); + } + rtype = get_record_base_type (rtype); + if (rtype) + { + gcc_assert (lhs); + vec use_list = vNULL; + vec ops = vNULL; + if (TREE_CODE (lhs) == SSA_NAME) + find_final_uses (lhs, use_list, ops); + else if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "UNIMPLEMENTED support of %s in lhs\n", + get_tree_code_name (TREE_CODE (lhs))); + for (unsigned i = 0; i < use_list.length (); ++i) + if (is_escape_addr_value (use_list[i], ops[i])) + add_record_access (rtype, roff, cfun, stmt, + NULL, use_list[i]); + use_list.release (); + ops.release (); + } + } + else if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "rhs: HAVE NOT FOUND all components (b/o/t=" + "%p/%ld/%p) for ", (void *) rbase, roff, (void *) rtype); + 
print_generic_expr (dump_file, rbase); + fprintf (dump_file, "\n"); + } + } + + if (!is_store) + return; + vec def_list = vNULL; + + /* TODO: support pointer assigns with nonzero offset in rhs. */ + if (need_to_collect_object (lbase, lhs_mem_type) && rbase) + add_global_pointer_val (lbase, loff, rbase, roff == 0); + + if (!lhs_mem_type || (TREE_CODE (lhs_mem_type) != RECORD_TYPE + && !POINTER_TYPE_P (lhs_mem_type))) + return; + + record_type = get_record_base_type (ltype); + + /* TODO: check it's not escape. */ + if (record_type == NULL_TREE) + return; + if (is_store) + { + bool found = find_initial_defs (stmt, def_list); + if (dump_file) + { + fprintf (dump_file, "Store found with off=%ld", loff); + fprintf (dump_file, " record_type="); + print_generic_expr (dump_file, record_type); + fprintf (dump_file, "\n"); + if (found == false) + fprintf (dump_file, "SOURCE is not found\n"); + } + for (unsigned i = 0; i < def_list.length (); ++i) + add_record_access (record_type, loff, cfun, stmt, def_list[i], NULL); + } + def_list.release (); +} + /* Analyze cast/copy assign stmt to find type aliases. */ static void @@ -5055,6 +6098,55 @@ analyze_assign_stmt (gimple *stmt) maybe_register_non_void_aliases (lhs_type, rhs_type); } +/* Check gimple assign for address taken objects. 
*/ + +static void +analyze_addr_expr (gimple* stmt) +{ + enum tree_code c = gimple_assign_rhs_code (stmt); + tree lhs = gimple_assign_lhs (stmt); + tree op0 = gimple_assign_rhs1 (stmt); + tree op1 = gimple_assign_rhs2 (stmt); + tree base = NULL_TREE; + if ((c == POINTER_PLUS_EXPR && TREE_CODE (op0) == ADDR_EXPR + && TREE_CODE (op1) == SSA_NAME) || c == ADDR_EXPR) + base = TREE_OPERAND (op0, 0); + else + return; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "----- Start analyze_addr_expr: FOUND with base " + "{%s}:", get_tree_code_name (TREE_CODE (base))); + print_gimple_stmt (dump_file, stmt, 0); + } + gcc_assert (lhs); + if (TREE_CODE (lhs) == VAR_DECL) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "analyze_addr_expr: escape over VAR_DECL " + "(glob=%d) in lhs: ", is_global_var (lhs)); + print_generic_expr (dump_file, lhs); + fprintf (dump_file, "\n"); + } + mark_address_taken (base, true); + } + else if (TREE_CODE (lhs) == SSA_NAME) + { + vec use_list = vNULL; + vec ops = vNULL; + find_final_uses (lhs, use_list, ops); + for (unsigned i = 0; i < use_list.length (); ++i) + if (is_escape_addr_value (use_list[i], ops[i])) + mark_address_taken (base, true); + use_list.release (); + ops.release (); + } + else if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "analyze_addr_expr: UNIMPLEMENTED support of %s " + "in lhs\n", get_tree_code_name (TREE_CODE (lhs))); +} + /* Walk all fn's stmt to analyze assigns. */ static void @@ -5063,10 +6155,19 @@ analyze_assigns (function* fn) push_cfun (fn); basic_block bb; gimple_stmt_iterator si; + /* TODO: maybe we need to analyze phis. 
*/ FOR_EACH_BB_FN (bb, fn) for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) { gimple *stmt = gsi_stmt (si); + if (flag_ipa_alias) + { + if (gimple_store_p (stmt) || gimple_assign_load_p (stmt) + || is_gimple_call (stmt)) + analyse_mem_assigns (stmt); + if (is_gimple_assign (stmt)) + analyze_addr_expr (stmt); + } if (!gimple_assign_cast_p (stmt) && !gimple_assign_copy_p (stmt)) continue; analyze_assign_stmt (stmt); @@ -5099,6 +6200,7 @@ collect_type_alias_sets () function *fn = DECL_STRUCT_FUNCTION (n->decl); if (!fn) continue; + if (dump_file && (dump_flags & TDF_DETAILS)) dump_function_node_info (n); /* Analyze direct/indirect function calls. */ @@ -5109,6 +6211,8 @@ collect_type_alias_sets () /* Analyze assign (with casts) statements. */ analyze_assigns (fn); } + if (dump_file) + dump_record_accesses (); } static void @@ -5283,6 +6387,56 @@ process_alias_type_sets () } } +static void +process_data_accesses () +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\nProcess data accesses:\n"); + /* Walk over all found record types and copy their addess taken attribute + to canonical types. */ + for (record_access_map::iterator it = record_accesses->begin (); + it != record_accesses->end (); it++) + { + tree type = it->first; + if (type == NULL_TREE || TREE_CODE (type) != RECORD_TYPE) + continue; + tree ctype = TYPE_CANONICAL (type); + if (address_taken_types->count (type)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Copy addr-taken of "); + print_generic_expr (dump_file, type); + fprintf (dump_file, " to canonical "); + print_generic_expr (dump_file, ctype); + fprintf (dump_file, " (%d)->(%d)\n", + TYPE_UID (type), TYPE_UID (ctype)); + } + address_taken_types->insert (ctype); + } + } + /* If a canonical type of the record type is not address taken set fields + non-alias info to the record. 
*/ + for (record_access_map::iterator it = record_accesses->begin (); + it != record_accesses->end (); it++) + { + tree type = it->first; + if (type == NULL_TREE || TREE_CODE (type) != RECORD_TYPE) + continue; + tree ctype = TYPE_CANONICAL (type); + if (!address_taken_types->count (ctype)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Set fields non-alias bit "); + print_generic_expr (dump_file, type); + fprintf (dump_file, " (%d)\n", TYPE_UID (type)); + } + TYPE_FIELDS_NONALIAS (type) = 1; + } + } +} + static void dump_unsafe_and_canonical_types () { @@ -5312,6 +6466,20 @@ dump_unsafe_and_canonical_types () else fprintf (dump_file, " null\n"); } + fprintf (dump_file, "\nList of address taken global ptrs or records:\n"); + for (decl_set::iterator it = address_taken_globals->begin (); + it != address_taken_globals->end (); ++it) + { + print_generic_expr (dump_file, *it); + fprintf (dump_file, " (%d)\n", DECL_UID (*it)); + } + fprintf (dump_file, "\nList of address taken ptr or record types:\n"); + for (decl_set::iterator it = address_taken_types->begin (); + it != address_taken_types->end (); ++it) + { + print_generic_expr (dump_file, *it); + fprintf (dump_file, " (%d)\n", TYPE_UID (*it)); + } } static void @@ -5683,6 +6851,8 @@ collect_function_type_aliases () if (dump_file && (dump_flags & TDF_DETAILS)) dump_function_type_aliases_list (); + + process_data_accesses (); } static void @@ -6251,6 +7421,31 @@ remove_type_alias_map (MAP *map) delete map; } +static void +remove_access_map (record_access_map *map) +{ + for (record_access_map::iterator it = map->begin (); + it != map->end (); it++) + { + field_access_map *m2 = it->second; + for (field_access_map::iterator it2 = m2->begin (); + it2 != m2->end (); it2++) + for (unsigned int i = 0; i < it2->second.length (); i++) + delete it2->second[i]; + delete m2; + } + delete map; +} + +static void +remove_global_map (global_map *map) +{ + for (global_map::iterator it = map->begin (); it != 
map->end (); it++) + delete it->second; + delete map; +} + + /* The ipa indirect call promotion pass. Run required analysis and optimize indirect calls. When indirect call has only one target, promote it into a direct call. */ @@ -6265,6 +7460,10 @@ ipa_icp (void) ctype_map = new type_map; unsafe_types = new type_set; type_uid_map = new uid_to_type_map; + record_accesses = new record_access_map; + global_pointer_vals = new global_map; + address_taken_globals = new decl_set; + address_taken_types = new decl_set; /* Find type aliases, fill the function signature map and optimize indirect calls. */ @@ -6276,9 +7475,13 @@ ipa_icp (void) remove_type_alias_map (fta_map); remove_type_alias_map (cbase_to_ptype); remove_type_alias_map (fs_map); + remove_access_map (record_accesses); + remove_global_map (global_pointer_vals); delete ctype_map; delete unsafe_types; delete type_uid_map; + delete address_taken_globals; + delete address_taken_types; return optimized ? TODO_remove_functions : 0; } diff --git a/gcc/params.opt b/gcc/params.opt index a716f2cc468bea2e0d25fa13ea76056c5cbf8f31..1deb562e93c6cee91e42ad0111a62a51fbc05c12 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -749,6 +749,10 @@ Maximum number of strings for which strlen optimization pass will track string l Common Joined UInteger Var(param_max_tree_if_conversion_phi_args) Init(4) IntegerRange(2, 65536) Param Optimization Maximum number of arguments in a PHI supported by TREE if-conversion unless the loop is marked with simd pragma. +-param=max-tree-partial-if-conversion-stmts= +Common Joined UInteger Var(param_max_tree_partial_if_conversion_stmts) Init(4) Param Optimization +Maximum number of statements in a basic block to be considered for partial if-conversion. + -param=max-unroll-times= Common Joined UInteger Var(param_max_unroll_times) Init(8) Param Optimization The maximum number of unrollings of a single loop. 
diff --git a/gcc/testsuite/gcc.dg/icp8.c b/gcc/testsuite/gcc.dg/icp8.c new file mode 100644 index 0000000000000000000000000000000000000000..7865da62c60d2d01ba2c2222c78f6f924579db59 --- /dev/null +++ b/gcc/testsuite/gcc.dg/icp8.c @@ -0,0 +1,40 @@ +/* Check that we analyze global vars with inits. */ +/* { dg-do run } */ +/* { dg-options "-O2 -flto -ficp -fdump-ipa-icp-details -fdump-ipa-icp=./icp8.c.085i.icp" } */ + +#define N 1000 +int glob1 = 10; +int glob2 = 20; +int garr1[N]; +int garr2[N]; + +typedef int (*fpt) (int a); +extern fpt fp1; +extern fpt fp2; + +int f1 (int a) +{ + for (int i = a; i < glob1; i++) + garr1[i] = garr2[i] + glob2; + fp2 = fp1; + return glob2; +} + +int f2 (int a) +{ + for (int i = a; i < glob2; i++) + garr2[i] = garr1[i] + glob1; + fp1 = fp2; + return glob1; +} + +fpt fp1 = f1; +fpt fp2 = f2; + +int main() +{ + return (fp1(1) + fp2(2)) != 40; + } + + +/* { dg-final { scan-ipa-dump-times "Mismatch of var and init types" 2 "icp"} } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ifc-13.c b/gcc/testsuite/gcc.dg/tree-ssa/ifc-13.c new file mode 100644 index 0000000000000000000000000000000000000000..08f35b3ba3f9f90782e696ddc372d238f70533eb --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ifc-13.c @@ -0,0 +1,20 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -ftree-loop-partial-if-convert -fdump-tree-ifcvt-stats-blocks-details" } */ +/* { dg-require-visibility "" } */ + +int a[1024] = {0.0}; +int b[1024] = {0.0}; +int c[1024] = {0.0}; + +int foo (float *x) +{ + for (int i = 0; i < 1024; i++) + { + c[i] = (x[i] > 0.0) ? a[i] : b[i]; + x[i] = (c[i] > 0) ? 
a[b[i]] : b[a[i]]; + } + + return 0; +} + +/* { dg-final { scan-tree-dump-times "Applying partial if-conversion" 1 "ifcvt" } } */ diff --git a/gcc/tree-core.h b/gcc/tree-core.h index f1c2b6413a3ca73dcfefa1a09c1408cf1a9e4e6b..38e6d7212a9512f5d5847b9baa7476b1fb476dde 100644 --- a/gcc/tree-core.h +++ b/gcc/tree-core.h @@ -1280,6 +1280,9 @@ struct GTY(()) tree_base { ENUM_IS_OPAQUE in ENUMERAL_TYPE + TYPE_FIELDS_NONALIAS in + RECORD_TYPE + protected_flag: TREE_PROTECTED in diff --git a/gcc/tree-if-conv.cc b/gcc/tree-if-conv.cc index 3574c673ed87069bc0e952fe13c22f6f5aa823fb..d4fb7950c3ce4b9f99c685890c6a32fd717ee1e3 100644 --- a/gcc/tree-if-conv.cc +++ b/gcc/tree-if-conv.cc @@ -145,6 +145,12 @@ static bool need_to_rewrite_undefined; before phi_convertible_by_degenerating_args. */ static bool any_complicated_phi; +/* True if we run partial if-conversion, i.e. convert some ifs in a loop. */ +static bool partial_p; + +/* Contains set of BBs which can be converted by partial if-conversion. */ +static hash_set if_conv_bbs; + /* Hash for struct innermost_loop_behavior. It depends on the user to free the memory. */ @@ -580,7 +586,7 @@ add_to_dst_predicate_list (class loop *loop, edge e, if (!flow_bb_inside_loop_p (loop, e->dest)) return; - if (!is_true_predicate (prev_cond)) + if (!is_true_predicate (prev_cond) && !partial_p) cond = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, prev_cond, cond); @@ -676,6 +682,14 @@ if_convertible_phi_p (class loop *loop, basic_block bb, gphi *phi) print_gimple_stmt (dump_file, phi, 0, TDF_SLIM); } + /* Don't convert stmts with complex operands since cplxlower fails on it. 
*/ + if (partial_p && TREE_CODE (TREE_TYPE (PHI_RESULT (phi))) == COMPLEX_TYPE) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "complex type in phi\n"); + return false; + } + if (bb != loop->header && gimple_phi_num_args (phi) > 2 && !phi_convertible_by_degenerating_args (phi)) @@ -1029,6 +1043,14 @@ if_convertible_gimple_assign_stmt_p (gimple *stmt, return false; } + /* Don't convert stmts with complex operands since cplxlower fails on it. */ + if (partial_p && TREE_CODE (TREE_TYPE (lhs)) == COMPLEX_TYPE) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "complex type in assign\n"); + return false; + } + /* tree-into-ssa.cc uses GF_PLF_1, so avoid it, because in between if_convertible_loop_p and combine_blocks we can perform loop versioning. */ @@ -1112,6 +1134,168 @@ if_convertible_stmt_p (gimple *stmt, vec refs) } } +/* Return true if the given BB is a successor of a simple condition i.e. + it's BB after if-then or if-then-else clause. It can be represented by + the following CFG: + | pred_bb + | if (x) goto bb1 (e1) else goto bb2 (e2) + | end_pred_bb + | bb1 + | ... + | goto bb + | end_bb1 + | bb2 + | ... + | goto bb + | end_bb2 + | bb + | ... + | end_bb + where bb2 may be missing. */ + +static bool +is_simple_condition_successor (basic_block bb, bool check_conv_bbs) +{ + if (EDGE_COUNT (bb->preds) != 2) + return false; + edge e1 = EDGE_PRED (bb, 0); + edge e2 = EDGE_PRED (bb, 1); + basic_block bb1 = e1->src; + basic_block bb2 = e2->src; + if (!single_pred_p (bb1) && !single_pred_p (bb2)) + return false; + + basic_block pred_bb = NULL; + if (single_pred_p (bb1) && single_succ_p (bb1)) + pred_bb = single_pred_edge (bb1)->src; + else if (single_pred_p (bb2) && single_succ_p (bb2)) + pred_bb = single_pred_edge (bb2)->src; + else + return false; + + if (EDGE_COUNT (pred_bb->succs) != 2) + return false; + + if (pred_bb == bb2) + return check_conv_bbs ? 
if_conv_bbs.contains (bb1) : true; + if (pred_bb == bb1) + return check_conv_bbs ? if_conv_bbs.contains (bb2) : true; + + if (!single_pred_p (bb1) || !single_pred_p (bb2)) + return false; + if (single_pred_edge (bb1)->src == single_pred_edge (bb2)->src) + return check_conv_bbs ? (if_conv_bbs.contains (bb1) + && if_conv_bbs.contains (bb2)) : true; + return false; +} + + +/* Return true if this BB has simple condition with the following jumps to + conditional BBs. So we detect the following CFG: + | bb + | if (x) goto bb1 (e1) else goto bb2 (e2) + | end_bb + | bb1 + | ... + | goto succ_bb + | end_bb1 + | bb2 + | ... + | goto succ_bb + | end_bb2 + | succ_bb + | ... + | end_succ_bb + where bb2 may be missing. */ + +static bool +has_simple_condition (basic_block bb, bool check_conv_bbs) +{ + if (EDGE_COUNT (bb->succs) != 2) + return false; + edge e1 = EDGE_SUCC (bb, 0); + edge e2 = EDGE_SUCC (bb, 1); + basic_block bb1 = e1->dest; + basic_block bb2 = e2->dest; + if (!single_succ_p (bb1) && !single_succ_p (bb2)) + return false; + + basic_block succ_bb = NULL; + if (single_succ_p (bb1) && single_pred_p (bb1)) + succ_bb = single_succ_edge (bb1)->dest; + else if (single_succ_p (bb2) && single_pred_p (bb2)) + succ_bb = single_succ_edge (bb2)->dest; + else + return false; + + if (EDGE_COUNT (succ_bb->preds) != 2) + return false; + + if (succ_bb == bb2) + return check_conv_bbs ? if_conv_bbs.contains (bb1) : true; + if (succ_bb == bb1) + return check_conv_bbs ? if_conv_bbs.contains (bb2) : true; + + if (!single_succ_p (bb1) || !single_succ_p (bb2)) + return false; + if (single_succ_edge (bb1)->dest == single_succ_edge (bb2)->dest) + return check_conv_bbs ? (if_conv_bbs.contains (bb1) + && if_conv_bbs.contains (bb2)) : true; + return false; +} + +/* Return true if this BB can be part of simple condition i.e. it's a basic + block executed conditionally. It detects the following CFG: + | pred_bb + | if (x) goto bb1 (pred_edge) else goto bb2 (alt_edge) + | end_pred_bb + | bb + | ... 
+ | goto succ_bb + | end_bb + | alt_bb + | ... + | goto succ_bb + | end_alt_bb + | succ_bb + | ... + | end_succ_bb + where alt_bb may be missing. BB and alt_bb may be swapped. */ + +static bool +can_be_simple_conditional_bb (basic_block bb, basic_block *alt_bb) +{ + if (!single_pred_p (bb) || !single_succ_p (bb)) + return false; + edge pred_edge = single_pred_edge (bb); + basic_block pred_bb = pred_edge->src; + basic_block succ_bb = single_succ_edge (bb)->dest; + if (EDGE_COUNT (pred_bb->succs) != 2 || EDGE_COUNT (succ_bb->preds) != 2) + return false; + + edge e, alt_edge = NULL; + edge_iterator ei; + FOR_EACH_EDGE (e, ei, pred_bb->succs) + if (e != pred_edge) + { + alt_edge = e; + break; + } + if (alt_edge == NULL) + return false; + if (alt_edge->dest == succ_bb) + return true; + else if (single_succ_p (alt_edge->dest) + && single_succ_edge (alt_edge->dest)->dest == succ_bb) + { + if (alt_bb) + *alt_bb = alt_edge->dest; + } + else + return false; + return true; +} + /* Assumes that BB has more than 1 predecessors. Returns false if at least one successor is not on critical edge and true otherwise. 
*/ @@ -1150,13 +1334,21 @@ if_convertible_bb_p (class loop *loop, basic_block bb, basic_block exit_bb) if (EDGE_COUNT (bb->succs) > 2) return false; + if (partial_p && !can_be_simple_conditional_bb (bb, NULL)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Partial if-conv: bb %d cannot be simple " + "conditional bb, remove it from conversion\n", bb->index); + if_conv_bbs.remove (bb); + return false; + } gimple *last = last_stmt (bb); if (gcall *call = safe_dyn_cast (last)) if (gimple_call_ctrl_altering_p (call)) return false; - if (exit_bb) + if (exit_bb && !partial_p) { if (bb != loop->latch) { @@ -1411,19 +1603,28 @@ if_convertible_loop_p_1 (class loop *loop, vec *refs) if (!ifc_bbs) { if (dump_file && (dump_flags & TDF_DETAILS)) - fprintf (dump_file, "Irreducible loop\n"); + fprintf (dump_file, "Irreducible loop %d\n", loop->num); return false; } for (i = 0; i < loop->num_nodes; i++) { basic_block bb = ifc_bbs[i]; - - if (!if_convertible_bb_p (loop, bb, exit_bb)) + bool if_conv_bb = if_convertible_bb_p (loop, bb, exit_bb); + if (!partial_p && !if_conv_bb) return false; if (bb_with_exit_edge_p (loop, bb)) exit_bb = bb; + + /* Partial optimization can convert some of the loop's bbs. */ + if (partial_p && if_conv_bb && !(bb == exit_bb && bb == loop->header)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Partial if-conv: add bb %d for " + "conversion\n", bb->index); + if_conv_bbs.add (bb); + } } for (i = 0; i < loop->num_nodes; i++) @@ -1442,7 +1643,11 @@ if_convertible_loop_p_1 (class loop *loop, vec *refs) tree label = gimple_label_label (as_a (gsi_stmt (gsi))); if (DECL_NONLOCAL (label) || FORCED_LABEL (label)) - return false; + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "loop has nonlocal labels\n"); + return false; + } } /* Fallthru. 
*/ case GIMPLE_ASSIGN: @@ -1452,6 +1657,11 @@ if_convertible_loop_p_1 (class loop *loop, vec *refs) gimple_set_uid (gsi_stmt (gsi), 0); break; default: + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "loop has unexpected stmt:\n"); + print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_SLIM); + } return false; } } @@ -1510,12 +1720,27 @@ if_convertible_loop_p_1 (class loop *loop, vec *refs) { basic_block bb = ifc_bbs[i]; gimple_stmt_iterator itr; + bool all_stmt_convertable = true; /* Check the if-convertibility of statements in predicated BBs. */ - if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) + if (partial_p || !dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) for (itr = gsi_start_bb (bb); !gsi_end_p (itr); gsi_next (&itr)) + { if (!if_convertible_stmt_p (gsi_stmt (itr), *refs)) - return false; + { + all_stmt_convertable = false; + if (partial_p) + break; + return false; + } + } + if (partial_p && !all_stmt_convertable) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Partial if-conv: remove bb %d from " + "conversion\n", bb->index); + if_conv_bbs.remove (bb); + } } /* Checking PHIs needs to be done after stmts, as the fact whether there @@ -1531,7 +1756,8 @@ if_convertible_loop_p_1 (class loop *loop, vec *refs) } if (dump_file) - fprintf (dump_file, "Applying if-conversion\n"); + fprintf (dump_file, "Applying %sif-conversion to loop %d\n", + partial_p ? "partial " : "", loop->num); return true; } @@ -1551,6 +1777,9 @@ if_convertible_loop_p (class loop *loop) edge_iterator ei; bool res = false; vec refs; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Analyze %sconversion of loop %d\n", + partial_p ? "partial " : "", loop->num); /* Handle only innermost loop. */ if (!loop || loop->inner) @@ -1578,9 +1807,10 @@ if_convertible_loop_p (class loop *loop) /* If one of the loop header's edge is an exit edge then do not apply if-conversion. 
*/ - FOR_EACH_EDGE (e, ei, loop->header->succs) - if (loop_exit_edge_p (loop, e)) - return false; + if (!partial_p) + FOR_EACH_EDGE (e, ei, loop->header->succs) + if (loop_exit_edge_p (loop, e)) + return false; refs.create (5); res = if_convertible_loop_p_1 (loop, &refs); @@ -2157,6 +2387,13 @@ predicate_all_scalar_phis (class loop *loop) if (bb == loop->header) continue; + if (partial_p && !is_simple_condition_successor (bb, true)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Partial if-conv predicate_all_scalar_phis: " + "skip bb %d\n", bb->index); + continue; + } phi_gsi = gsi_start_phis (bb); if (gsi_end_p (phi_gsi)) @@ -2199,8 +2436,16 @@ insert_gimplified_predicates (loop_p loop) reset_bb_predicate (bb); continue; } - stmts = bb_predicate_gimplified_stmts (bb); + + if (partial_p && !if_conv_bbs.contains (bb)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Partial if-conv insert_gimplified_predicates:" + " skip bb %d\n", bb->index); + continue; + } + if (stmts) { if (need_to_predicate) @@ -2520,6 +2765,9 @@ predicate_statements (loop_p loop) bool swap; int index; + if (partial_p && !if_conv_bbs.contains (bb)) + continue; + if (is_true_predicate (cond)) continue; @@ -2658,11 +2906,23 @@ remove_conditions_and_labels (loop_p loop) if (bb_with_exit_edge_p (loop, bb) || bb == loop->latch) continue; - + bool simple_conv_cond = partial_p ? has_simple_condition (bb, true) + : false; for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); ) switch (gimple_code (gsi_stmt (gsi))) { case GIMPLE_COND: + if (partial_p && !simple_conv_cond) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Partial if-conv keep cond: "); + print_gimple_stmt (dump_file, gsi_stmt (gsi), 0, TDF_SLIM); + } + gsi_next (&gsi); + break; + } + /* FALLTHRU. 
*/ case GIMPLE_LABEL: gsi_remove (&gsi, true); break; @@ -2683,6 +2943,274 @@ remove_conditions_and_labels (loop_p loop) } } +/* Remove bb from the conversion set if the conversion cost is too high. */ + +static void +apply_partial_if_conv_cost_model () +{ + for (hash_set::iterator it = if_conv_bbs.begin (); + it != if_conv_bbs.end (); ++it) + { + basic_block bb = *it; + int n_stmts = 0; + gimple_stmt_iterator gsi; + /* The current heuristic is very simple: if the number of stmts in bb + exceeds a threshold, the conversion is disabled. */ + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + if (!is_gimple_debug (gsi_stmt (gsi))) + n_stmts++; + if (n_stmts <= param_max_tree_partial_if_conversion_stmts) + continue; + if_conv_bbs.remove (bb); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Partial if-conv cost: remove bb %d from " + "if_conv_bbs because it contains %d stmts (threshold " + "is %d)\n", bb->index, n_stmts, + param_max_tree_partial_if_conversion_stmts); + } +} + +/* For the given loop remove bbs from if_conv_bbs if their alternative bbs + cannot be converted. 
*/ + +static void +cleanup_partial_if_conv_bbs () +{ + gcc_assert (partial_p); + for (hash_set::iterator it = if_conv_bbs.begin (); + it != if_conv_bbs.end (); ++it) + { + basic_block bb = *it, alt_bb = NULL; + if (!can_be_simple_conditional_bb (bb, &alt_bb)) + continue; + if (!alt_bb || if_conv_bbs.contains (alt_bb)) + continue; + if_conv_bbs.remove (bb); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Partial if-conv cleanup: remove bb %d from " + "if_conv_bbs because we don't convet alt bb %d\n", + bb->index, alt_bb->index); + } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Partial if-conv cleanup: convert " + "following blocks:\n"); + for (hash_set::iterator it = if_conv_bbs.begin (); + it != if_conv_bbs.end (); ++it) + fprintf (dump_file, "\tbb %d\n", (*it)->index); + } +} + +static void +evaluate_merge_bbs (unsigned int num_nodes, + hash_map &map) +{ + for (unsigned int i = 1; i < num_nodes; i++) + { + basic_block bb = ifc_bbs[i]; + basic_block merge_bb = get_immediate_dominator (CDI_DOMINATORS, bb); + map.put (bb, merge_bb); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Partial if-conv: set merge-bb %d for bb %d\n", + merge_bb->index, bb->index); + } +} + +/* Remove virtual phi in the bb and update last_vdef. */ + +static void +release_virtual_op (basic_block bb, tree &last_vdef) +{ + gphi *vphi = get_virtual_phi (bb); + if (!vphi) + return; + /* When there's just loads inside the loop a stray virtual + PHI merging the uses can appear, update last_vdef from + it. 
*/ + if (!last_vdef) + last_vdef = gimple_phi_arg_def (vphi, 0); + imm_use_iterator iter; + use_operand_p use_p; + gimple *use_stmt; + FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi)) + { + FOR_EACH_IMM_USE_ON_STMT (use_p, iter) + SET_USE (use_p, last_vdef); + } + if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi))) + SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "If-conv remove vphi in bb %d:\n",bb->index); + print_gimple_stmt (dump_file, vphi, 0, TDF_SLIM); + } + gimple_stmt_iterator gsi = gsi_for_stmt (vphi); + remove_phi_node (&gsi, true); +} + +/* Return true if there is an edge between src_bb and dest_bb. */ + +static bool +has_edge (basic_block src_bb, basic_block dest_bb) +{ + edge e; + edge_iterator ei; + FOR_EACH_EDGE (e, ei, src_bb->succs) + if (e->dest == dest_bb) + return true; + return false; +} + +/* Redirect edges before bb deletion in combine_blocks. */ + +static void +redirect_edges_before_bb_delete (basic_block bb, + hash_map &map) +{ + edge e; + for (edge_iterator ei = ei_start (bb->succs); (e = ei_safe_edge (ei));) + { + basic_block sbb = e->dest; + basic_block merge_bb = *map.get (sbb); + if (!has_edge (merge_bb, sbb)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Partial if-conv in redirect_edges_before_bb_" + "delete: redirect edge %d->%d to %d->%d.\n", bb->index, + sbb->index, merge_bb->index, sbb->index); + gcc_assert (single_pred_p (bb)); + edge pred_e = single_pred_edge (bb); + gcc_assert (pred_e->src == merge_bb); + /* It was a conditional edge, make it FALLTHRU and redirect. */ + pred_e->flags |= EDGE_FALLTHRU; + redirect_edge_and_branch (pred_e, sbb); + } + else + { + ei_next (&ei); + continue; + } + } +} + +/* Set immediate dominators and EDGE_FALLTHRU for bbs of the loop, skipping + deleted bbs. 
*/ + +static void +set_immediate_dominators_and_edge_fallthru (unsigned int num_nodes, + hash_set &deleted_bbs) +{ + for (unsigned int i = 0; i < num_nodes; i++) + { + basic_block bb = ifc_bbs[i]; + if (deleted_bbs.contains (bb)) + continue; + if (single_pred_p (bb)) + { + edge e = single_pred_edge (bb); + set_immediate_dominator (CDI_DOMINATORS, e->dest, e->src); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Partial if-conv set immediate dominator to " + "%d->%d.\n", e->src->index, e->dest->index); + } + /* We need to set EDGE_FALLTHRU to avoid fail in gimple_merge_blocks + on gcc_aassert. */ + if (!single_succ_p (bb)) + continue; + edge e = single_succ_edge (bb); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Partial if-conv set EDGE_FALLTHRU to %d->%d.\n", + e->src->index, e->dest->index); + e->flags &= ~(EDGE_TRUE_VALUE | EDGE_FALSE_VALUE); + e->flags |= EDGE_FALLTHRU; + } +} + +static void +print_vops_info (const char *info, gimple *stmt, tree vop) +{ + if (!dump_file || !(dump_flags & TDF_DETAILS)) + return; + if (info) + fprintf (dump_file, info); + print_gimple_stmt (dump_file, stmt, 0, TDF_SLIM); + print_generic_expr (dump_file, vop, TDF_SLIM); + fprintf (dump_file, "\n"); +} + +/* Correct virtual operands in bb taking into account bb to last_vdef map. 
*/ + +static void +correct_virtual_ops_in_bb (basic_block bb, + hash_map &bb_last_vdef) +{ + tree last_vdef = NULL_TREE; + gphi *vphi = get_virtual_phi (bb); + if (vphi) + { + edge e; + edge_iterator ei; + FOR_EACH_EDGE (e, ei, bb->preds) + { + basic_block pred_bb = e->src; + tree *vdef = bb_last_vdef.get (pred_bb); + if (vdef) + { + use_operand_p op = PHI_ARG_DEF_PTR_FROM_EDGE (vphi, e); + SET_USE (op, *vdef); + } + } + last_vdef = gimple_phi_result (vphi); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Update last_vdef from vphi in bb %d:\n", + bb->index); + print_vops_info (NULL, vphi, last_vdef); + } + else + { + /* If there are no vphis, all preds have the same vdef. */ + edge e; + edge_iterator ei; + FOR_EACH_EDGE (e, ei, bb->preds) + { + basic_block pred_bb = e->src; + tree *vdef = bb_last_vdef.get (pred_bb); + if (!vdef) + continue; + last_vdef = *vdef; + break; + } + } + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (last_vdef) + { + use_operand_p use_p = ssa_vuse_operand (stmt); + if (use_p && USE_FROM_PTR (use_p) != last_vdef) + { + print_vops_info ("Update vuse in stmt:\n", stmt, last_vdef); + SET_USE (use_p, last_vdef); + } + } + else if (gimple_vuse (stmt) && !gimple_vdef (stmt)) + { + last_vdef = gimple_vuse (stmt); + print_vops_info ("Update last_vdef from VUSE in stmt:\n", + stmt, last_vdef); + } + + if (gimple_vdef (stmt)) + { + last_vdef = gimple_vdef (stmt); + print_vops_info ("Update last_vdef from VDEF in stmt:\n", + stmt, last_vdef); + } + } + bb_last_vdef.put (bb, last_vdef); +} + /* Combine all the basic blocks from LOOP into one or two super basic blocks. Replace PHI nodes with conditional modify expressions. 
*/ @@ -2708,6 +3236,11 @@ combine_blocks (class loop *loop) for (i = 0; i < orig_loop_num_nodes; i++) { bb = ifc_bbs[i]; + if (dump_file && (dump_flags & TDF_DETAILS) + && partial_p && if_conv_bbs.contains (bb)) + fprintf (dump_file, "Partail if-conv bb %d in if_conv_bbs\n", + bb->index); + predicated[i] = !is_true_predicate (bb_predicate (bb)); free_bb_predicate (bb); if (bb_with_exit_edge_p (loop, bb)) @@ -2718,7 +3251,18 @@ combine_blocks (class loop *loop) } gcc_assert (exit_bb != loop->latch); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "If-conv latch bb %d, exit bb %d, header bb %d\n", + loop->latch ? loop->latch->index : -1, + exit_bb ? exit_bb->index : -1, loop->header->index); + merge_target_bb = loop->header; + /* In the case of partial if-conversion some parts of the loop may be not + converted so different bbs can have different merge_target_bbs. Evaluate + merge_target_bb and store the results to bb_to_merge. */ + hash_map bb_to_merge; + if (partial_p) + evaluate_merge_bbs (orig_loop_num_nodes, bb_to_merge); /* Get at the virtual def valid for uses starting at the first block we merge into the header. Without a virtual PHI the loop has the @@ -2740,34 +3284,25 @@ combine_blocks (class loop *loop) bb = ifc_bbs[i]; - if (bb == exit_bb || bb == loop->latch) + if (!partial_p && (bb == exit_bb || bb == loop->latch)) continue; /* We release virtual PHIs late because we have to propagate them out using the current VUSE. The def might be the one used after the loop. */ - vphi = get_virtual_phi (bb); - if (vphi) - { - /* When there's just loads inside the loop a stray virtual - PHI merging the uses can appear, update last_vdef from - it. 
*/ - if (!last_vdef) - last_vdef = gimple_phi_arg_def (vphi, 0); - imm_use_iterator iter; - use_operand_p use_p; - gimple *use_stmt; - FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi)) + if (!partial_p || is_simple_condition_successor (bb, true)) + release_virtual_op (bb, last_vdef); + if (partial_p) + { + if (!if_conv_bbs.contains (bb)) { - FOR_EACH_IMM_USE_ON_STMT (use_p, iter) - SET_USE (use_p, last_vdef); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Partial if-conv combine_blocks: " + "skip bb %d\n", bb->index); + continue; } - if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi))) - SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1; - gsi = gsi_for_stmt (vphi); - remove_phi_node (&gsi, true); + merge_target_bb = *bb_to_merge.get (bb); } - /* Make stmts member of loop->header and clear range info from all stmts in BB which is now no longer executed conditional on a predicate we could have derived it from. */ @@ -2805,49 +3340,30 @@ combine_blocks (class loop *loop) } /* Fixup virtual operands in the exit block. */ - if (exit_bb - && exit_bb != loop->header) + if (!partial_p && exit_bb && exit_bb != loop->header) { /* We release virtual PHIs late because we have to propagate them out using the current VUSE. The def might be the one used after the loop. */ - vphi = get_virtual_phi (exit_bb); - if (vphi) - { - /* When there's just loads inside the loop a stray virtual - PHI merging the uses can appear, update last_vdef from - it. 
*/ - if (!last_vdef) - last_vdef = gimple_phi_arg_def (vphi, 0); - imm_use_iterator iter; - use_operand_p use_p; - gimple *use_stmt; - FOR_EACH_IMM_USE_STMT (use_stmt, iter, gimple_phi_result (vphi)) - { - FOR_EACH_IMM_USE_ON_STMT (use_p, iter) - SET_USE (use_p, last_vdef); - } - if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (vphi))) - SSA_NAME_OCCURS_IN_ABNORMAL_PHI (last_vdef) = 1; - gimple_stmt_iterator gsi = gsi_for_stmt (vphi); - remove_phi_node (&gsi, true); - } + release_virtual_op (exit_bb, last_vdef); } /* Now remove all the edges in the loop, except for those from the exit block and delete the blocks we elided. */ - for (i = 1; i < orig_loop_num_nodes; i++) - { - bb = ifc_bbs[i]; + if (!partial_p) + for (i = 1; i < orig_loop_num_nodes; i++) + { + bb = ifc_bbs[i]; - for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));) - { - if (e->src == exit_bb) - ei_next (&ei); - else - remove_edge (e); - } - } + for (ei = ei_start (bb->preds); (e = ei_safe_edge (ei));) + { + if (e->src == exit_bb) + ei_next (&ei); + else + remove_edge (e); + } + } + hash_set deleted_bbs; for (i = 1; i < orig_loop_num_nodes; i++) { bb = ifc_bbs[i]; @@ -2855,11 +3371,23 @@ combine_blocks (class loop *loop) if (bb == exit_bb || bb == loop->latch) continue; + if (partial_p) + { + if (!if_conv_bbs.contains (bb)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Partial if-conv in combine_blocks: " + "skip non-conv bb %d\n", bb->index); + continue; + } + redirect_edges_before_bb_delete (bb, bb_to_merge); + deleted_bbs.add (bb); + } delete_basic_block (bb); - } + } /* Re-connect the exit block. 
*/ - if (exit_bb != NULL) + if (!partial_p && exit_bb != NULL) { if (exit_bb != loop->header) { @@ -2876,6 +3404,9 @@ combine_blocks (class loop *loop) } set_immediate_dominator (CDI_DOMINATORS, loop->latch, exit_bb); } + else if (partial_p) + set_immediate_dominators_and_edge_fallthru (orig_loop_num_nodes, + deleted_bbs); else { /* If the loop does not have an exit, reconnect header and latch. */ @@ -2886,13 +3417,27 @@ combine_blocks (class loop *loop) /* If possible, merge loop header to the block with the exit edge. This reduces the number of basic blocks to two, to please the vectorizer that handles only loops with two nodes. */ - if (exit_bb - && exit_bb != loop->header) + if (!partial_p && exit_bb && exit_bb != loop->header) { if (can_merge_blocks_p (loop->header, exit_bb)) merge_blocks (loop->header, exit_bb); } + if (partial_p) + { + hash_map bb_last_vdef; + for (i = 0; i < orig_loop_num_nodes; i++) + { + bb = ifc_bbs[i]; + if (deleted_bbs.contains (bb)) + continue; + correct_virtual_ops_in_bb (bb, bb_last_vdef); + } + /* Since vdef on backedge may be updated, we need to correct header bb + once again. */ + correct_virtual_ops_in_bb (loop->header, bb_last_vdef); + } + free (ifc_bbs); ifc_bbs = NULL; free (predicated); @@ -3327,10 +3872,11 @@ tree_if_conversion (class loop *loop, vec *preds) specified -ftree-loop-if-convert or unless versioning is required. Either version this loop, or if the pattern is right for outer-loop vectorization, version the outer loop. In the latter case we will - still if-convert the original inner loop. */ - if (need_to_predicate - || any_complicated_phi - || flag_tree_loop_if_convert != 1) + still if-convert the original inner loop. Partial if-conversion does + not version loops. 
*/ + if (!partial_p && (need_to_predicate + || any_complicated_phi + || flag_tree_loop_if_convert != 1)) { class loop *vloop = (versionable_outer_loop_p (loop_outer (loop)) @@ -3368,11 +3914,27 @@ tree_if_conversion (class loop *loop, vec *preds) pe = single_pred_edge (gimple_bb (preds->last ())); } + if (partial_p) + { + if (param_max_tree_partial_if_conversion_stmts) + apply_partial_if_conv_cost_model (); + cleanup_partial_if_conv_bbs (); + if (if_conv_bbs.elements () == 0) + goto cleanup; + } + /* Now all statements are if-convertible. Combine all the basic blocks into one huge basic block doing the if-conversion on-the-fly. */ combine_blocks (loop); + /* TODO: maybe we can repair dominators by hands. */ + if (partial_p) + { + free_dominance_info (CDI_DOMINATORS); + calculate_dominance_info (CDI_DOMINATORS); + } + /* Perform local CSE, this esp. helps the vectorizer analysis if loads and stores are involved. CSE only the loop body, not the entry PHIs, those are to be kept in sync with the non-if-converted copy. 
@@ -3398,6 +3960,9 @@ tree_if_conversion (class loop *loop, vec *preds) todo |= TODO_cleanup_cfg; cleanup: + if (partial_p) + if_conv_bbs.empty (); + if (ifc_bbs) { unsigned int i; @@ -3459,10 +4024,19 @@ unsigned int pass_if_conversion::execute (function *fun) { unsigned todo = 0; - if (number_of_loops (fun) <= 1) return 0; + bool in_loop_pipeline = scev_initialized_p (); + if (!in_loop_pipeline) + { + calculate_dominance_info (CDI_DOMINATORS); + loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_PREHEADERS + | LOOPS_HAVE_RECORDED_EXITS + | LOOPS_HAVE_SIMPLE_LATCHES); + scev_initialize (); + } + auto_vec preds; for (auto loop : loops_list (cfun, 0)) if (flag_tree_loop_if_convert == 1 @@ -3470,8 +4044,18 @@ pass_if_conversion::execute (function *fun) && !loop->dont_vectorize)) todo |= tree_if_conversion (loop, &preds); + if (flag_tree_partial_if_convert) + { + partial_p = 1; + for (auto loop : loops_list (cfun, 0)) + todo |= tree_if_conversion (loop, &preds); + } + if (todo) { + if (!in_loop_pipeline) + free_dominance_info (CDI_DOMINATORS); + free_numbers_of_iterations_estimates (fun); scev_reset (); } @@ -3508,6 +4092,11 @@ pass_if_conversion::execute (function *fun) } } + if (!in_loop_pipeline) + { + loop_optimizer_finalize (); + scev_finalize (); + } return 0; } diff --git a/gcc/tree-parloops.cc b/gcc/tree-parloops.cc index 7fcb0d527d5f79dc47fcda736ac0acd99a738b09..b74b39fd8d98afa988ed98751de456d99f60a82f 100644 --- a/gcc/tree-parloops.cc +++ b/gcc/tree-parloops.cc @@ -1008,7 +1008,7 @@ typedef struct lambda_trans_matrix_s /* Allocate a new transformation matrix. */ -static lambda_trans_matrix +lambda_trans_matrix lambda_trans_matrix_new (int colsize, int rowsize, struct obstack * lambda_obstack) { @@ -1027,7 +1027,7 @@ lambda_trans_matrix_new (int colsize, int rowsize, MAT is an M*N matrix, and VEC is a vector with length N. The result is stored in DEST which must be a vector of length M. 
*/ -static void +void lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n, lambda_vector vec, lambda_vector dest) { diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc index e2ac20447410d8e730fa38222964e729147cab32..54221cc804538fc461ed016f8d334e8569978d2c 100644 --- a/gcc/tree-ssa-loop-ivcanon.cc +++ b/gcc/tree-ssa-loop-ivcanon.cc @@ -52,6 +52,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-eh.h" #include "gimple-iterator.h" #include "tree-cfg.h" +#include "tree-ssa-loop-ivopts.h" #include "tree-ssa-loop-manip.h" #include "tree-ssa-loop-niter.h" #include "tree-ssa-loop.h" @@ -76,6 +77,23 @@ enum unroll_level UL_ALL /* All suitable loops. */ }; +/* The variable represents that we do replicative unroll. It means + the optimization unrolls loop and group corresponding instructions + together, e.g. it substitutes the following loop + + for (i = 0..1) + instr1 (i); + instr2 (i); + + by the code: + + instr1 (0); + instr1 (1); + instr2 (0); + instr2 (1); */ + +static bool do_replicative_unroll = false; + /* Adds a canonical induction variable to LOOP iterating NITER times. EXIT is the exit edge whose condition is replaced. The ssa versions of the new IV before and after increment will be stored in VAR_BEFORE and VAR_AFTER @@ -83,7 +101,7 @@ enum unroll_level void create_canonical_iv (class loop *loop, edge exit, tree niter, - tree *var_before = NULL, tree *var_after = NULL) + tree *var_before, tree *var_after) { edge in; tree type, var; @@ -142,7 +160,7 @@ struct loop_size instructions after exit are not executed. */ int last_iteration; int last_iteration_eliminated_by_peeling; - + /* If some IV computation will become constant. */ bool constant_iv; @@ -483,196 +501,2532 @@ loop_edge_to_cancel (class loop *loop) return NULL; return edge_to_cancel; } - return NULL; + return NULL; +} + +/* Remove all tests for exits that are known to be taken after LOOP was + peeled NPEELED times. 
   Put gcc_unreachable before every statement
   known to not be executed.  Return true if any statement or exit
   condition was changed.  */

static bool
remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled)
{
  class nb_iter_bound *elt;
  bool changed = false;

  /* Walk all recorded iteration bounds of LOOP.  */
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* If statement is known to be undefined after peeling, turn it
	 into unreachable (or trap when debugging experience is supposed
	 to be good).  */
      if (!elt->is_exit
	  && wi::ltu_p (elt->bound, npeeled))
	{
	  gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt);
	  gcall *stmt = gimple_build_call
	      (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0);
	  gimple_set_location (stmt, gimple_location (elt->stmt));
	  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
	  /* Split so everything after the unreachable call becomes
	     trivially dead code in a separate block.  */
	  split_block (gimple_bb (stmt), stmt);
	  changed = true;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced statement unreachable: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	}
      /* If we know the exit will be taken after peeling, update.  */
      else if (elt->is_exit
	       && wi::leu_p (elt->bound, npeeled))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Forced exit to be taken: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);
	  exit_edge->probability = profile_probability::always ();
	  gcc_checking_assert (loop_exit_edge_p (loop, exit_edge));
	  /* Fold the condition so the exit edge is always taken.  */
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_true (cond_stmt);
	  else
	    gimple_cond_make_false (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}

/* Remove all exits that are known to be never taken because of the loop bound
   discovered.
   Return true if any exit condition was folded away.  */

static bool
remove_redundant_iv_tests (class loop *loop)
{
  class nb_iter_bound *elt;
  bool changed = false;

  if (!loop->any_upper_bound)
    return false;
  for (elt = loop->bounds; elt; elt = elt->next)
    {
      /* Exit is pointless if it won't be taken before loop reaches
	 upper bound.  */
      if (elt->is_exit && loop->any_upper_bound
	  && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound))
	{
	  basic_block bb = gimple_bb (elt->stmt);
	  edge exit_edge = EDGE_SUCC (bb, 0);
	  class tree_niter_desc niter;

	  if (!loop_exit_edge_p (loop, exit_edge))
	    exit_edge = EDGE_SUCC (bb, 1);

	  /* Only when we know the actual number of iterations, not
	     just a bound, we can remove the exit.  */
	  if (!number_of_iterations_exit (loop, exit_edge,
					  &niter, false, false)
	      || !integer_onep (niter.assumptions)
	      || !integer_zerop (niter.may_be_zero)
	      || !niter.niter
	      || TREE_CODE (niter.niter) != INTEGER_CST
	      || !wi::ltu_p (loop->nb_iterations_upper_bound,
			     wi::to_widest (niter.niter)))
	    continue;

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Removed pointless exit: ");
	      print_gimple_stmt (dump_file, elt->stmt, 0);
	    }
	  /* Fold the condition so the exit edge is never taken.  */
	  gcond *cond_stmt = as_a <gcond *> (elt->stmt);
	  if (exit_edge->flags & EDGE_TRUE_VALUE)
	    gimple_cond_make_false (cond_stmt);
	  else
	    gimple_cond_make_true (cond_stmt);
	  update_stmt (cond_stmt);
	  changed = true;
	}
    }
  return changed;
}

/* Stores loops that will be unlooped and edges that will be removed
   after we process whole loop tree.  */
static vec<loop_p> loops_to_unloop;
static vec<int> loops_to_unloop_nunroll;
static vec<edge> edges_to_remove;
/* Stores loops that have been peeled.  */
static bitmap peeled_loops;

/* Cancel all fully unrolled loops by putting __builtin_unreachable
   on the latch edge.
   We do it after all unrolling since unlooping moves basic blocks
   across loop boundaries trashing loop closed SSA form as well
   as SCEV info needed to be intact during unrolling.
   IRRED_INVALIDATED is used to bookkeep if information about
   irreducible regions may become invalid as a result
   of the transformation.
   LOOP_CLOSED_SSA_INVALIDATED is used to bookkeep the case
   when we need to go into loop closed SSA form.  */

static void
unloop_loops (bitmap loop_closed_ssa_invalidated,
	      bool *irred_invalidated)
{
  while (loops_to_unloop.length ())
    {
      class loop *loop = loops_to_unloop.pop ();
      int n_unroll = loops_to_unloop_nunroll.pop ();
      basic_block latch = loop->latch;
      edge latch_edge = loop_latch_edge (loop);
      int flags = latch_edge->flags;
      location_t locus = latch_edge->goto_locus;
      gcall *stmt;
      gimple_stmt_iterator gsi;

      remove_exits_and_undefined_stmts (loop, n_unroll);

      /* Unloop destroys the latch edge.  */
      unloop (loop, irred_invalidated, loop_closed_ssa_invalidated);

      /* Create new basic block for the latch edge destination and wire
	 it in.  */
      stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE),
				0);
      latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch),
			      flags);
      latch_edge->probability = profile_probability::never ();
      /* NOTE(review): make_edge was already passed FLAGS, so this |= looks
	 redundant — kept as-is to preserve behavior.  */
      latch_edge->flags |= flags;
      latch_edge->goto_locus = locus;

      add_bb_to_loop (latch_edge->dest, current_loops->tree_root);
      latch_edge->dest->count = profile_count::zero ();
      set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest,
			       latch_edge->src);

      /* The new latch destination only holds the unreachable call.  */
      gsi = gsi_start_bb (latch_edge->dest);
      gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
    }
  loops_to_unloop.release ();
  loops_to_unloop_nunroll.release ();

  /* Remove edges in peeled copies.  Given remove_path removes dominated
     regions we need to cope with removal of already removed paths.  */
  unsigned i;
  edge e;
  /* Record the source block indices first: remove_path can delete blocks
     that are sources of later edges in the vector.  */
  auto_vec<int> src_bbs;
  src_bbs.reserve_exact (edges_to_remove.length ());
  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
    src_bbs.quick_push (e->src->index);
  FOR_EACH_VEC_ELT (edges_to_remove, i, e)
    if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i]))
      {
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Remove path after edge %d->%d.\n",
		   e->src->index, e->dest->index);
	bool ok = remove_path (e, irred_invalidated,
			       loop_closed_ssa_invalidated);
	gcc_assert (ok);
	if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "Remove path done.\n");
      }
  edges_to_remove.release ();
}

/* Pulled in here (not at the top of the file) because only the
   replicative-unroll support below needs the data-reference API.  */
#include "tree-data-ref.h"

/* Use types and macros from tree-parloops.cc.
   TODO: move to one place and avoid code duplication.  */

typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;
  int rowsize;
  int colsize;
  int denominator;
} *lambda_trans_matrix;
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)

/* Declarations of helpers exported from tree-parloops.cc and
   tree-data-ref.cc.  */
extern lambda_trans_matrix
lambda_trans_matrix_new (int colsize, int rowsize,
			 struct obstack * lambda_obstack);
extern void
dump_data_dependence_relation (FILE *outf, const data_dependence_relation *ddr);

void
lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
			   lambda_vector vec, lambda_vector dest);

/* Return true if TYPE is an aggregate, vector or complex type, i.e. has
   component accesses.  */

static bool
type_has_components_p (tree type)
{
  return AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type)
	 || TREE_CODE (type) == COMPLEX_TYPE;
}

/* Return true if data references A and B have the same number of
   dimensions and pairwise equal access functions.  */

static bool
has_eq_idxs (const struct data_reference *a, const struct data_reference *b)
{
  if (DR_NUM_DIMENSIONS (a) != DR_NUM_DIMENSIONS (b))
    return false;
  for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (a); i++)
    {
      tree f1 = DR_ACCESS_FN (a, i);
      tree f2 = DR_ACCESS_FN (b, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "has_eq_idxs %d: ", i);
	  print_generic_expr (dump_file, f1, TDF_SLIM);
	  fprintf (dump_file, " and ");
print_generic_expr (dump_file, f2, TDF_SLIM); + fprintf (dump_file, "\n"); + } + if (!eq_evolutions_p (DR_ACCESS_FN (a, i), DR_ACCESS_FN (b, i))) + return false; + } + return true; +} + +static bool +has_no_dims (const struct data_reference *a, const struct data_reference *b) +{ + return (DR_NUM_DIMENSIONS (a) == 0) && (DR_NUM_DIMENSIONS (b) == 0); +} + +static tree +find_memory_source (tree a) +{ + tree op0 = a, op1 = NULL_TREE; + if (TREE_CODE (op0) == MEM_REF) + op0 = TREE_OPERAND (op0, 0); + if (TREE_CODE (op0) == POINTER_PLUS_EXPR) + { + op1 = TREE_OPERAND (op0, 1); + op0 = TREE_OPERAND (op0, 0); + if (TREE_CODE (TREE_TYPE (op1)) != INTEGER_TYPE) + return NULL_TREE; + } + while (TREE_CODE (op0) == NOP_EXPR || TREE_CODE (op0) == ADDR_EXPR + || TREE_CODE (op0) == COMPONENT_REF || TREE_CODE (op0) == MEM_REF) + op0 = TREE_OPERAND (op0, 0); + if (VAR_P (op0)) + return op0; + + if (TREE_CODE (op0) != SSA_NAME) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "find_memory_source {%s} fail\n", + get_tree_code_name (TREE_CODE (op0))); + return NULL_TREE; + } + gimple *stmt = SSA_NAME_DEF_STMT (op0); + if (gimple_code (stmt) == GIMPLE_NOP) + return SSA_NAME_VAR (op0); + if (!is_gimple_assign (stmt)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "find_memory_source fail: "); + print_gimple_stmt (dump_file, stmt, 0); + } + return NULL_TREE; + } + op0 = gimple_assign_rhs1 (stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "find_memory_source {%s} found\n", + get_tree_code_name (TREE_CODE (op0))); + } + return find_memory_source (op0); +} + +static bool +loop_local_objects_p (tree a, tree b) +{ + tree op1 = a, op2 = b; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "loop_local_objects_p1 {%s %s}\n", + op1 == NULL_TREE ? "?" : get_tree_code_name (TREE_CODE (op1)), + op2 == NULL_TREE ? "?" 
: get_tree_code_name (TREE_CODE (op2))); + print_generic_expr (dump_file, op1, TDF_SLIM); + fprintf (dump_file, " and "); + print_generic_expr (dump_file, op2, TDF_SLIM); + fprintf (dump_file, " may alias (%d)\n", + op1 != NULL_TREE && op2 != NULL_TREE + ? ptr_derefs_may_alias_p (op1, op2) : -1); + } + if (op1 == NULL_TREE || op2 == NULL_TREE) + return false; + if (!VAR_P (op1) || !VAR_P (op2)) + return false; + if (!is_global_var (op1) && !is_global_var (op1) + && DECL_CONTEXT (op1) == cfun->decl && DECL_CONTEXT (op2) == cfun->decl) + return true; + return false; +} + +static bool +different_global_objects_p (tree a, tree b) +{ + tree op1 = a, op2 = b; + if (op1 == NULL_TREE || op2 == NULL_TREE) + return false; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "different_global_objects_p {%s %s} with types:\n", + get_tree_code_name (TREE_CODE (op1)), + get_tree_code_name (TREE_CODE (op2))); + print_generic_expr (dump_file, TREE_TYPE (op1), TDF_SLIM); + fprintf (dump_file, " and "); + print_generic_expr (dump_file, TREE_TYPE (op2), TDF_SLIM); + fprintf (dump_file, "\n"); + } + bool op1_parm_or_glob = is_global_var (op1) || TREE_CODE (op1) == PARM_DECL; + bool op2_parm_or_glob = is_global_var (op2) || TREE_CODE (op2) == PARM_DECL; + if (!op1_parm_or_glob || !op2_parm_or_glob || operand_equal_p (op1, op2)) + return false; + tree type1 = TREE_TYPE (op1), type2 = TREE_TYPE (op2); + if (POINTER_TYPE_P (type1) && TREE_CODE (TREE_TYPE (type1)) == RECORD_TYPE) + type1 = TREE_TYPE (type1); + if (POINTER_TYPE_P (type2) && TREE_CODE (TREE_TYPE (type2)) == RECORD_TYPE) + type2 = TREE_TYPE (type2); + /* For aggregates we rely on canonical types. 
*/ + if (RECORD_OR_UNION_TYPE_P (type1)) + type1 = TYPE_CANONICAL (type1); + if (RECORD_OR_UNION_TYPE_P (type2)) + type2 = TYPE_CANONICAL (type2); + + if (!type1 || !type2 || type1 == type2) + return false; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "different_global_objects_p types %d %d\n", + TYPE_UID (type1), TYPE_UID (type2)); + print_generic_expr (dump_file, type1, TDF_SLIM); + print_generic_expr (dump_file, type2, TDF_SLIM); + } + return (RECORD_OR_UNION_TYPE_P (type1) && TYPE_FIELDS_NONALIAS (type1)) + || (RECORD_OR_UNION_TYPE_P (type2) && TYPE_FIELDS_NONALIAS (type2)); +} + +static bool +known_relations (struct data_dependence_relation *ddr, class loop *loop) +{ + const struct data_reference *a = DDR_A (ddr); + const struct data_reference *b = DDR_B (ddr); + tree ref_a = DR_REF (a); + tree ref_b = DR_REF (b); + tree type_a = TREE_TYPE (ref_a); + tree type_b = TREE_TYPE (ref_b); + bool same_types = gimple_canonical_types_compatible_p (type_a, type_b, false); + + /* Two loop invariants with the same base and different indeces. 
*/ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Is relative accesses: dims=%d,%d, " + "same_types=%d (%d)\n", DR_NUM_DIMENSIONS (a), + DR_NUM_DIMENSIONS (b), same_types, flag_strict_aliasing); + print_generic_expr (dump_file, ref_a, TDF_SLIM); + fprintf (dump_file, " "); + print_generic_expr (dump_file, ref_b, TDF_SLIM); + fprintf (dump_file, "\n"); + print_generic_expr (dump_file, type_a, TDF_SLIM); + fprintf (dump_file, " "); + print_generic_expr (dump_file, type_b, TDF_SLIM); + fprintf (dump_file, "\n"); + } + bool is_inv_a = expr_invariant_in_loop_p (loop, ref_a); + bool is_inv_b = expr_invariant_in_loop_p (loop, ref_b); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Invariants %d %d.\n", is_inv_a, is_inv_b); + if (DR_NUM_DIMENSIONS (a)) + { + tree a1 = DR_ACCESS_FN (a, 0); + print_generic_expr (dump_file, a1, TDF_SLIM); + fprintf (dump_file, "{%s %d}\n", get_tree_code_name (TREE_CODE (a1)), + TREE_CODE (a1) == POLYNOMIAL_CHREC ? CHREC_VARIABLE (a1) + : -1); + } + if (DR_NUM_DIMENSIONS (b)) + { + tree b1 = DR_ACCESS_FN (b, 0); + print_generic_expr (dump_file, b1, TDF_SLIM); + fprintf (dump_file, "{%s %d}\n", get_tree_code_name (TREE_CODE (b1)), + TREE_CODE (b1) == POLYNOMIAL_CHREC ? 
CHREC_VARIABLE (b1) + : -1); + } + } + if (is_inv_a && is_inv_b) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Moving invariant memory references is safe.\n"); + return true; + } + if (!type_has_components_p (type_a) && !type_has_components_p (type_b) + && !same_types) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Moving invariants is safe."); + return true; + } + bool is_eq_idxs = has_eq_idxs (a, b); + bool no_dims = has_no_dims (a, b); + tree base_a = DR_BASE_OBJECT (a); + tree base_b = DR_BASE_OBJECT (b); + + bool is_eq_base = operand_equal_p (base_a, base_b); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Bases %p %p are equal %d and no dims %d\n", + (void *) base_a, (void *) base_b, is_eq_base, no_dims); + if (is_eq_base && no_dims) + { + /* Moving the references with the same bases and no indexing. */ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Moving refs with the same bases and no idxs.\n"); + return true; + } + tree mem_obj_a = find_memory_source (base_a); + tree mem_obj_b = find_memory_source (base_b); + bool is_eq_mem_obj = (mem_obj_a != NULL_TREE && mem_obj_b != NULL_TREE) + ? operand_equal_p (mem_obj_a, mem_obj_b) : false; + if (loop_local_objects_p (mem_obj_a, mem_obj_b)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Access to different local objects.\n"); + return true; + } + if (different_global_objects_p (mem_obj_a, mem_obj_b)) + { + /* TODO: implement for global vars. */ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Access to different global objects.\n"); + return true; + } + /* Try to figure out with object indexing. 
*/ + if (DR_NUM_DIMENSIONS (a) == 1 && DR_NUM_DIMENSIONS (b) == 1) + { + tree a1 = DR_ACCESS_FN (a, 0); + tree b1 = DR_ACCESS_FN (b, 0); + bool cmp_base_trees = data_ref_compare_tree (base_a, base_b); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Eq bases %d (%d, %d), idxs %d\n", + is_eq_base, cmp_base_trees, is_eq_mem_obj, is_eq_idxs); + if (is_eq_base && TREE_CODE (a1) == SSA_NAME + && TREE_CODE (b1) == SSA_NAME) + { + bool is_inv_a1 = expr_invariant_in_loop_p (loop, a1); + bool is_inv_b1 = expr_invariant_in_loop_p (loop, b1); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Invariant indeces %d %d\n", + is_inv_a1, is_inv_b1); + if ((is_inv_a1 && is_inv_b1) || is_eq_idxs) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "The same object and invariant indeces " + "or the same indeces.\n"); + return true; + } + } + /* LD/ST to the same object with the same index is solved by + reduction detection. */ + if (is_eq_mem_obj && is_eq_idxs) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "LD/ST to the same object " + "(checked by reduction).\n"); + return true; + } + /* TODO: support the case of global objects. */ + if (!is_eq_base && is_eq_idxs) + { + bool may_alias = ptr_derefs_may_alias_p (base_a, base_b); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "ptr_derefs_may_alias_p: %d\n", may_alias); + if (!may_alias) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "No alias by ptr_derefs_may_alias_p.\n"); + return true; + } + } + } + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "known_relations: fasle\n"); + return false; +} + +/* Like lambda_transform_legal_p in tree-parloops.cc but it has own + relation checker. 
*/ + +static bool +replicative_unroll_legal_p (lambda_trans_matrix trans, int nb_loops, + vec dependence_relations, class loop *loop) +{ + unsigned int i, j; + lambda_vector distres; + struct data_dependence_relation *ddr; + + gcc_assert (LTM_COLSIZE (trans) == nb_loops + && LTM_ROWSIZE (trans) == nb_loops); + + /* When there are no dependences, the transformation is correct. */ + if (dependence_relations.length () == 0) + return true; + + ddr = dependence_relations[0]; + if (ddr == NULL) + return true; + + /* Don't give up and rsolve chrec_dont_know later. */ + + distres = lambda_vector_new (nb_loops); + + bool has_deps = false; + /* For each distance vector in the dependence graph. */ + FOR_EACH_VEC_ELT (dependence_relations, i, ddr) + { + /* Don't care about relations for which we know that there is no + dependence, nor about read-read (aka. output-dependences): + these data accesses can happen in any order. */ + if (DDR_ARE_DEPENDENT (ddr) == chrec_known + || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr)))) + continue; + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "-------\nAnalyse dependences:\n"); + dump_data_dependence_relation (dump_file, ddr); + } + + if (known_relations (ddr, loop)) + continue; + /* Conservatively answer: "this transformation is not valid". */ + if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "In replicative_unroll_legal_p: " + " chrec_dont_know\n"); + has_deps = true; + } + + /* If the dependence could not be captured by a distance vector, + conservatively answer that the transform is not valid. */ + if (DDR_NUM_DIST_VECTS (ddr) == 0) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "In replicative_unroll_legal_p: " + "DDR_NUM_DIST_VECTS\n"); + has_deps = true; + } + + /* Compute trans.dist_vect. 
*/ + for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++) + { + lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops, + DDR_DIST_VECT (ddr, j), distres); + + if (!lambda_vector_lexico_pos (distres, nb_loops)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "In replicative_unroll_legal_p: " + "lambda_vector_lexico_pos\n"); + has_deps = true; + } + } + } + if (has_deps) + return false; + return true; +} + +bool +has_irreducible_deps (class loop *loop, struct obstack * parloop_obstack) +{ + vec dependence_relations; + vec datarefs; + lambda_trans_matrix trans; + bool ret = false; + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Considering loop %d\n", loop->num); + if (!loop->inner) + fprintf (dump_file, "loop is innermost\n"); + else + fprintf (dump_file, "loop NOT innermost\n"); + } + + /* Check for problems with dependences. If the loop can be reversed, + the iterations are independent. */ + auto_vec loop_nest; + datarefs.create (10); + dependence_relations.create (100); + /* Set RR and Self deps in false. 
*/ + if (!compute_data_dependences_for_loop (loop, false, &loop_nest, &datarefs, + &dependence_relations)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " FAILED: cannot analyze data dependencies\n"); + ret = false; + goto end; + } + if (dump_file && (dump_flags & TDF_DETAILS)) + dump_data_dependence_relations (dump_file, dependence_relations); + + trans = lambda_trans_matrix_new (1, 1, parloop_obstack); + LTM_MATRIX (trans)[0][0] = -1; + + if (replicative_unroll_legal_p (trans, 1, dependence_relations, loop)) + { + ret = true; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, " SUCCESS: may be parallelized\n"); + } + else if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, + " FAILED: data dependencies exist across iterations\n"); + + end: + free_dependence_relations (dependence_relations); + free_data_refs (datarefs); + + return ret; +} + +static void +mark_loop_stmts_unvisited (class loop *loop) +{ + basic_block *body = get_loop_body (loop); + unsigned int i; + for (i = 0; i < loop->num_nodes; i++) + { + basic_block bb = body[i]; + for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + gimple_set_visited (gsi.phi (), false); + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + gimple_set_visited (gsi_stmt (gsi), false); + } +} + +/* Return true if uses of DEF do not prevent replicative unroll. 
*/ + +static bool +check_forward_iv_uses (tree def, class loop *loop) +{ + use_operand_p use_p; + imm_use_iterator imm_iter; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def) + { + gimple *stmt = USE_STMT (use_p); + bool outside_loop = stmt->bb->loop_father != loop; + bool cond_in_header = (stmt->bb == loop->header) + && (gimple_code (stmt) == GIMPLE_COND); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Def is used in stmt visited (%d) phi (%d) " + "outside loop (%d) cond in loop head (%d):\n", + gimple_visited_p (stmt), gimple_code (stmt) == GIMPLE_PHI, + outside_loop, cond_in_header); + print_gimple_stmt (dump_file, stmt, 0); + } + if (gimple_code (stmt) == GIMPLE_PHI || gimple_visited_p (stmt) + || outside_loop || cond_in_header) + continue; + if (is_gimple_debug (stmt) || !is_gimple_assign (stmt)) + { + enum gimple_code code = gimple_code (stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not assingn: %s\n", gimple_code_name[code]); + return false; + } + tree lhs = gimple_assign_lhs (stmt); + tree rhs = gimple_assign_rhs1 (stmt); + if (gimple_assign_rhs_code (stmt) == MEM_REF) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Found mem_ref: false\n"); + return false; + } + if (TREE_CODE (lhs) != SSA_NAME || TREE_CODE (rhs) != SSA_NAME) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Not ssa lhs/rhs: false\n"); + return false; + } + } + return true; +} + +static tree +extract_single_var_from_expr (tree expr) +{ + int i, n; + tree tmp; + enum tree_code code; + + if (!expr || is_gimple_min_invariant (expr)) + return NULL; + + code = TREE_CODE (expr); + if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code))) + { + n = TREE_OPERAND_LENGTH (expr); + for (i = 0; i < n; i++) + { + tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i)); + + if (tmp) + return tmp; + } + } + return (TREE_CODE (expr) == SSA_NAME) ? 
expr : NULL; +} + +/* Inspirited by simple_iv_with_niters in tree-scalar-evolution.cc but + also supports non-linear induction variables with any type. */ + +static bool +simple_generic_iv (class loop *loop, tree op, affine_iv *iv) +{ + iv->base = NULL_TREE; + iv->step = NULL_TREE; + iv->no_overflow = false; + + auto_vec stack; + tree def; + gcc_assert (stack.is_empty ()); + stack.safe_push (op); + int len = 0; + while (!stack.is_empty ()) + { + def = stack.pop (); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Processing def: "); + print_generic_expr (dump_file, def, TDF_SLIM); + fprintf (dump_file, "\n"); + } + if (TREE_CODE (def) != SSA_NAME) + continue; + gimple *def_stmt = SSA_NAME_DEF_STMT (def); + if (!def_stmt) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "No def stmt is found for: "); + print_generic_expr (dump_file, def, TDF_SLIM); + fprintf (dump_file, "\n"); + } + return false; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Check def stmt (%d): ", + gimple_visited_p (def_stmt)); + print_gimple_stmt (dump_file, def_stmt, 0); + } + + if (gimple_visited_p (def_stmt)) + continue; + + if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Def stmt is not assign or phi.\n"); + return false; + } + if (def_stmt->bb->loop_father != loop || gimple_visited_p (def_stmt)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Skip stmt outside loop (%d) or " + "visited (%d).\n", def_stmt->bb->loop_father != loop, + gimple_visited_p (def_stmt)); + continue; + } + gimple_set_visited (def_stmt, true); + + if (gimple_code (def_stmt) != GIMPLE_PHI) + { + enum tree_code c = gimple_assign_rhs_code (def_stmt); + if (c != PLUS_EXPR && c != MINUS_EXPR && c != POINTER_PLUS_EXPR + && c != SSA_NAME) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, 
"Unsupported code (%s) in assignment.\n", + get_tree_code_name (c)); + return false; + } + } + + use_operand_p use_p; + imm_use_iterator imm_iter; + unsigned uses = 0; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def) + { + gimple *stmt = USE_STMT (use_p); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Chain to stmt (code=%s) (inside=%d):\n", + gimple_code_name[gimple_code (stmt)], + stmt->bb->loop_father == loop); + print_gimple_stmt (dump_file, stmt, 0); + } + + if (stmt->bb->loop_father == loop) + uses++; + else + continue; + + if (!is_gimple_assign (stmt) && gimple_code (stmt) != GIMPLE_PHI) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Isn't assign or phi.\n"); + return false; + } + tree lhs; + if (is_gimple_assign (stmt)) + lhs = gimple_assign_lhs (stmt); + else + lhs = PHI_RESULT (stmt); + + if (TREE_CODE (lhs) == SSA_NAME) + stack.safe_push (lhs); + else + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Assign to non-ssa name " + "breaks the chain: "); + print_generic_expr (dump_file, lhs, TDF_SLIM); + fprintf (dump_file, "\n"); + } + return false; + } + } + if (uses != 1) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Unexpected (%d) number of uses " + "in gnericic iv chain:\n", uses); + print_generic_expr (dump_file, def, TDF_SLIM); + fprintf (dump_file, "\n"); + } + return false; + } + len++; + } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Chain start and finish: "); + print_generic_expr (dump_file, op, TDF_SLIM); + fprintf (dump_file, " "); + print_generic_expr (dump_file, def, TDF_SLIM); + fprintf (dump_file, ", len is %d\n", len); + } + /* The acceptable chain is 2 stmts: iv phi and iv modification, longer + chains cannot be easily optimized by replicative unroll. 
*/ + if (len > 2) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "The chain is too long (%d).\n", len); + return false; + } + return true; +} + +static bool +optimizable_iv_p (gphi *orig_phi, edge backedge, class loop *loop) +{ + tree phi_res = PHI_RESULT (orig_phi); + tree first_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, backedge); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Initial phi's res "); + print_generic_expr (dump_file, phi_res, TDF_SLIM); + fprintf (dump_file, " and use "); + print_generic_expr (dump_file, first_def, TDF_SLIM); + fprintf (dump_file, "\n"); + } + + affine_iv iv; + tree step, base, stop; + if (!simple_iv (loop, loop, PHI_RESULT (orig_phi), &iv, true)) + { + if (simple_generic_iv (loop, PHI_RESULT (orig_phi), &iv)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Pass simple_generic_iv\n"); + return true; + } + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Fail simple_iv\n"); + return false; + } + step = iv.step; + if (!iv.step || !(CONSTANT_CLASS_P (step) || TREE_CODE (step) == SSA_NAME)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + if (!iv.step) + fprintf (dump_file, "No step found.\n"); + else + { + fprintf (dump_file, "Isn't simple step: "); + print_generic_expr (dump_file, iv.step, TDF_SLIM); + fprintf (dump_file, "\n"); + } + } + return false; + } + base = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop)); + stop = extract_single_var_from_expr (step); + base = expand_simple_operations (base, stop); + if (contains_abnormal_ssa_name_p (base) + || contains_abnormal_ssa_name_p (step)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Abnormal ssa names in base "); + print_generic_expr (dump_file, base, TDF_SLIM); + fprintf (dump_file, " or step "); + print_generic_expr (dump_file, step, TDF_SLIM); + fprintf (dump_file, "\n"); + } + return false; + } + if (dump_file && (dump_flags & 
TDF_DETAILS)) + { + fprintf (dump_file, "Simple IV with base "); + print_generic_expr (dump_file, base, TDF_SLIM); + fprintf (dump_file, " and step "); + print_generic_expr (dump_file, step, TDF_SLIM); + fprintf (dump_file, "\n"); + } + if (CONSTANT_CLASS_P (base) && CONSTANT_CLASS_P (step)) + return true; + return simple_generic_iv (loop, PHI_RESULT (orig_phi), &iv); + + if (!check_forward_iv_uses (phi_res, loop)) + return false; + + auto_vec stack; + tree def; + /* Walk DU-graph in forward direction. */ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Walk DU in forward direction.\n"); + gcc_assert (stack.is_empty ()); + stack.safe_push (phi_res); + while (!stack.is_empty ()) + { + def = stack.pop (); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Processing def: "); + print_generic_expr (dump_file, def, TDF_SLIM); + fprintf (dump_file, "\n"); + } + if (TREE_CODE (def) != SSA_NAME) + continue; + gimple *def_stmt = SSA_NAME_DEF_STMT (def); + if (!def_stmt) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "No def stmt is found for: "); + print_generic_expr (dump_file, def, TDF_SLIM); + fprintf (dump_file, "\n"); + } + return false; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Check def stmt (%d): ", + gimple_visited_p (def_stmt)); + print_gimple_stmt (dump_file, def_stmt, 0); + } + + if (gimple_visited_p (def_stmt)) + continue; + + if (!is_gimple_assign (def_stmt) && gimple_code (def_stmt) != GIMPLE_PHI) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Def stmt is not assign or phi.\n"); + return false; + } + if (def_stmt->bb->loop_father != loop || gimple_visited_p (def_stmt)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Skip stmt outside loop (%d) " + "or visited (%d).\n", def_stmt->bb->loop_father != loop, + gimple_visited_p (def_stmt)); + continue; + } + gimple_set_visited (def_stmt, true); + + if 
(gimple_code (def_stmt) != GIMPLE_PHI) + { + enum tree_code c = gimple_assign_rhs_code (def_stmt); + if (c != PLUS_EXPR && c != MINUS_EXPR && c != POINTER_PLUS_EXPR + && c != SSA_NAME) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Unsupported code (%s) in assignment.\n", + get_tree_code_name (c)); + return false; + } + } + + use_operand_p use_p; + imm_use_iterator imm_iter; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def) + { + gimple *stmt = USE_STMT (use_p); + if (gimple_code (stmt) == GIMPLE_PHI) + continue; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Can forwprop to stmt (%s):\n", + gimple_code_name[gimple_code (stmt)]); + print_gimple_stmt (dump_file, stmt, 0); + } + gcc_assert (is_gimple_assign (stmt)); + tree lhs = gimple_assign_lhs (stmt); + + if (TREE_CODE (lhs) == SSA_NAME) + stack.safe_push (lhs); + } + } + return true; +} + +/* Return true if induction variables in the loop are evaluated + in the way simple enough for replicative unroll. */ + +static bool +loop_with_simple_ivs (class loop *loop) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Check IVs' simplicity in loop %d\n", loop->num); + + edge backedge = loop_latch_edge (loop); + if (!backedge) + return false; + + mark_loop_stmts_unvisited (loop); + basic_block header = loop->header; + gphi_iterator it; + for (it = gsi_start_phis (header); !gsi_end_p (it); gsi_next (&it)) + { + gphi *orig_phi = it.phi (); + if (virtual_operand_p (PHI_RESULT (orig_phi))) + continue; + /* All defs outside the loop are simple. 
*/ + edge e; + edge_iterator ei; + FOR_EACH_EDGE (e, ei, header->preds) + { + if (e == backedge) + continue; + tree phi_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e); + if (TREE_CODE (phi_arg) != SSA_NAME && !CONSTANT_CLASS_P (phi_arg)) + return false; + } + + if (!optimizable_iv_p (orig_phi, backedge, loop)) + return false; + + } + return true; +} + +static bool +simple_load_store_p (gimple *stmt) +{ + return gimple_store_p (stmt) || gimple_assign_load_p (stmt); +} + +static bool +is_simple_access_fn (tree ac) +{ + enum tree_code c = TREE_CODE (ac); + if (c == SSA_NAME || c == INTEGER_CST || c == NOP_EXPR || c == MULT_EXPR + || c == PLUS_EXPR || c == MINUS_EXPR) + return true; + return false; +} + +static bool +invariant_ref_p (class loop *loop, data_reference_p &a) +{ + tree ref = DR_REF (a); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Ref: (%s)", get_tree_code_name (TREE_CODE (ref))); + print_generic_expr (dump_file, ref, TDF_SLIM); + fprintf (dump_file, "\n"); + } + + tree base = DR_BASE_OBJECT (a); + if (base == NULL_TREE) + return false; + if (!expr_invariant_in_loop_p (loop, base)) + return false; + + for (unsigned int i = 0; i < DR_NUM_DIMENSIONS (a); i++) + { + tree ac = DR_ACCESS_FN (a, i); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Access fn: (%s)", + get_tree_code_name (TREE_CODE (ac))); + print_generic_expr (dump_file, ac, TDF_SLIM); + fprintf (dump_file, "\n"); + } + + if ((is_simple_access_fn (ac) && !expr_invariant_in_loop_p (loop, ac)) + || TREE_CODE (ac) == POLYNOMIAL_CHREC + || TREE_CODE (ac) == SCEV_NOT_KNOWN) + return false; + if (!is_simple_access_fn (ac)) + gcc_unreachable (); + } + return true; +} + + +hash_map *> stmt_to_rchain; +hash_set rchain_stmts; +hash_set rend_stmts; + +/* Try to find a simple reduction chain of stmts. 
*/ + +static bool +find_reduction_chain (gimple *ld_stmt, hash_map &stmt_drs, + vec &datarefs_vec) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Try to find chain starting from stmt:"); + print_gimple_stmt (dump_file, ld_stmt, 0); + } + unsigned *dr = stmt_drs.get (ld_stmt); + gcc_assert (dr); + data_reference_p a = datarefs_vec[*dr]; + gimple *stmt = ld_stmt; + auto_vec chain; + bool found = false; + while (stmt) + { + chain.safe_push (stmt); + if (!is_gimple_assign (stmt)) + return false; + /* For now it looks for chains starting from the invariant load and ending + in one final invariant store. */ + tree lhs = gimple_assign_lhs (stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Process lhs: "); + print_generic_expr (dump_file, lhs, TDF_SLIM); + fprintf (dump_file, "\n"); + } + if (TREE_CODE (lhs) != SSA_NAME) + return false; + use_operand_p use_p; + imm_use_iterator imm_iter; + unsigned uses = 0; + gimple *next_stmt = NULL; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) + { + gimple *use_stmt = USE_STMT (use_p); + // num_imm_uses + if (!use_stmt || is_gimple_debug (use_stmt)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Break for lsh!\n"); + continue; + } + uses++; + next_stmt = use_stmt; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Next stmt in chain: "); + print_gimple_stmt (dump_file, next_stmt, 0); + } + } + if (uses != 1) + return false; + gcc_assert (next_stmt); + stmt = next_stmt; + if (!gimple_store_p (stmt)) + continue; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Store is found.\n"); + dr = stmt_drs.get (stmt); + if (dr == NULL) + return false; + data_reference_p b = datarefs_vec[*dr]; + found = same_data_refs (a, b); + if (found) + { + chain.safe_push (stmt); + break; + } + else + return false; + } + if (!found) + return false; + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Found 
chain:\n"); + vec *rrchain; + vec_alloc (rrchain, 3); + unsigned i; + for (i = 0; i < chain.length (); ++i) + { + rchain_stmts.add (chain[i]); + rrchain->safe_push (chain[i]); + stmt_to_rchain.put (chain[i], rrchain); + if (dump_file && (dump_flags & TDF_DETAILS)) + print_gimple_stmt (dump_file, chain[i], 0); + } + rend_stmts.add (chain[i-1]); + return true; +} + +/* Detect reduction behaviour and return true if the reduction chains are + simple enough to be optimizaed by replicative unrolling. */ + +static bool +loop_with_simple_reductions (class loop *loop, vec &reductions) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Check reductions simplicity in loop %d\n", loop->num); + + basic_block *body = get_loop_body (loop); + unsigned int i; + hash_set inv_lds, inv_sts; + hash_map> ref_drs; + auto_vec candidate_mrs; + /* Use aproach like in loop distribution. */ + auto_vec datarefs_vec; + hash_map stmt_drs; + auto_vec stmts_with_inv_dr; + auto_vec inv_ld_stmts, inv_st_stmts; + + for (i = 0; i < loop->num_nodes; i++) + { + basic_block bb = body[i]; + gimple_stmt_iterator gsi; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "bb %d stmts:\n", bb->index); + /* For now find chains only inside one bbs. 
*/ + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (is_gimple_debug (stmt)) + continue; + unsigned drp = datarefs_vec.length (); + if (!find_data_references_in_stmt (loop, stmt, &datarefs_vec)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Bad ref\n"); + return false; + } + if (datarefs_vec.length () == drp) + continue; + if (gimple_code (stmt) == GIMPLE_CALL) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Unsupported loop with call stmt:\n"); + print_gimple_stmt (dump_file, stmt, 0); + } + return false; + } + if (!simple_load_store_p (stmt)) + continue; + bool is_load = gimple_assign_load_p (stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + print_gimple_stmt (dump_file, stmt, 0); + if ((datarefs_vec.length () - drp > 1) + && dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "UNSUPPORTED %d data refs in stmt:\n", + datarefs_vec.length () - drp); + print_gimple_stmt (dump_file, stmt, 0); + } + data_reference_p a = datarefs_vec[drp]; + gcc_assert (DR_STMT (a) == stmt); + + stmt_drs.get_or_insert (stmt) = drp; + + tree base = DR_BASE_OBJECT (a); + tree ref = DR_REF (a); + if (ref == NULL_TREE) + continue; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "LD/ST ref (drp=%d): ", drp); + print_generic_expr (dump_file, ref, TDF_SLIM); + } + if (!invariant_ref_p (loop, a)) + continue; + stmts_with_inv_dr.safe_push (stmt); + if (is_load) + inv_ld_stmts.safe_push (stmt); + else + inv_st_stmts.safe_push (stmt); + if (base) + { + if (TREE_CODE (base) == MEM_REF) + base = TREE_OPERAND (base, 0); + if (TREE_CODE (base) == ADDR_EXPR) + base = TREE_OPERAND (base, 0); + ref = base; + if (base && dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "with base {%s}: ", + get_tree_code_name (TREE_CODE (base))); + print_generic_expr (dump_file, base, TDF_SLIM); + fprintf (dump_file, "\n"); + } + } + bool 
exist; + ref_drs.get_or_insert (ref, &exist).safe_push (drp); + if (!inv_lds.contains (ref) && !inv_sts.contains (ref)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "candidate\n"); + candidate_mrs.safe_push (ref); + gcc_assert (exist == false); + } + if (is_load && !inv_lds.contains (ref)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "inv lds\n"); + inv_lds.add (ref); + } + if (!is_load && !inv_sts.contains (ref)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "inv sts\n"); + inv_sts.add (ref); + } + } + } + for (unsigned i = 0; i < candidate_mrs.length (); ++i) + { + tree mr = candidate_mrs[i]; + bool is_cand = inv_lds.contains (mr) && inv_sts.contains (mr); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "MR r=%d w=%d c=%d: ", inv_lds.contains (mr), + inv_sts.contains (mr), is_cand); + print_generic_expr (dump_file, mr, TDF_SLIM); + fprintf (dump_file, "\nDR indeces: "); + } + vec *drps = ref_drs.get (mr); + gcc_assert (drps->length ()); + for (unsigned int i = 0; i < drps->length (); i++) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "%d ", (*drps)[i]); + } + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "\n"); + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Stmts with invs:\n"); + for (unsigned i = 0; i < stmts_with_inv_dr.length (); ++i) + { + gimple *stmt = stmts_with_inv_dr[i]; + print_gimple_stmt (dump_file, stmt, 0); + } + } + if (inv_ld_stmts.length () == 0 || inv_st_stmts.length () == 0) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "No invariant loads (%d) or stores (%d):\n", + inv_ld_stmts.length (), inv_st_stmts.length ()); + return true; + } + /* TODO: detect broken reductions more precisely. 
*/ + bool incomplete_chains = false; + for (unsigned i = 0; i < inv_ld_stmts.length (); ++i) + { + bool has_chain = find_reduction_chain (inv_ld_stmts[i], stmt_drs, + datarefs_vec); + if (has_chain) + reductions.safe_push (inv_ld_stmts[i]); + else + incomplete_chains = true; + } + if (incomplete_chains) + return false; + return true; +} + +/* Returns true if the loop can be unrolled with instruction replication. */ + +vec reductions; +static bool +can_unroll_with_replication (class loop *loop, edge exit) +{ + basic_block header = loop->header; + bool single_preheader = EDGE_COUNT (header->preds) == 2; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Check can_unroll_with_replication for loop %d:\n" + "loop header bb %d, latch bb %d, exiting bb %d,\nis_innermost %d," + " has_single_preheader %d, has_single_exit %d.\n", loop->num, + header->index, loop->latch->index, exit ? exit->src->index : -1, + !loop->inner, single_preheader, single_exit (loop) ? 1 : 0); + if (loop->inner) + return false; + if (exit == NULL) + return false; + if (!single_preheader && !single_exit (loop)) + return false; + + /* Check data dependences. */ + struct obstack parloop_obstack; + gcc_obstack_init (&parloop_obstack); + bool is_parallel_loop = has_irreducible_deps (loop, &parloop_obstack); + obstack_free (&parloop_obstack, NULL); + if (!is_parallel_loop) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Loop %d has data dependences\n", loop->num); + return false; + } + + /* Check induction variables. */ + if (!loop_with_simple_ivs (loop)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Loop %d has complicated IVs\n", loop->num); + return false; + } + + /* Check reduction cases. */ + if (!loop_with_simple_reductions (loop, reductions)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Loop %d has complicated reductions\n", loop->num); + return false; + } + + /* Check simple CFG structure. 
*/ + basic_block *body = get_loop_body (loop); + unsigned int i; + for (i = 0; i < loop->num_nodes; i++) + { + /* Skip bb with exit edge. */ + if (body[i] == exit->src) + continue; + gimple *stmt = last_stmt (body[i]); + if (!stmt || !is_ctrl_stmt (stmt)) + continue; + if (gimple_code (stmt) != GIMPLE_COND) + return false; + gcond *cond_stmt = as_a (stmt); + tree *op0 = gimple_cond_lhs_ptr (cond_stmt); + tree *op1 = gimple_cond_rhs_ptr (cond_stmt); + gcc_assert (op0 && op1); + + bool r0 = expr_invariant_in_loop_p (loop, *op0); + bool r1 = expr_invariant_in_loop_p (loop, *op1); + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Check condition invariance in loop %d bb ? " + " (%d %d):\n", loop->num, r0, r1); + print_gimple_stmt (dump_file, cond_stmt, 0); + } + if (!r0 || !r1) + return false; + } + if (dump_file) + fprintf (dump_file, "Can replicativly unroll loop %d\n", loop->num); + return true; +} + +/* Hash table to store copied bbs of unrolled loop to original bbs. */ +static hash_map copy_to_orig; +/* Hash table to store original bbs of unrolled loop to first set + of copied bbs. 
*/ +static hash_map orig_to_copy0; +static vec loops_to_replicate; + +static hash_map> loop_new_bbs; +static hash_map> loop_all_new_bbs; +static hash_map> copy0_to_stmts; + +static void +map_copied_stmts (gimple *orig_stmt, gimple *stmt, bool is_copy0, + hash_map &orig_to_copy0_stmt) +{ + bool exist; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Copied and orig stmts:\n"); + print_gimple_stmt (dump_file, stmt, 0); + print_gimple_stmt (dump_file, orig_stmt, 0); + } + if (is_copy0) + orig_to_copy0_stmt.put (orig_stmt, stmt); + else + { + gimple **copy0 = orig_to_copy0_stmt.get (orig_stmt); + copy0_to_stmts.get_or_insert (*copy0, &exist).safe_push (stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Add to copy0 map\n"); + } +} + +static void +copy_reduction_info (hash_map &orig_to_copy0_stmt) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Copy reduction info\n"); + + auto_vec old_stmts; + hash_map *>::iterator it = stmt_to_rchain.begin (); + for (; it != stmt_to_rchain.end (); ++it) + { + old_stmts.safe_push ((*it).first); + if (dump_file && (dump_flags & TDF_DETAILS)) + print_gimple_stmt (dump_file, (*it).first, 0); + } + for (unsigned k = 0; k < old_stmts.length (); ++k) + { + gimple *old_stmt = old_stmts[k]; + gimple **new_stmt = orig_to_copy0_stmt.get (old_stmt); + if (new_stmt == NULL || stmt_to_rchain.get (*new_stmt)) + continue; + vec *old_rchain = *stmt_to_rchain.get (old_stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Look at orig stmt and new with len=%d\n", + old_rchain->length ()); + print_gimple_stmt (dump_file, old_stmt, 0); + } + + gcc_assert (new_stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + print_gimple_stmt (dump_file, *new_stmt, 0); + vec *new_rchain; + vec_alloc (new_rchain, 3); + for (unsigned j = 0; j < old_rchain->length (); ++j) + { + gimple **stmt = orig_to_copy0_stmt.get ((*old_rchain)[j]); + gcc_assert (stmt); + if 
(dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "::::orig stmt and new with len=%d\n", + old_rchain->length ()); + print_gimple_stmt (dump_file, (*old_rchain)[j], 0); + if (stmt) + print_gimple_stmt (dump_file, *stmt, 0); + } + new_rchain->safe_push (*stmt); + } + for (unsigned j = 0; j < new_rchain->length (); ++j) + stmt_to_rchain.put ((*new_rchain)[j], new_rchain); + } +} + +/* Save data to execute instruction grouping later. */ + +static void +store_replication_info (class loop *loop, unsigned HOST_WIDE_INT n_unroll, + unsigned first_new_bb, bool is_unroll) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Store grouping info for loop %d by %ld, first new bb " + "is %d, unroll is %d\n", loop->num, n_unroll, first_new_bb, + is_unroll); + loops_to_replicate.safe_push (loop); + + unsigned i; + basic_block bb; + hash_map orig_to_copy0_stmt; + + /* Detect unroll by peeling: if the last of new bbs is connected to the first + original bb, first iterations was peeled. 
*/ + auto_vec orig_bbs; + + for (i = first_new_bb; i < (unsigned) last_basic_block_for_fn (cfun); i++) + { + bb = BASIC_BLOCK_FOR_FN (cfun, i); + basic_block orig_bb = get_bb_original (bb); + bool is_copy0 = false; + bool exist; + basic_block &bb_ref0 = orig_to_copy0.get_or_insert (orig_bb, &exist); + if (!exist) + { + is_copy0 = true; + bb_ref0 = bb; + if (!is_unroll) + orig_bbs.safe_push (orig_bb); + } + + basic_block &bb_ref = copy_to_orig.get_or_insert (bb, &exist); + if (!exist) + bb_ref = orig_bb; + + if (is_copy0) + loop_new_bbs.get_or_insert (loop, &exist).safe_push (bb); + loop_all_new_bbs.get_or_insert (loop, &exist).safe_push (bb); + + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Operate with bb %d and orig bb %d:\n", + bb->index, orig_bb->index); + gimple_stmt_iterator gsi, orig_gsi = gsi_start_bb (orig_bb); + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + gimple *orig_stmt = gsi_stmt (orig_gsi); + /* Skip labels since they are not copied during unrolling. */ + if (gimple_code (orig_stmt) == GIMPLE_LABEL) + { + gsi_next (&orig_gsi); + orig_stmt = gsi_stmt (orig_gsi); + } + gsi_next (&orig_gsi); + map_copied_stmts (orig_stmt, stmt, is_copy0, orig_to_copy0_stmt); + if (rchain_stmts.contains (orig_stmt)) + rchain_stmts.add (stmt); + if (rend_stmts.contains (orig_stmt)) + rend_stmts.add (stmt); + } + gphi_iterator it, orig_it = gsi_start_phis (orig_bb); + for (it = gsi_start_phis (bb); !gsi_end_p (it); gsi_next (&it)) + { + gphi *phi = it.phi (); + gphi *orig_phi = orig_it.phi (); + gsi_next (&orig_it); + map_copied_stmts (orig_phi, phi, is_copy0, orig_to_copy0_stmt); + } + } + /* In the case of peeling we also copy the main loop body to the first bb. 
*/ + if (!is_unroll) + for (unsigned int i = 0; i < orig_bbs.length (); i++) + { + basic_block bb = orig_bbs[i]; + gimple_stmt_iterator gsi; + bool exist; + loop_all_new_bbs.get_or_insert (loop, &exist).safe_push (bb); + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + /* Skip labels. */ + if (gimple_code (stmt) == GIMPLE_LABEL) + continue; + map_copied_stmts (stmt, stmt, false, orig_to_copy0_stmt); + } + gphi_iterator it; + for (it = gsi_start_phis (bb); !gsi_end_p (it); gsi_next (&it)) + { + gphi *phi = it.phi (); + map_copied_stmts (phi, phi, false, orig_to_copy0_stmt); + } + } + + copy_reduction_info (orig_to_copy0_stmt); +} + +static bool +simple_ssa_copy_p (gimple *stmt) +{ + if (gimple_code (stmt) != GIMPLE_ASSIGN) + return false; + if (gimple_assign_rhs_code (stmt) != SSA_NAME + /* Probably it can be asserted. */ + || TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME + || gimple_assign_rhs2 (stmt) != NULL_TREE) + return false; + return true; +} + +static bool +simple_const_increment_p (gimple *stmt) +{ + enum tree_code code; + tree lhs, preinc, rhs2; + + if (gimple_code (stmt) != GIMPLE_ASSIGN) + return false; + + lhs = gimple_assign_lhs (stmt); + if (TREE_CODE (lhs) != SSA_NAME) + return false; + + code = gimple_assign_rhs_code (stmt); + if (code != PLUS_EXPR + && code != MINUS_EXPR + && code != POINTER_PLUS_EXPR) + return false; + + preinc = gimple_assign_rhs1 (stmt); + rhs2 = gimple_assign_rhs2 (stmt); + if (TREE_CODE (preinc) != SSA_NAME || !CONSTANT_CLASS_P (rhs2)) + return false; + return true; +} + +#include "tree-ssa-propagate.h" + +/* Replaces all uses of NAME by VAL. Like replace_uses_by but use this version + uses fold_stmt_inplace. Maybe we need to unify this with fold_stmt. 
*/ + +void +replace_uses_inplace_by (tree name, tree val) +{ + imm_use_iterator imm_iter; + use_operand_p use; + gimple *stmt; + edge e; + + FOR_EACH_IMM_USE_STMT (stmt, imm_iter, name) + { + /* Mark the block if we change the last stmt in it. */ + if (cfgcleanup_altered_bbs + && stmt_ends_bb_p (stmt)) + bitmap_set_bit (cfgcleanup_altered_bbs, gimple_bb (stmt)->index); + + FOR_EACH_IMM_USE_ON_STMT (use, imm_iter) + { + replace_exp (use, val); + + if (gimple_code (stmt) == GIMPLE_PHI) + { + e = gimple_phi_arg_edge (as_a (stmt), + PHI_ARG_INDEX_FROM_USE (use)); + if (e->flags & EDGE_ABNORMAL + && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (val)) + { + /* This can only occur for virtual operands, since + for the real ones SSA_NAME_OCCURS_IN_ABNORMAL_PHI (name)) + would prevent replacement. */ + gcc_checking_assert (virtual_operand_p (name)); + SSA_NAME_OCCURS_IN_ABNORMAL_PHI (val) = 1; + } + } + } + + if (gimple_code (stmt) != GIMPLE_PHI) + { + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + gimple *orig_stmt = stmt; + size_t i; + + /* FIXME. It shouldn't be required to keep TREE_CONSTANT + on ADDR_EXPRs up-to-date on GIMPLE. Propagation will + only change sth from non-invariant to invariant, and only + when propagating constants. */ + if (is_gimple_min_invariant (val)) + for (i = 0; i < gimple_num_ops (stmt); i++) + { + tree op = gimple_op (stmt, i); + /* Operands may be empty here. For example, the labels + of a GIMPLE_COND are nulled out following the creation + of the corresponding CFG edges. */ + if (op && TREE_CODE (op) == ADDR_EXPR) + recompute_tree_invariant_for_addr_expr (op); + } + + if (fold_stmt_inplace (&gsi)) + stmt = gsi_stmt (gsi); + + if (maybe_clean_or_replace_eh_stmt (orig_stmt, stmt)) + gimple_purge_dead_eh_edges (gimple_bb (stmt)); + + update_stmt (stmt); + } + } + + gcc_checking_assert (has_zero_uses (name)); + + /* Also update the trees stored in loop structures. 
*/ + if (current_loops) + { + for (auto loop : loops_list (cfun, 0)) + substitute_in_loop_info (loop, name, val); + } +} + +static bool +simplify_ssa_operands (tree lhs, tree rhs, tree rhs2, enum tree_code c, + bool modify, hash_set &modified_bbs) +{ + if (TREE_CODE (lhs) != SSA_NAME) + return false; + if (TREE_CODE (rhs) != SSA_NAME && !CONSTANT_CLASS_P (rhs)) + return false; + if (rhs2 != NULL_TREE && TREE_CODE (rhs2) != SSA_NAME + && !CONSTANT_CLASS_P (rhs2)) + return false; + + tree extra_const = NULL_TREE; + if (rhs2 != NULL_TREE) + { + if (TREE_CODE (rhs) && CONSTANT_CLASS_P (rhs2)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "RESULT2\n"); + extra_const = rhs2; + } + else if (TREE_CODE (rhs) == SSA_NAME && TREE_CODE (rhs2) == SSA_NAME) + { + gimple *stmt = SSA_NAME_DEF_STMT (rhs2); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "RESULT4:\n"); + print_gimple_stmt (dump_file, stmt, 0); + } + if (is_gimple_debug (stmt) || !is_gimple_assign (stmt) + || (gimple_assign_rhs2 (stmt) && modify)) + return false; + tree new_rhs = gimple_assign_rhs1 (stmt); + if (!CONSTANT_CLASS_P (new_rhs)) + return false; + extra_const = rhs2; + } + } + + use_operand_p use_p; + imm_use_iterator imm_iter; + unsigned uses = 0; + bool can_update = true, has_memref_uses = false; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) + { + uses++; + gimple *stmt1 = USE_STMT (use_p); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Stmt's use: "); + print_gimple_stmt (dump_file, stmt1, 0); + } + bool is_memref = simple_load_store_p (stmt1); + if (extra_const != NULL_TREE && !simple_const_increment_p (stmt1) + && !simple_ssa_copy_p (stmt1) && !is_memref + && gimple_code (stmt1) != GIMPLE_PHI) + can_update = false; + has_memref_uses |= is_memref; + } + if (uses == 0 || can_update == false + || (has_memref_uses && extra_const != NULL_TREE)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Reason no 
uses (%d) can't upd (%d) " + "const in mem (%d)\n", uses == 0, can_update == false, + (has_memref_uses && extra_const != NULL_TREE)); + return false; + } + if (rhs2 && dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "UNIMPLEMENTED: (%s)", get_tree_code_name (c)); + print_generic_expr (dump_file, rhs2, TDF_SLIM); + fprintf (dump_file, "\n"); + } + if (!modify) + return true; + + tree val = rhs; + if (extra_const != NULL_TREE) + { + if (c == POINTER_PLUS_EXPR) + val = fold_build2 (c, (TREE_TYPE (rhs)), rhs, rhs2); + else + val = fold_build2 (c, TREE_TYPE (rhs), rhs, rhs2); + if (extra_const != NULL_TREE && dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "INSERTED: (%s) ", get_tree_code_name (c)); + print_generic_expr (dump_file, val, TDF_SLIM); + fprintf (dump_file, "\n"); + } + } + if (extra_const != NULL_TREE) + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) + { + gimple *stmt = USE_STMT (use_p); + if (gimple_code (stmt) != GIMPLE_PHI) + continue; + basic_block bb = gimple_bb (stmt); + modified_bbs.add (bb); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Add bb %d to blocks with modified phis\n", + bb->index); + } + if (extra_const != NULL_TREE && TREE_CODE (val) != SSA_NAME + && !CONSTANT_CLASS_P (val)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "UNSUPPORTED: cannot propagate "); + print_generic_expr (dump_file, val, TDF_SLIM); + fprintf (dump_file, "\n"); + } + return false; + } + replace_uses_inplace_by (lhs, val); + return true; +} + +static bool +forward_propagate_iv (gimple *stmt, bool modify, + hash_set &modified_bbs) +{ + if (is_gimple_debug (stmt) || !is_gimple_assign (stmt)) + { + enum gimple_code code = gimple_code (stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "False0 %s\n", gimple_code_name[code]); + return false; + } + tree lhs = gimple_assign_lhs (stmt); + tree rhs = gimple_assign_rhs1 (stmt); + tree rhs2 = gimple_assign_rhs2 
(stmt); + enum tree_code c = gimple_assign_rhs_code (stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + enum tree_code def_code = TREE_CODE (rhs); + enum tree_code ldef_code = TREE_CODE (lhs); + fprintf (dump_file, "--------------------------------\n"); + print_gimple_stmt (dump_file, stmt, 0); + fprintf (dump_file, "In assign %s = (%s) %s", + get_tree_code_name (ldef_code), + get_tree_code_name (c), + get_tree_code_name (def_code)); + if (rhs2 != NULL_TREE) + fprintf (dump_file, " %s", get_tree_code_name (TREE_CODE (rhs2))); + fprintf (dump_file, "\n"); + } + if (c != PLUS_EXPR && c != MINUS_EXPR && c != POINTER_PLUS_EXPR + && c != SSA_NAME && TREE_CODE_CLASS (c) != tcc_constant) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "False1\n"); + return false; + } + + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Single stmt's arg: "); + print_generic_expr (dump_file, lhs, TDF_SLIM); + fprintf (dump_file, " "); + print_generic_expr (dump_file, rhs, TDF_SLIM); + if (rhs2 != NULL_TREE) + { + fprintf (dump_file, " "); + print_generic_expr (dump_file, rhs2, TDF_SLIM); + } + fprintf (dump_file, "\n"); + } + /* Unsupported case. 
*/ + if (gimple_assign_rhs3 (stmt)) + return false; + return simplify_ssa_operands (lhs, rhs, rhs2, c, modify, modified_bbs); +} + +static bool +propagate_ivs_for_bb (basic_block bb, hash_set &modified_bbs) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Propagate IVs for bb %d\n", bb->index); + + bool changed = false; + if (single_pred_p (bb)) + { + edge e = single_pred_edge (bb); + for (gphi_iterator it = gsi_start_phis (bb); + !gsi_end_p (it); gsi_next (&it)) + { + gphi *phi = it.phi (); + if (virtual_operand_p (PHI_RESULT (phi))) + continue; + tree phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, e); + tree phi_res = PHI_RESULT (phi); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Single phi's arg: "); + print_generic_expr (dump_file, phi_res, TDF_SLIM); + fprintf (dump_file, " "); + print_generic_expr (dump_file, phi_arg, TDF_SLIM); + fprintf (dump_file, "\n"); + } + if (TREE_CODE (phi_arg) != SSA_NAME && !CONSTANT_CLASS_P (phi_arg)) + continue; + //gcc_assert (TREE_CODE (phi_arg) == INTEGER_CST); + use_operand_p use_p; + imm_use_iterator imm_iter; + unsigned uses = 0; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, phi_res) + { + uses++; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + gimple *stmt1 = USE_STMT (use_p); + fprintf (dump_file, "Phi's use: "); + print_gimple_stmt (dump_file, stmt1, 0); + } + } + if (uses == 0) + continue; + replace_uses_inplace_by (phi_res, phi_arg); + changed = true; + } + } + for (gimple_stmt_iterator it = gsi_start_bb (bb); + !gsi_end_p (it); gsi_next (&it)) + if (forward_propagate_iv (gsi_stmt (it), true, modified_bbs)) + changed = true; + return changed; +} + +static bool +def_has_uses (tree def) +{ + use_operand_p use_p; + imm_use_iterator imm_iter; + unsigned uses = 0; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def) + uses++; + return uses > 0; +} + +/* Either convert trivial phis (i.e. with single use operand) to assign stmts + or delete the phis if their results have no uses. 
*/ + +static void +convert_trivial_phis (basic_block bb) +{ + gcc_assert (single_pred_p (bb)); + gphi_iterator it = gsi_start_phis (bb); + if (gsi_end_p (it)) + return; + gimple_stmt_iterator gsi = gsi_start_bb (bb); + unsigned int inserted = 0; + for (; !gsi_end_p (it); gsi_next (&it)) + { + gphi *phi = it.phi (); + tree dst = PHI_RESULT (phi); + if (virtual_operand_p (dst)) + continue; + /* If phi has uses, convert it to assignment stmt, + otherwise delete it. */ + if (def_has_uses (dst)) + { + tree src = gimple_phi_arg_def (phi, 0); + gimple *g = gimple_build_assign (dst, src); + if (inserted == 0) + gsi_insert_before (&gsi, g, GSI_NEW_STMT); + else + gsi_insert_after (&gsi, g, GSI_NEW_STMT); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Insert assign: "); + print_gimple_stmt (dump_file, g, 0); + } + } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Remove phi: "); + print_gimple_stmt (dump_file, phi, 0); + } + gimple_stmt_iterator si = gsi_for_stmt (phi); + gsi_remove (&si, true); + } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "After convert_trivial_phis:\n"); + dump_basic_block (dump_flags, bb, 0); + } +} + +static void +propagate_ivs (class loop *loop) +{ + bool changed; + unsigned int i; + vec *bbs = loop_all_new_bbs.get (loop); + gcc_assert (bbs->length ()); + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Start propagate_ivs for loop %d\n", loop->num); + + hash_set modified_bbs; + do + { + changed = false; + for (i = 0; i < bbs->length (); i++) + changed = propagate_ivs_for_bb ((*bbs)[i], modified_bbs) || changed; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, changed ? "Do next iter of propagate_ivs.\n" + : "Finish propagate_ivs.\n"); + } + while (changed); + + /* Correct phis outside the loop if they were modified. 
*/ + if (modified_bbs.is_empty ()) + return; + for (i = 0; i < bbs->length (); i++) + { + basic_block bb = (*bbs)[i]; + if (modified_bbs.contains (bb)) + modified_bbs.remove (bb); + } + for (hash_set::iterator it = modified_bbs.begin (); + it != modified_bbs.end (); ++it) + convert_trivial_phis (*it); } -/* Remove all tests for exits that are known to be taken after LOOP was - peeled NPEELED times. Put gcc_unreachable before every statement - known to not be executed. */ - -static bool -remove_exits_and_undefined_stmts (class loop *loop, unsigned int npeeled) +static void +copy_attached_stmts (gimple_stmt_iterator &gsi, gimple *copy0) { - class nb_iter_bound *elt; - bool changed = false; + unsigned int i; + vec *stmts = copy0_to_stmts.get (copy0); + /* If it unrolls by 2 with peeling, we have no additional stmts. */ + if (stmts == NULL) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Copy0 with no attached stmts: \n"); + print_gimple_stmt (dump_file, copy0, 0); + } + return; + } + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Copy0 and attached stmts:\n"); + print_gimple_stmt (dump_file, copy0, 0); + for (i = 0; i < stmts->length (); i++) + print_gimple_stmt (dump_file, (*stmts)[i], 0); + } - for (elt = loop->bounds; elt; elt = elt->next) + bool is_rstmt = rchain_stmts.contains (copy0); + bool is_rend = rend_stmts.contains (copy0); + if (is_rstmt && !is_rend) + return; + if (is_rend) { - /* If statement is known to be undefined after peeling, turn it - into unreachable (or trap when debugging experience is supposed - to be good). 
*/ - if (!elt->is_exit - && wi::ltu_p (elt->bound, npeeled)) + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, ":\n"); + vec **rchain_p = stmt_to_rchain.get (copy0); + if (!rchain_p) { - gimple_stmt_iterator gsi = gsi_for_stmt (elt->stmt); - gcall *stmt = gimple_build_call - (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0); - gimple_set_location (stmt, gimple_location (elt->stmt)); - gsi_insert_before (&gsi, stmt, GSI_NEW_STMT); - split_block (gimple_bb (stmt), stmt); - changed = true; if (dump_file && (dump_flags & TDF_DETAILS)) { - fprintf (dump_file, "Forced statement unreachable: "); - print_gimple_stmt (dump_file, elt->stmt, 0); + fprintf (dump_file, "No item in stmt_to_rchain for key:\n"); + print_gimple_stmt (dump_file, copy0, 0); } } - /* If we know the exit will be taken after peeling, update. */ - else if (elt->is_exit - && wi::leu_p (elt->bound, npeeled)) + tree last_res = gimple_assign_rhs1 (copy0); + vec *rchain = *rchain_p; + gimple *rstart = (*rchain)[0]; + unsigned rlen = rchain->length (); + gimple *rend = (*rchain)[rlen - 1]; + vec *start_stmts = copy0_to_stmts.get (rstart); + vec *end_stmts = copy0_to_stmts.get (rend); + /* Insert before the chain end, so set the iterator before the store. 
*/ + gsi_prev (&gsi); + for (i = 0; i < stmts->length (); i++) { - basic_block bb = gimple_bb (elt->stmt); - edge exit_edge = EDGE_SUCC (bb, 0); + tree lhs = gimple_assign_lhs ((*start_stmts)[i]); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Start lhs:\n"); + print_generic_expr (dump_file, lhs, TDF_SLIM); + fprintf (dump_file, "\n"); + } + gimple *new_stmt = gimple_build_assign (lhs, SSA_NAME, last_res); + gsi_insert_after (&gsi, new_stmt, GSI_CONTINUE_LINKING); + if (dump_file && (dump_flags & TDF_DETAILS)) + print_gimple_stmt (dump_file, new_stmt, 0); + last_res = gimple_assign_rhs1 ((*end_stmts)[i]); + for (unsigned j = 1; j < rlen - 1; j++) + { + gimple *cur_stmt = (*rchain)[j]; + vec *cur_stmts = copy0_to_stmts.get (cur_stmt); + gimple_stmt_iterator gsi2 = gsi_for_stmt ((*cur_stmts)[i]); + gsi_move_after (&gsi2, &gsi); + } + } + /* Patch stmt at the chain end. */ + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Last rhs:\n"); + print_generic_expr (dump_file, last_res, TDF_SLIM); + fprintf (dump_file, "\n"); + } + gimple_assign_set_rhs1 (rend, last_res); + update_stmt (rend); + + /* Delete start and end stmts which were not copied. */ + for (i = 0; i < start_stmts->length (); i++) + { + gimple_stmt_iterator gsi2 = gsi_for_stmt ((*start_stmts)[i]); + gsi_remove (&gsi2, true); + } + for (i = 0; i < end_stmts->length (); i++) + { + gimple_stmt_iterator gsi2 = gsi_for_stmt ((*end_stmts)[i]); + gsi_remove (&gsi2, true); + } + /* Move the iterator to the initial place. */ + gsi_next (&gsi); + } + else + for (i = 0; i < stmts->length (); i++) + { + gimple *stmt2 = (*stmts)[i]; + gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt2); + gsi_move_after (&gsi2, &gsi); + } +} + +/* Remove virtual operands in cleared bbs. 
*/ + +static void +remove_attached_virtuals (basic_block bb) +{ + gphi_iterator it; + for (it = gsi_start_phis (bb); !gsi_end_p (it); gsi_next (&it)) + { + gphi *copy0 = it.phi (); + if (!virtual_operand_p (PHI_RESULT (copy0))) + continue; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Virtual op in bb %d:\n", + gimple_bb (copy0)->index); + print_gimple_stmt (dump_file, copy0, 0); + } + vec *stmts = copy0_to_stmts.get (copy0); + for (unsigned int i = 0; i < stmts->length (); i++) + { + gimple *stmt2 = (*stmts)[i]; + gcc_assert (virtual_operand_p (PHI_RESULT (stmt2))); if (dump_file && (dump_flags & TDF_DETAILS)) { - fprintf (dump_file, "Forced exit to be taken: "); - print_gimple_stmt (dump_file, elt->stmt, 0); + fprintf (dump_file, "Remove virtual op in cleared bb %d:\n", + gimple_bb (stmt2)->index); + print_gimple_stmt (dump_file, stmt2, 0); } - if (!loop_exit_edge_p (loop, exit_edge)) - exit_edge = EDGE_SUCC (bb, 1); - exit_edge->probability = profile_probability::always (); - gcc_checking_assert (loop_exit_edge_p (loop, exit_edge)); - gcond *cond_stmt = as_a (elt->stmt); - if (exit_edge->flags & EDGE_TRUE_VALUE) - gimple_cond_make_true (cond_stmt); - else - gimple_cond_make_false (cond_stmt); - update_stmt (cond_stmt); - changed = true; + gimple_stmt_iterator si = gsi_for_stmt (stmt2); + gsi_remove (&si, true); } } - return changed; } -/* Remove all exits that are known to be never taken because of the loop bound - discovered. */ +static void +copy_replicative_in_bb (basic_block bb) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Replicative copying to bb %d:\n", bb->index); + /* Copy regular stmts. */ + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *copy0 = gsi_stmt (gsi); + /* Don't need to move cfg stmts. */ + if (is_ctrl_stmt (copy0)) + continue; + copy_attached_stmts (gsi, copy0); + } + /* Copy phis. 
*/ + gphi_iterator it; + for (it = gsi_start_phis (bb); !gsi_end_p (it); gsi_next (&it)) + { + gphi *copy0 = it.phi (); + if (virtual_operand_p (PHI_RESULT (copy0))) + continue; + copy_attached_stmts (it, copy0); + } + remove_attached_virtuals (bb); + if (!single_pred_p (bb)) + return; + /* Convert trivial phis. */ + convert_trivial_phis (bb); +} static bool -remove_redundant_iv_tests (class loop *loop) +need_hoist_stmt (gimple *stmt, hash_set &loop_bbs, + basic_block preheader, hash_set &after_stmts, + hash_map &pos_stmts) { - class nb_iter_bound *elt; - bool changed = false; - - if (!loop->any_upper_bound) - return false; - for (elt = loop->bounds; elt; elt = elt->next) + tree lhs = gimple_assign_lhs (stmt); + tree rhs = gimple_assign_rhs1 (stmt); + gimple *def_stmt = SSA_NAME_DEF_STMT (rhs); + if (!def_stmt || !(is_gimple_assign (def_stmt) + || gimple_code (def_stmt) == GIMPLE_PHI)) { - /* Exit is pointless if it won't be taken before loop reaches - upper bound. */ - if (elt->is_exit && loop->any_upper_bound - && wi::ltu_p (loop->nb_iterations_upper_bound, elt->bound)) + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "FIXME: no def (%d) or it's not assign/phi (%d)\n", + !def_stmt, !is_gimple_assign (def_stmt) + && !(gimple_code (def_stmt) == GIMPLE_PHI)); + if (!def_stmt) + return false; + } + basic_block def_bb = gimple_bb (def_stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Ok, its def is in bb %d (loop/preheader=%d/%d):\n", + def_bb->index, loop_bbs.contains (def_bb), def_bb == preheader); + print_gimple_stmt (dump_file, def_stmt, 0); + if (!stmt_dominates_stmt_p (def_stmt, stmt)) + fprintf (dump_file, "Def doesn't dominate the stmt," + " it should be fixed later\n"); + } + use_operand_p use_p; + imm_use_iterator imm_iter; + gimple *udom_stmt = NULL; + bool insert_after = false; + auto_vec dom_stmts; + FOR_EACH_IMM_USE_FAST (use_p, imm_iter, lhs) + { + gimple *use_stmt = USE_STMT (use_p); + if (dump_file && 
(dump_flags & TDF_DETAILS)) { - basic_block bb = gimple_bb (elt->stmt); - edge exit_edge = EDGE_SUCC (bb, 0); - class tree_niter_desc niter; + fprintf (dump_file, "Its use is in bb %d: ", + gimple_bb (use_stmt)->index); + print_gimple_stmt (dump_file, use_stmt, 0); + } + if (!loop_bbs.contains (gimple_bb (use_stmt))) + continue; + if (stmt_dominates_stmt_p (stmt, use_stmt)) + continue; + dom_stmts.safe_push (use_stmt); + if (!udom_stmt || stmt_dominates_stmt_p (use_stmt, udom_stmt)) + { + udom_stmt = use_stmt; + insert_after = false; + continue; + } + if (stmt_dominates_stmt_p (udom_stmt, use_stmt)) + continue; + basic_block dbb = nearest_common_dominator (CDI_DOMINATORS, + gimple_bb (use_stmt), + gimple_bb (udom_stmt)); + gcc_assert (loop_bbs.contains (dbb) || dbb == preheader); + udom_stmt = last_stmt (dbb); + insert_after = true; + } + if (dom_stmts.length ()) + { + gcc_assert (udom_stmt); + pos_stmts.put (stmt, udom_stmt); + if (insert_after) + after_stmts.add (stmt); + return true; + } + return false; +} - if (!loop_exit_edge_p (loop, exit_edge)) - exit_edge = EDGE_SUCC (bb, 1); +static void +hoist_ivs (vec *bbs) +{ + /* The first stored bb is always header. */ + basic_block header = (*bbs)[0]; + basic_block preheader = single_pred_p (header) ? single_pred (header) : NULL; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Hoisting stmts in the loop with new header bb %d:\n", + header->index); - /* Only when we know the actual number of iterations, not - just a bound, we can remove the exit. 
*/ - if (!number_of_iterations_exit (loop, exit_edge, - &niter, false, false) - || !integer_onep (niter.assumptions) - || !integer_zerop (niter.may_be_zero) - || !niter.niter - || TREE_CODE (niter.niter) != INTEGER_CST - || !wi::ltu_p (loop->nb_iterations_upper_bound, - wi::to_widest (niter.niter))) - continue; - + hash_set loop_bbs; + for (unsigned int i = 0; i < bbs->length (); i++) + loop_bbs.add ((*bbs)[i]); + + auto_vec dead_stmts, stmts_to_hoist; + hash_set after_stmts; + hash_map pos_stmts; + for (unsigned int i = 0; i < bbs->length (); i++) + { + basic_block bb = (*bbs)[i]; + gimple_stmt_iterator gsi; + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); if (dump_file && (dump_flags & TDF_DETAILS)) { - fprintf (dump_file, "Removed pointless exit: "); - print_gimple_stmt (dump_file, elt->stmt, 0); + fprintf (dump_file, "Check stmt: "); + print_gimple_stmt (dump_file, stmt, 0); } - gcond *cond_stmt = as_a (elt->stmt); - if (exit_edge->flags & EDGE_TRUE_VALUE) - gimple_cond_make_false (cond_stmt); + if (is_gimple_debug (stmt) || !is_gimple_assign (stmt)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Debug or not assign stmt...\n"); + continue; + } + tree lhs = gimple_assign_lhs (stmt); + tree rhs = gimple_assign_rhs1 (stmt); + tree rhs2 = gimple_assign_rhs2 (stmt); + enum tree_code c = gimple_assign_rhs_code (stmt); + if (c != PLUS_EXPR && c != MINUS_EXPR && c != POINTER_PLUS_EXPR + && c != SSA_NAME && !CONSTANT_CLASS_P (rhs)) + continue; + if (TREE_CODE (lhs) != SSA_NAME || (TREE_CODE (rhs) != SSA_NAME + && !CONSTANT_CLASS_P (rhs))) + continue; + if (rhs2 != NULL_TREE && !CONSTANT_CLASS_P (rhs2)) + continue; + if (!def_has_uses (lhs)) + dead_stmts.safe_push (stmt); else - gimple_cond_make_true (cond_stmt); - update_stmt (cond_stmt); - changed = true; + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Check hoisting..."); + gcc_assert (!CONSTANT_CLASS_P (rhs)); 
+ /* The same ssa object. */ + if (SSA_NAME_IDENTIFIER (lhs) != SSA_NAME_IDENTIFIER (rhs) + || SSA_NAME_VAR (lhs) != SSA_NAME_VAR (rhs)) + continue; + if (need_hoist_stmt (stmt, loop_bbs, preheader, + after_stmts, pos_stmts)) + stmts_to_hoist.safe_push (stmt); + } } } - return changed; + /* Hoist stmts to correct ssa. */ + for (unsigned int i = 0; i < stmts_to_hoist.length (); i++) + { + gimple *stmt = stmts_to_hoist[i]; + gimple_stmt_iterator gsi_stmt = gsi_for_stmt (stmt); + gimple **pos = pos_stmts.get (stmt); + gimple_stmt_iterator gsi_pos = gsi_for_stmt (*pos); + bool insert_after = false; + if (after_stmts.contains (stmt)) + { + insert_after = true; + gsi_move_after (&gsi_stmt, &gsi_pos); + } + else + gsi_move_before (&gsi_stmt, &gsi_pos); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Hoist stmt: "); + print_gimple_stmt (dump_file, stmt, 0); + fprintf (dump_file, "%s this:", insert_after ? "after" : "before"); + print_gimple_stmt (dump_file, *pos, 0); + } + } + /* Remove stmts with unused defs. */ + for (unsigned int i = 0; i < dead_stmts.length (); i++) + { + gimple *stmt = dead_stmts[i]; + gimple_stmt_iterator gsi = gsi_for_stmt (stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Remove unused stmt: "); + print_gimple_stmt (dump_file, stmt, 0); + } + gsi_remove (&gsi, true); + } } -/* Stores loops that will be unlooped and edges that will be removed - after we process whole loop tree. */ -static vec loops_to_unloop; -static vec loops_to_unloop_nunroll; -static vec edges_to_remove; -/* Stores loops that has been peeled. */ -static bitmap peeled_loops; - -/* Cancel all fully unrolled loops by putting __builtin_unreachable - on the latch edge. - We do it after all unrolling since unlooping moves basic blocks - across loop boundaries trashing loop closed SSA form as well - as SCEV info needed to be intact during unrolling. 
- - IRRED_INVALIDATED is used to bookkeep if information about - irreducible regions may become invalid as a result - of the transformation. - LOOP_CLOSED_SSA_INVALIDATED is used to bookkepp the case - when we need to go into loop closed SSA form. */ - static void -unloop_loops (bitmap loop_closed_ssa_invalidated, - bool *irred_invalidated) +apply_replicative () { - while (loops_to_unloop.length ()) + while (loops_to_replicate.length ()) { - class loop *loop = loops_to_unloop.pop (); - int n_unroll = loops_to_unloop_nunroll.pop (); - basic_block latch = loop->latch; - edge latch_edge = loop_latch_edge (loop); - int flags = latch_edge->flags; - location_t locus = latch_edge->goto_locus; - gcall *stmt; - gimple_stmt_iterator gsi; - - remove_exits_and_undefined_stmts (loop, n_unroll); - - /* Unloop destroys the latch edge. */ - unloop (loop, irred_invalidated, loop_closed_ssa_invalidated); + class loop *loop = loops_to_replicate.pop (); - /* Create new basic block for the latch edge destination and wire - it in. 
*/ - stmt = gimple_build_call (builtin_decl_implicit (BUILT_IN_UNREACHABLE), 0); - latch_edge = make_edge (latch, create_basic_block (NULL, NULL, latch), flags); - latch_edge->probability = profile_probability::never (); - latch_edge->flags |= flags; - latch_edge->goto_locus = locus; + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Do apply_replicative for loop %d\n", loop->num); - add_bb_to_loop (latch_edge->dest, current_loops->tree_root); - latch_edge->dest->count = profile_count::zero (); - set_immediate_dominator (CDI_DOMINATORS, latch_edge->dest, latch_edge->src); + propagate_ivs (loop); - gsi = gsi_start_bb (latch_edge->dest); - gsi_insert_after (&gsi, stmt, GSI_NEW_STMT); + vec *bbs = loop_new_bbs.get (loop); + gcc_assert (bbs->length ()); + for (unsigned int i = 0; i < bbs->length (); i++) + copy_replicative_in_bb ((*bbs)[i]); + hoist_ivs (bbs); } - loops_to_unloop.release (); - loops_to_unloop_nunroll.release (); + mark_virtual_operands_for_renaming (cfun); + update_ssa (TODO_update_ssa_only_virtuals); - /* Remove edges in peeled copies. Given remove_path removes dominated - regions we need to cope with removal of already removed paths. */ - unsigned i; - edge e; - auto_vec src_bbs; - src_bbs.reserve_exact (edges_to_remove.length ()); - FOR_EACH_VEC_ELT (edges_to_remove, i, e) - src_bbs.quick_push (e->src->index); - FOR_EACH_VEC_ELT (edges_to_remove, i, e) - if (BASIC_BLOCK_FOR_FN (cfun, src_bbs[i])) - { - bool ok = remove_path (e, irred_invalidated, - loop_closed_ssa_invalidated); - gcc_assert (ok); - } - edges_to_remove.release (); + loops_to_replicate.release (); } /* Tries to unroll LOOP completely, i.e. NITER times. 
@@ -883,11 +3237,24 @@ try_unroll_loop_completely (class loop *loop, loop->num); return false; } + else if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Ok, unr_insns=%ld, " + "param_max_completely_peeled_insns=%d\n", + unr_insns, param_max_completely_peeled_insns); } if (!dbg_cnt (gimple_unroll)) return false; - + bool replicative = false; + unsigned first_new_block; + if (do_replicative_unroll && can_unroll_with_replication (loop, exit) + && n_unroll > 1) + { + static int count = 0; + replicative = true; + first_new_block = last_basic_block_for_fn (cfun); + count ++; + } initialize_original_copy_tables (); auto_sbitmap wont_exit (n_unroll + 1); if (exit && niter @@ -906,7 +3273,7 @@ try_unroll_loop_completely (class loop *loop, } if (may_be_zero) bitmap_clear_bit (wont_exit, 1); - + bool is_unroll = (loop->latch == loop_preheader_edge (loop)->src); if (!gimple_duplicate_loop_body_to_header_edge ( loop, loop_preheader_edge (loop), n_unroll, wont_exit, exit, &edges_to_remove, @@ -917,6 +3284,8 @@ try_unroll_loop_completely (class loop *loop, fprintf (dump_file, "Failed to duplicate the loop\n"); return false; } + if (replicative) + store_replication_info (loop, n_unroll, first_new_block, is_unroll); free_original_copy_tables (); } @@ -1178,6 +3547,12 @@ canonicalize_loop_induction_variables (class loop *loop, if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false)) { niter = niter_desc.niter; + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "Loop %d niter ", loop->num); + print_generic_expr (dump_file, niter, TDF_SLIM); + fprintf (dump_file, "\n"); + } may_be_zero = niter_desc.may_be_zero && !integer_zerop (niter_desc.may_be_zero); } @@ -1441,6 +3816,19 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer) free_numbers_of_iterations_estimates (cfun); estimate_numbers_of_iterations (cfun); + /* TODO: empty move at change? 
*/ + if (do_replicative_unroll) + { + copy_to_orig.empty (); + orig_to_copy0.empty (); + loop_new_bbs.empty (); + loop_all_new_bbs.empty (); + copy0_to_stmts.empty (); + stmt_to_rchain.empty (); + rchain_stmts.empty (); + rend_stmts.empty (); + } + changed = tree_unroll_loops_completely_1 (may_increase_size, unroll_outer, father_bbs, current_loops->tree_root); @@ -1457,6 +3845,9 @@ tree_unroll_loops_completely (bool may_increase_size, bool unroll_outer) TODO_update_ssa); else update_ssa (TODO_update_ssa); + /* Apply replicative unroll. */ + if (do_replicative_unroll) + apply_replicative (); /* father_bbs is a bitmap of loop father header BB indices. Translate that to what non-root loops these BBs belong to now. */ @@ -1601,6 +3992,8 @@ pass_complete_unroll::execute (function *fun) re-peeling the same loop multiple times. */ if (flag_peel_loops) peeled_loops = BITMAP_ALLOC (NULL); + do_replicative_unroll = flag_cunroll_replicative; + unsigned int val = tree_unroll_loops_completely (flag_cunroll_grow_size, true); if (peeled_loops) @@ -1658,6 +4051,7 @@ pass_complete_unrolli::execute (function *fun) if (number_of_loops (fun) > 1) { scev_initialize (); + do_replicative_unroll = false; ret = tree_unroll_loops_completely (optimize >= 3, false); scev_finalize (); } diff --git a/gcc/tree.h b/gcc/tree.h index 07af584d637330030c4c957196609ddbef5ff3dd..96ac82835e46fd4d26fe9774505c94ade2640b78 100644 --- a/gcc/tree.h +++ b/gcc/tree.h @@ -2157,6 +2157,9 @@ extern tree vector_element_bits_tree (const_tree); #define TYPE_ALIAS_SET_KNOWN_P(NODE) \ (TYPE_CHECK (NODE)->type_common.alias_set != -1) +#define TYPE_FIELDS_NONALIAS(NODE) \ + (RECORD_OR_UNION_CHECK (NODE)->base.private_flag) + /* A TREE_LIST of IDENTIFIER nodes of the attributes that apply to this type. */ #define TYPE_ATTRIBUTES(NODE) (TYPE_CHECK (NODE)->type_common.attributes)