From 8fe5f80ca18afd2e5da23299fe65c9472027637d Mon Sep 17 00:00:00 2001
From: zhaowenyu <804544223@qq.com>
Date: Mon, 20 Jun 2022 18:25:58 +0800
Subject: [PATCH] [initcall-cp] a variant of interprocedural constant
 propagation

Reference: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=92538#c3

The proposed transformation (ipa-initcall-cp) is a variant of
interprocedural constant propagation.  ipa-initcall-cp collects all
variables with static lifetime that are written to exactly once (as in
the typical initialization-function case) and propagates the written
constant to reads across the whole link unit.

For this transformation to be sound, the following writes are not
taken into account:

* writes that can be reached from a function pointer,
* writes that can be reached from a function with outside visibility,
  and
* writes that may happen after any read.

To determine that no read happens before the write, we have to:

* find the main function,
* find the function and basic block of the write,
* for each read in another function, make sure that the call to the
  writing function dominates all callees of the reading function, and
* for each read in the same function, make sure that the write
  dominates the read.

Currently, all SPEC CPU 2017 benchmarks compile with this patch and
run successfully.  The pass detects several propagatable constants in
12 of the SPEC CPU benchmarks.
---
 gcc/Makefile.in       |    1 +
 gcc/cgraph.c          |    1 +
 gcc/cgraph.h          |    4 +-
 gcc/common.opt        |    8 +
 gcc/ipa-cp.c          |    4 +-
 gcc/ipa-initcall-cp.c | 1467 +++++++++++++++++++++++++++++++++++++++++
 gcc/passes.def        |    1 +
 gcc/tree-pass.h       |    2 +
 8 files changed, 1485 insertions(+), 3 deletions(-)
 create mode 100644 gcc/ipa-initcall-cp.c

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 23394c64bc3..5740f583a58 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1401,6 +1401,7 @@ OBJS = \
 	ipa-struct-reorg/ipa-struct-reorg.o \
 	ipa-cp.o \
 	ipa-sra.o \
+	ipa-initcall-cp.o \
 	ipa-devirt.o \
 	ipa-fnsummary.o \
 	ipa-polymorphic-call.o \
diff --git a/gcc/cgraph.c b/gcc/cgraph.c
index 4a44b4a4896..58f0a461c0b 100644
--- a/gcc/cgraph.c
+++ b/gcc/cgraph.c
@@ -3918,6 +3918,7 @@ cgraph_node::get_untransformed_body (void)
   struct lto_in_decl_state *decl_state
     = lto_get_function_in_decl_state (file_data, decl);
+  gcc_assert (decl_state != NULL);
 
   cgraph_node *origin = this;
   while (origin->clone_of)
     origin = origin->clone_of;
diff --git a/gcc/cgraph.h b/gcc/cgraph.h
index 8e71af07238..faf0b5318d6 100644
--- a/gcc/cgraph.h
+++ b/gcc/cgraph.h
@@ -123,7 +123,7 @@ public:
       order (false), next_sharing_asm_name (NULL),
       previous_sharing_asm_name (NULL), same_comdat_group (NULL), ref_list (),
       alias_target (NULL), lto_file_data (NULL), aux (NULL),
-      x_comdat_group (NULL_TREE), x_section (NULL)
+      x_comdat_group (NULL_TREE), x_section (NULL), skip_ipa_cp (false)
   {}
 
   /* Return name.  */
@@ -625,6 +625,8 @@ public:
   /* Section name.  Again can be private, if allowed.  */
   section_hash_entry *x_section;
 
+  bool skip_ipa_cp;
+
 protected:
   /* Dump base fields of symtab nodes to F.  Not to be used directly.  */
   void dump_base (FILE *);
diff --git a/gcc/common.opt b/gcc/common.opt
index 7fc075d354a..d39ddc33256 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3511,4 +3511,12 @@ fipa-ra
 Common Report Var(flag_ipa_ra) Optimization
 Use caller save register across calls if possible.
 
+finitcall-cp
+Common Report Var(flag_initcall_cp)
+
+
+finitcall-cp-dryrun
+Common Report Var(flag_initcall_cp_dryrun)
+
+
 ; This comment is to ensure we retain the blank line above.
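For illustration, this is the shape of code the pass targets (a
hypothetical sketch, not part of the patch; all names are made up):

    static int limit;              /* static lifetime, written once */

    static void init (void)
    {
      limit = 42;                  /* the single constant write */
    }

    static int check (int x)
    {
      return x > limit;            /* load can be rewritten to x > 42 */
    }

    int main (void)
    {
      init ();                     /* the write dominates every read */
      return check (100);
    }

Since limit is never address-taken and init () is called from main ()
before any read can execute, the load of limit in check () can be
replaced by the constant 42.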
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c index c208070c953..5c8883ff3a5 100644 --- a/gcc/ipa-cp.c +++ b/gcc/ipa-cp.c @@ -1189,8 +1189,8 @@ initialize_node_lattices (struct cgraph_node *node) gcc_checking_assert (node->has_gimple_body_p ()); - if (!ipa_get_param_count (info)) - disable = true; + if (!ipa_get_param_count (info) || node->skip_ipa_cp) + disable = true; else if (node->local) { int caller_count = 0; diff --git a/gcc/ipa-initcall-cp.c b/gcc/ipa-initcall-cp.c new file mode 100644 index 00000000000..a641ae24a26 --- /dev/null +++ b/gcc/ipa-initcall-cp.c @@ -0,0 +1,1467 @@ +/* Initcall constant propagation pass. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "tree.h" +#include "tree-eh.h" +#include "gimple.h" +#include "gimple-expr.h" +#include "gimple-iterator.h" +#include "predict.h" +#include "alloc-pool.h" +#include "tree-pass.h" +#include "cgraph.h" +#include "diagnostic.h" +#include "fold-const.h" +#include "gimple-fold.h" +#include "symbol-summary.h" +#include "tree-vrp.h" +#include "ipa-prop.h" +#include "tree-pretty-print.h" +#include "tree-inline.h" +#include "ipa-fnsummary.h" +#include "ipa-utils.h" +#include "tree-ssa-ccp.h" +#include "stringpool.h" +#include "attribs.h" +#include "gimple-pretty-print.h" +#include "ssa.h" + +bool +reach_nodes_via_bb1 (cgraph_node *n2); +static bool +bb1_dominates_bb2 (basic_block, basic_block, cgraph_node *); +static bool +write_before_call (struct ipa_ref *, struct ipa_ref *); +static bool +call_before_read (struct ipa_ref *, struct ipa_ref *); +static hash_map *clone_num_suffixes1; +static hash_map *func_to_clone; +static vec +get_nondominated_callees (cgraph_node *caller, cgraph_node *callee, + bool *exitEarly = NULL); + +static bool +comdat_can_be_unshared_p_1 (symtab_node *node) +{ + if (!node->externally_visible) + return true; + if (node->address_can_be_compared_p ()) + { + struct ipa_ref *ref; + + for (unsigned int i = 0; node->iterate_referring (i, ref); i++) + if (ref->address_matters_p ()) + return false; + } + + /* If the symbol is used in some weird way, + * better to not touch it. */ + if (node->force_output) + return false; + + /* Explicit instantiations needs to be output when possibly + used externally. */ + if (node->forced_by_abi && TREE_PUBLIC (node->decl) + && (node->resolution != LDPR_PREVAILING_DEF_IRONLY + && !flag_whole_program)) + return false; + + /* Non-readonly and volatile variables cannot be duplicated. */ + if (is_a (node) + && (!TREE_READONLY (node->decl) || TREE_THIS_VOLATILE (node->decl))) + return false; + return true; +} + +/* COMDAT functions must be shared only if they have address taken, + otherwise we can produce our own private implementation with + -fwhole-program. + Return true when turning COMDAT function static cannot lead to wrong + code when the resulting object links with a library defining same + COMDAT. + + Virtual functions do have their addresses taken from the vtables, + but in C++ there is no way to compare their addresses + for equality. */ + +static bool +comdat_can_be_unshared_p (symtab_node *node) +{ + if (!comdat_can_be_unshared_p_1 (node)) + return false; + if (node->same_comdat_group) + { + symtab_node *next; + + /* If more than one function is in the same COMDAT group, it must + be shared even if just one function in the comdat group has + address taken. 
*/ + for (next = node->same_comdat_group; next != node; + next = next->same_comdat_group) + if (!comdat_can_be_unshared_p_1 (next)) + return false; + } + return true; +} + +/* Return true when function NODE should + * be considered externally visible. */ + +static bool +cgraph_externally_visible_p (struct cgraph_node *node, bool whole_program) +{ + while (node->transparent_alias && node->definition) + node = node->get_alias_target (); + if (!node->definition) + return false; + if (!TREE_PUBLIC (node->decl) || DECL_EXTERNAL (node->decl)) + return false; + + /* Do not try to localize built-in functions yet. + One of problems is that we + end up mangling their asm for + WHOPR that makes it impossible to call them + using the implicit built-in declarations + anymore. Similarly this enables + us to remove them as unreachable before + actual calls may appear during + expansion or folding. */ + if (fndecl_built_in_p (node->decl)) + return true; + + /* If linker counts on us, we must preserve the function. */ + if (node->used_from_object_file_p ()) + return true; + if (DECL_PRESERVE_P (node->decl)) + return true; + if (lookup_attribute ("externally_visible", DECL_ATTRIBUTES (node->decl))) + return true; + if (lookup_attribute ("noipa", DECL_ATTRIBUTES (node->decl))) + return true; + if (TARGET_DLLIMPORT_DECL_ATTRIBUTES + && lookup_attribute ("dllexport", DECL_ATTRIBUTES (node->decl))) + return true; + if (node->resolution == LDPR_PREVAILING_DEF_IRONLY) + return false; + /* When doing LTO or whole program, we can bring COMDAT functions + static. + This improves code quality and we know we will duplicate them at + most twice + (in the case that we are not using plugin and link with object + file implementing same COMDAT) */ + if (((in_lto_p || whole_program) && !flag_incremental_link) + && DECL_COMDAT (node->decl) && comdat_can_be_unshared_p (node)) + return false; + + /* When doing link time optimizations, + * hidden symbols become local. */ + if ((in_lto_p && !flag_incremental_link) + && (DECL_VISIBILITY (node->decl) == VISIBILITY_HIDDEN + || DECL_VISIBILITY (node->decl) == VISIBILITY_INTERNAL) + /* Be sure that node is defined in IR file, not in other object + file. In that case we don't set + used_from_other_object_file. */ + && node->definition) + ; + else if (!whole_program) + return true; + + if (MAIN_NAME_P (DECL_NAME (node->decl))) + return true; + + return false; +} + +typedef vec ipa_ref_vec; + +// XXX: Move to ipa-ref.h +static const char * +stringify_ipa_ref_use (ipa_ref_use use) +{ + switch (use) + { + case IPA_REF_LOAD: + return "IPA_REF_LOAD"; + break; + case IPA_REF_STORE: + return "IPA_REF_STORE"; + break; + case IPA_REF_ADDR: + return "IPA_REF_ADDR"; + break; + case IPA_REF_ALIAS: + return "IPA_REF_ALIAS"; + break; + default: + return ""; + } +} + +static void +load_function_body_of_ipa_ref (cgraph_node *node) +{ + if (in_lto_p) + { + cgraph_node *f_cnode2 = node->ultimate_alias_target (); + if (dump_file) + { + fprintf (dump_file, "%s: for function '%s'\n", __func__, + f_cnode2->dump_asm_name ()); + dump_node (f_cnode2->decl, TDF_DETAILS, dump_file); + } + f_cnode2->get_untransformed_body (); + } +} + +static void +load_function_body_of_ipa_ref (struct ipa_ref *ref) +{ + /* IPA passes don't get the function bodies during LTO. 
*/ + if (in_lto_p) + { + symtab_node *f_node = ref->referring; + cgraph_node *f_cnode = dyn_cast (f_node); + load_function_body_of_ipa_ref (f_cnode); + } +} + +static void +dump_vnode_ipa_ref (ipa_ref *ref) +{ + fprintf (dump_file, "Reference type: %s\n", stringify_ipa_ref_use (ref->use)); + + symtab_node *f_node = ref->referring; + fprintf (dump_file, "Reference function: %s\n", f_node->dump_asm_name ()); + + fprintf (dump_file, "Gimple stmt (@0x%lx, lto_stmt_uid: %u):\n", + (long) ref->stmt, ref->lto_stmt_uid); + load_function_body_of_ipa_ref (ref); + print_gimple_stmt (dump_file, ref->stmt, 0, TDF_NONE); +} + +// XXX: Move to tree.h +static bool +tree_code_is_cst (tree op) +{ + // XXX: use cprop_constant_p () here? + int code = TREE_CODE (op); + if (code == INTEGER_CST || code == REAL_CST || code == COMPLEX_CST + || code == VECTOR_CST) + return true; + return false; +} + +/* Return true of all ops of assignment are constants. */ +static bool +gimple_assign_is_single_const (gimple *stmt) +{ + tree op; + + gcc_assert (gimple_code (stmt) == GIMPLE_ASSIGN); + + if (gimple_has_volatile_ops (stmt)) + { + if (dump_file) + fprintf (dump_file, "has volatile ops!\n"); + return false; + } + + if (gimple_num_ops (stmt) != 2) + { + if (dump_file) + fprintf (dump_file, "wrong num of ops: %u!\n", gimple_num_ops (stmt)); + return false; + } + + op = gimple_op (stmt, 1); + if (!tree_code_is_cst (op)) + { + if (dump_file) + fprintf (dump_file, "op is not cst!\n"); + return false; + } + + return true; +} + +// XXX: Move to ipa-utils.c +// XXX: from/to could be const +static bool +path_exists_dfs (hash_set *visited, cgraph_node *current, + cgraph_node *target) +{ + if (current == target) + return true; + + visited->add (current); + + cgraph_edge *cs; + for (cs = current->callees; cs; cs = cs->next_callee) + { + cgraph_node *callee = cs->callee->function_symbol (); + if (!visited->contains (callee)) + { + if (path_exists_dfs (visited, callee, target)) + { + return true; + } + } + } + return false; +} + +// XXX: Move to ipa-utils.c +// XXX: from/to could be const +static bool +path_exists (cgraph_node *from, cgraph_node *to) +{ + hash_set visited; + bool found_path = path_exists_dfs (&visited, from, to); + visited.empty (); + + if (dump_file) + { + fprintf (dump_file, "Found "); + if (!found_path) + fprintf (dump_file, "*no* "); + fprintf (dump_file, "path from %s to %s.\n", from->name (), to->name ()); + } + return found_path; +} + +static vec +get_nondominated_callees (cgraph_node *caller, cgraph_node *callee, + bool *exitEarly) +{ + // assumptions: + // * there is a callsite from caller to callee + + if (in_lto_p) + caller->get_untransformed_body (); + + function *func = DECL_STRUCT_FUNCTION (caller->decl); + basic_block bb; + bool found = false; + FOR_EACH_BB_FN (bb, func) + { + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (gimple_code (stmt) != GIMPLE_CALL) + continue; + + // The question here is, what happens if we don't have + // a tree that is of the type + // var = call() + // and instead we have a tree of the type + // call() + // are these trees even possible? + // I would prefer if they are all the same... 
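+          // For reference, both call shapes exist in GIMPLE (hypothetical
+          // examples, not from this patch):
+          //   x_1 = foo (a_2);   call with an LHS; gimple_call_lhs () is x_1
+          //   foo (a_2);         plain call; gimple_call_lhs () is NULL_TREE
+          // Both are GIMPLE_CALL statements, so both reach this point.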
+          if (dump_file)
+            print_gimple_stmt (dump_file, stmt, 0, TDF_NONE);
+          tree rhs = gimple_op (stmt, 1);
+          if (TREE_CODE (rhs) == RECORD_TYPE)
+            {
+              gcc_assert (exitEarly);
+              *exitEarly = true;
+              return vNULL;
+            }
+          tree callee_decl = gimple_call_fndecl (stmt);
+          if (!callee_decl)
+            {
+              gcc_assert (exitEarly);
+              *exitEarly = true;
+              return vNULL;
+            }
+          cgraph_node *callee_node = cgraph_node::get (callee_decl);
+          if (callee_node != callee)
+            continue;
+
+          found = true;
+        }
+      if (found)
+        break;
+    }
+
+  gcc_assert (found);
+
+  /* At this point BB is the basic block containing the call to CALLEE.
+     Collect the callees in CALLER that are not dominated by it.  */
+  vec<cgraph_node *> callees = vNULL;
+  basic_block bb_c;
+  FOR_EACH_BB_FN (bb_c, func)
+    {
+      bool self_dominates = bb == bb_c;
+      bool bb_dominates_bbc = bb1_dominates_bb2 (bb, bb_c, caller);
+      if (bb_dominates_bbc && !self_dominates)
+        continue;
+
+      for (gimple_stmt_iterator gsi = gsi_start_bb (bb_c); !gsi_end_p (gsi);
+           gsi_next (&gsi))
+        {
+          gimple *stmt = gsi_stmt (gsi);
+          if (gimple_code (stmt) != GIMPLE_CALL)
+            continue;
+
+          tree callee_decl = gimple_call_fndecl (stmt);
+          cgraph_node *callee_node = cgraph_node::get (callee_decl);
+
+          if (fndecl_built_in_p (callee_node->decl))
+            continue;
+
+          if (self_dominates && callee_node == callee)
+            break;
+
+          callees.safe_push (callee_node);
+        }
+    }
+  return callees;
+}
+
+// XXX: Move to class symtab_node in cgraph.h
+// Return the first IPA_REF_ADDR reference to N, or NULL if none exists.
+static ipa_ref *
+symtab_node_iterate_address_uses (symtab_node *n)
+{
+  ipa_ref *ref = NULL;
+  for (unsigned i = 0; n->iterate_referring (i, ref); i++)
+    if (ref->use == IPA_REF_ADDR)
+      return ref;
+  return NULL;
+}
+
+// XXX: Move to class symtab_node in cgraph.h
+static bool
+symtab_node_address_matters_p (symtab_node *n)
+{
+  struct ipa_ref *ref;
+
+  if (!n->address_can_be_compared_p ())
+    return false;
+  if (n->externally_visible || n->force_output)
+    return true;
+
+  for (unsigned int i = 0; n->iterate_referring (i, ref); i++)
+    if (ref->address_matters_p ())
+      return true;
+  return false;
+}
+
+static bool
+bb1_dominates_bb2 (basic_block bb1, basic_block bb2, cgraph_node *cnode)
+{
+  /* Self dominance.  */
+  if (bb1 == bb2)
+    return true;
+
+  push_cfun (DECL_STRUCT_FUNCTION (cnode->decl));
+
+  /* If dominator info is not available, we need to calculate it.  */
+  if (!dom_info_available_p (CDI_DOMINATORS))
+    calculate_dominance_info (CDI_DOMINATORS);
+
+  /* Check if BB2 is dominated by BB1.  */
+  bool ret = dominated_by_p (CDI_DOMINATORS, bb2, bb1);
+
+  /* Restore cfun.  */
+  pop_cfun ();
+
+  return ret;
+}
+
+static bool
+write_before_read_in_function (struct ipa_ref *write, struct ipa_ref *read)
+{
+  basic_block w_bb = gimple_bb (write->stmt);
+  basic_block r_bb = gimple_bb (read->stmt);
+
+  if (w_bb != r_bb)
+    {
+      /* The dominator framework operates on cfun, so we need to set
+         cfun accordingly.  */
+      symtab_node *w_node = write->referring;
+      cgraph_node *w_cnode = dyn_cast <cgraph_node *> (w_node);
+      return bb1_dominates_bb2 (w_bb, r_bb, w_cnode);
+    }
+
+  gimple_stmt_iterator gsi;
+  for (gsi = gsi_start_bb (w_bb); !gsi_end_p (gsi); gsi_next (&gsi))
+    {
+      if (gsi_stmt (gsi) == write->stmt)
+        return true;
+      if (gsi_stmt (gsi) == read->stmt)
+        return false;
+    }
+
+  /* Neither the write nor the read was found in its BB.  */
+  gcc_unreachable ();
+
+  return false;
+}
+
+/*
+ * DFS over callees and return whichever of A or B is found first.
+ */ +static cgraph_node * +find_cgraph_in_callee_set (cgraph_node *n, hash_set set, + cgraph_node *a, cgraph_node *b) +{ + cgraph_edge *cs; + for (cs = n->callees; cs; cs = cs->next_callee) + { + cgraph_node *callee = cs->callee->function_symbol (); + if (dump_file) + fprintf (dump_file, "Child of %s: %s\n", n->dump_asm_name (), + callee->dump_asm_name ()); + if (callee == a) + return a; + if (callee == b) + return b; + if (!set.contains (callee)) + continue; + return find_cgraph_in_callee_set (callee, set, a, b); + } + return NULL; +} + +/* Walks back along caller relations until main is found. */ +static cgraph_node * +get_ancestor_main_dfs (hash_set *visited, + vec *path, cgraph_node *node) +{ + if (MAIN_NAME_P (DECL_NAME (node->decl))) + { + path->safe_push (node); + return node; + } + + visited->add (node); + + cgraph_edge *cs; + for (cs = node->callers; cs; cs = cs->next_caller) + { + cgraph_node *caller = cs->caller->function_symbol (); + if (!visited->contains (caller)) + { + cgraph_node *main = get_ancestor_main_dfs (visited, path, caller); + if (main) + { + path->safe_push (node); + return main; + } + } + } + return NULL; +} + +static cgraph_node * +get_path_from_main_to (cgraph_node *node, vec *path) +{ + hash_set visited; + cgraph_node *main = get_ancestor_main_dfs (&visited, path, node); + visited.empty (); + return main; +} + +/* + * Verifying that a stmt s1 is dominated by a stmt s2 + * across function borders is not trivial with the available + * infrastructure (dominator algorithm on bb level plus cgraph). + * If we rule out external calls/callbacks, then we still need + * to guarantee a write on each possible path to the read. + * + * The implemented solution to this problem, which is of course + * too strict, + * but also not too compute/memory intensive is as follows: + * + * - Check if write is reachable by main () by only looking into + * the first bb in each function on the path. + * - All call stmts between main () and write must not possibly + * reach a read. We consider indirect call statements as + * possible reaching read (unless we can prove opposite). + * + * The external calls/callbacks are ruled out as follows: + * + * - all possible ancestors of read must not be external visible + * - all possible ancestors of read must not be function pointers + * - Something else? + */ +static bool +write_before_read_across_function_borders (struct ipa_ref *write, + struct ipa_ref *read) +{ + symtab_node *w_node = write->referring; + cgraph_node *w_cnode = dyn_cast (w_node); + symtab_node *r_node = read->referring; + cgraph_node *r_cnode = dyn_cast (r_node); + cgraph_node *main; + + /* Get main () function. */ + vec pw = vNULL; + main = get_path_from_main_to (w_cnode, &pw); + if (!main) + { + if (dump_file) + fprintf (dump_file, "main () is not ancestor of write -> skipping.\n"); + return false; + } + + vec pr = vNULL; + cgraph_node *main_to_read = get_path_from_main_to (r_cnode, &pr); + if (!main_to_read) + { + if (dump_file) + fprintf (dump_file, "main () is not ancestor of read -> skipping.\n"); + return false; + } + + // TODO: is write guaranteed to happen? + // if it is not guaranteed to happen, we shouldn't modify anything! + // The very first approximation. There should be a path from main to w_cnode + // such that only the first basic block is actually inspected. + // Future work will involve looking at unconditional paths. 
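+  // A shape this approximation accepts (hypothetical example):
+  //   int main () { init_all (); run (); }   -- call in main's first BB
+  //   void init_all () { global = 42; }      -- reaches the write
+  // A call to the writer buried behind a condition would be rejected.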
+ // I think right now, all paths are reachable from first basic block + if (!reach_nodes_via_bb1 (w_cnode)) + return false; + + int i; + cgraph_node *node_in_pr; + FOR_EACH_VEC_ELT (pr, i, node_in_pr) + { + // I think main has to be externally visible. + if (node_in_pr == main) + continue; + + /* Assure that all paths to read are not externally visible. */ + if (cgraph_externally_visible_p (node_in_pr, flag_whole_program)) + { + if (dump_file) + fprintf (dump_file, "%s is externally visible... skipping\n", + node_in_pr->dump_asm_name ()); + return false; + } + + /* Assure that all paths to read are not + * used as function pointers. */ + if (node_in_pr->address_taken) + { + if (dump_file) + fprintf (dump_file, "%s might be function pointer... skipping\n", + node_in_pr->dump_asm_name ()); + return false; + } + } + + cgraph_node *caller = main; + cgraph_node *node_in_pw; + FOR_EACH_VEC_ELT (pw, i, node_in_pw) + { + gcc_assert (w_cnode != r_cnode); + if (node_in_pw == r_cnode && path_exists (r_cnode, w_cnode)) + { + return call_before_read (write, read); + } + + if (node_in_pw == w_cnode && path_exists (w_cnode, r_cnode)) + { + return write_before_call (write, read); + } + + if (node_in_pw == main) + { + continue; + } + + if (dump_file) + fprintf (dump_file, "get_nondominated_callees from %s to %s\n", + caller->dump_asm_name (), node_in_pw->dump_asm_name ()); + + bool exitEarly = false; + vec non_dominated_callees + = get_nondominated_callees (caller, node_in_pw, &exitEarly); + if (exitEarly) + return false; + cgraph_node *non_dominated_callee; + int j; + FOR_EACH_VEC_ELT (non_dominated_callees, j, non_dominated_callee) + { + if (dump_file) + fprintf (dump_file, "callee %d %s\n", j, + non_dominated_callee->dump_asm_name ()); + if (path_exists (non_dominated_callee, r_cnode)) + { + return false; + } + } + + caller = node_in_pw; + } + return true; +} + +static vec +get_bb1_callees (cgraph_node *c, cgraph_node *w_cnode) +{ + vec callees = vNULL; + if (fndecl_built_in_p (c->decl)) + return vNULL; + + if (dump_file) + fprintf (dump_file, "before get_untransformed_body %s\n", + c->dump_asm_name ()); + + if (!c->definition) + return vNULL; + + push_cfun (DECL_STRUCT_FUNCTION (c->decl)); + + if (in_lto_p) + c->get_untransformed_body (); + + pop_cfun (); + + function *func = DECL_STRUCT_FUNCTION (c->decl); + /* Get first BB (after the fake entry BB). 
*/ + basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (func)->next_bb; + + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + if (gimple_code (stmt) != GIMPLE_CALL) + continue; + + tree callee_decl = gimple_call_fndecl (stmt); + cgraph_node *callee_node = cgraph_node::get (callee_decl); + if (!path_exists (callee_node, w_cnode)) + continue; + + callees.safe_push (callee_node); + } + + return callees; +} + +static bool +reach_nodes_via_bb1_dfs (hash_set *visited, cgraph_node *n1, + cgraph_node *n2) +{ + if (n1 == n2) + return true; + + visited->add (n1); + + vec callees = get_bb1_callees (n1, n2); + + int i; + cgraph_node *callee; + FOR_EACH_VEC_ELT (callees, i, callee) + { + if (!visited->contains (callee)) + { + bool found = reach_nodes_via_bb1_dfs (visited, callee, n2); + if (found) + { + callees.release (); + return true; + } + } + } + + return false; +} + +bool +reach_nodes_via_bb1 (cgraph_node *n2) +{ + vec pr = vNULL; + cgraph_node *main = get_path_from_main_to (n2, &pr); + hash_set visited; + bool path_exists = reach_nodes_via_bb1_dfs (&visited, main, n2); + visited.empty (); + pr.release (); + return path_exists; +} + +static bool +write_before_call (struct ipa_ref *write, struct ipa_ref *read) +{ + symtab_node *w_node = write->referring; + cgraph_node *w_cnode = dyn_cast (w_node); + symtab_node *r_node = read->referring; + cgraph_node *r_cnode = dyn_cast (r_node); + + gcc_assert (path_exists (w_cnode, r_cnode)); + gcc_assert (w_cnode != r_cnode); + + basic_block w_bb = gimple_bb (write->stmt); + basic_block r_bb = gimple_bb (read->stmt); + + if (in_lto_p) + { + w_cnode->get_untransformed_body (); + } + + function *func = DECL_STRUCT_FUNCTION (w_cnode->decl); + basic_block c_bb; + vec callees = vNULL; + FOR_EACH_BB_FN (c_bb, func) + { + bool self_dominates = w_bb == c_bb; + bool w_bb_dominates_c_bb = bb1_dominates_bb2 (w_bb, c_bb, w_cnode); + if (w_bb_dominates_c_bb && !self_dominates) + continue; + + for (gimple_stmt_iterator gsi = gsi_start_bb (c_bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + if (stmt == write->stmt) + { + break; + } + + if (gimple_code (stmt) != GIMPLE_CALL) + { + continue; + } + if (dump_file) + print_gimple_stmt (dump_file, stmt, 0, TDF_NONE); + tree rhs = gimple_op (stmt, 1); + + if (rhs && TREE_CODE (rhs) == POINTER_TYPE) + return false; + + tree callee_decl = gimple_call_fndecl (stmt); + if (!callee_decl) + return false; + + cgraph_node *callee_node = cgraph_node::get (callee_decl); + const char *identifier + = IDENTIFIER_POINTER (DECL_NAME (callee_node->decl)); + const char *sigsetjmp = "sigsetjmp"; + if (strstr (identifier, sigsetjmp) != NULL) + return false; + + if (fndecl_built_in_p (callee_node->decl)) + continue; + + if (dump_file) + fprintf (dump_file, "found callee %s\n", + callee_node->dump_asm_name ()); + callees.safe_push (callee_node); + } + } + cgraph_node *callee; + int i; + FOR_EACH_VEC_ELT (callees, i, callee) + { + if (path_exists (callee, r_cnode)) + { + return false; + } + } + + return true; +} + +static bool +call_before_read (struct ipa_ref *write, struct ipa_ref *read) +{ + symtab_node *w_node = write->referring; + cgraph_node *w_cnode = dyn_cast (w_node); + symtab_node *r_node = read->referring; + cgraph_node *r_cnode = dyn_cast (r_node); + + gcc_assert (path_exists (r_cnode, w_cnode)); + gcc_assert (w_cnode != r_cnode); + + basic_block w_bb = gimple_bb (write->stmt); + basic_block r_bb = gimple_bb (read->stmt); + + if (in_lto_p) + 
r_cnode->get_untransformed_body (); + + function *func = DECL_STRUCT_FUNCTION (r_cnode->decl); + basic_block c_bb; + FOR_EACH_BB_FN (c_bb, func) + { + bool self_dominates = c_bb == r_bb; + bool call_dominates_read = bb1_dominates_bb2 (c_bb, r_bb, r_cnode); + if (!call_dominates_read && !self_dominates) + continue; + + for (gimple_stmt_iterator gsi = gsi_start_bb (c_bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + + // self dominance + if (stmt == read->stmt) + break; + + if (gimple_code (stmt) != GIMPLE_CALL) + continue; + + if (dump_file) + print_gimple_stmt (dump_file, stmt, 0, TDF_NONE); + tree rhs = gimple_op (stmt, 1); + if (TREE_CODE (rhs) == POINTER_TYPE) + return false; + tree callee_decl = gimple_call_fndecl (stmt); + + cgraph_node *callee_node = cgraph_node::get (callee_decl); + const char *identifier + = IDENTIFIER_POINTER (DECL_NAME (callee_node->decl)); + const char *sigsetjmp = "sigsetjmp"; + if (strstr (identifier, sigsetjmp) != NULL) + return false; + + if (path_exists (callee_node, w_cnode)) + return true; + } + } + return false; +} + +static bool +write_before_read (struct ipa_ref *write, struct ipa_ref *read) +{ + symtab_node *w_node = write->referring; + cgraph_node *w_cnode = dyn_cast (w_node); + symtab_node *r_node = read->referring; + cgraph_node *r_cnode = dyn_cast (r_node); + + if (w_cnode == r_cnode) + /* Within the same function. */ + return write_before_read_in_function (write, read); + + /* Not within the same function. */ + return write_before_read_across_function_borders (write, read); +} + +static bool +write_before_all_reads (struct ipa_ref *write, const ipa_ref_vec &reads) +{ + int i; + struct ipa_ref *read; + + FOR_EACH_VEC_ELT (reads, i, read) + { + load_function_body_of_ipa_ref (read); + if (!write_before_read (write, read)) + return false; + } + return true; +} + +static void +propagate_constant_to_read (tree write_val, struct ipa_ref *ref, + hash_set *func) +{ + gcc_assert (ref->use == IPA_REF_LOAD); + load_function_body_of_ipa_ref (ref); + + gimple *read_stmt = ref->stmt; + + gcc_assert (gimple_code (read_stmt) == GIMPLE_ASSIGN); + gcc_assert (gimple_num_ops (read_stmt) == 2); + + tree old_lhs = gimple_op (read_stmt, 0); + symtab_node *r_node = ref->referring; + cgraph_node *r_cnode = dyn_cast (r_node); + + // if (!func->contains (r_cnode)) func->add (r_cnode); + cgraph_node **possible_clone = func_to_clone->get (r_cnode); + if (!possible_clone) + { + const char *suffix = "test"; + // callers has to be vNULL, otherwise, we will be + // analyzing clones... + // and we don't want that... + // but that means that we will need to update the callers + // later... in update_callgraph + cgraph_node *clone + = r_cnode->create_version_clone_with_body (vNULL, NULL, NULL, NULL, + NULL, suffix, NULL); + clone->get_untransformed_body (); + func_to_clone->put (r_cnode, clone); + } + + possible_clone = func_to_clone->get (r_cnode); + cgraph_node *clone = *possible_clone; + + // Build new stmt and replace old. + gimple_stmt_iterator gsi; + basic_block bb; + // Let's create a clone with body here... + // The clone should not have callers as + // to not interfere with the current + // analysis. + // The callers will be set at the end... 
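+  // The intended rewrite inside the clone (hypothetical before/after):
+  //   before:  _1 = my_global;  use (_1);
+  //   after:   _2 = 42;  _1 = my_global;  use (_2);
+  // The new SSA name is assigned write_val, and all uses of the old LHS
+  // are redirected to it; the now-dead load is left behind.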
+ + push_cfun (DECL_STRUCT_FUNCTION (clone->decl)); + function *clone_func = DECL_STRUCT_FUNCTION (clone->decl); + bool found = false; + FOR_EACH_BB_FN (bb, clone_func) + { + for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (gimple_code (stmt) != GIMPLE_ASSIGN) + continue; + tree old_val_clone = gimple_op (stmt, 1); + tree lhs = gimple_op (stmt, 0); + + // TODO: what happens when it is not a variable decl? + // This might actually be the source of imprecision + if (TREE_CODE (old_val_clone) != VAR_DECL) + continue; + + tree old_val = gimple_op (read_stmt, 1); + if (IDENTIFIER_POINTER (DECL_NAME (old_val_clone)) + != IDENTIFIER_POINTER (DECL_NAME (old_val))) + continue; + + found = true; + + // REPLACE!!! + gimple_stmt_iterator gsi2 = gsi_for_stmt (stmt); + tree new_lhs = make_ssa_name (TREE_TYPE (lhs)); + gimple *new_read_stmt = gimple_build_assign (new_lhs, write_val); + gsi_insert_before (&gsi2, new_read_stmt, GSI_SAME_STMT); + + gimple *use_stmt; + imm_use_iterator use_iter; + FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs) + { + use_operand_p use_p; + FOR_EACH_IMM_USE_ON_STMT (use_p, use_iter) + SET_USE (use_p, new_lhs); + update_stmt (use_stmt); + } + } + + if (found) + break; + } + gcc_assert (found); + pop_cfun (); + } + + static bool + ipa_initcall_get_writes_and_reads (varpool_node *vnode, ipa_ref_vec *writes, + ipa_ref_vec *reads) + { + int i; + struct ipa_ref *ref; + + if (dump_file) + fprintf (dump_file, "%s for variable '%s'.\n", __func__, vnode->name ()); + + /* Only IPA_REF_STORE and IPA_REF_LOAD left. */ + for (i = 0; vnode->iterate_referring (i, ref); i++) + { + symtab_node *f_node = ref->referring; + cgraph_node *f_cnode = dyn_cast (f_node); + + if (!f_cnode) + { + if (dump_file) + fprintf (dump_file, + "skipping variable %s due to static initialization\n", + vnode->name ()); + return false; + } + // it is possible that f_cnode is NULL if the dyn_cast fails. + // If the dyn_cast fails, this is an example of static initialization. + const char *identifier = IDENTIFIER_POINTER (DECL_NAME (f_cnode->decl)); + // This is the suffix we are adding to our clones. + // Therefore, if we find the suffix in the identifier, + // that means that we found a variable in a clone and + // we would like to skip it. + // TODO: make this a global static for easier changes... + const char *suffix = "test.0"; + if (strstr (identifier, suffix) != NULL) + continue; + + if (ref->use == IPA_REF_STORE) + writes->safe_push (ref); + + if (ref->use == IPA_REF_LOAD) + reads->safe_push (ref); + } + return true; + } + + static void + propagate_constant_to_reads (tree write_val, const ipa_ref_vec &reads_original, + hash_set *funcs) + { + /* Iterate over reads and replace them by constant. */ + struct ipa_ref *ref; + int i; + FOR_EACH_VEC_ELT (reads_original, i, ref) + { + propagate_constant_to_read (write_val, ref, funcs); + } + } + + /* + * Extracts the assigned constant, iff the given statement + * is a constant assignment. Returns NULL_TREE otherwise. 
+ */ + static tree + extract_constant_from_initcall_write (struct ipa_ref *write) + { + symtab_node *f_node = write->referring; + cgraph_node *f_cnode = dyn_cast (f_node); + + tree decl = f_cnode->decl; + if (TREE_CODE (decl) != FUNCTION_DECL) + { + if (dump_file) + fprintf (dump_file, "Not function decl -> skipping.\n"); + return NULL_TREE; + } + + if (!f_cnode->has_gimple_body_p () && dump_file) + fprintf (dump_file, "Does NOT have gimple body!\n"); + if (f_cnode->inlined_to && dump_file) + fprintf (dump_file, "Inlined To\n"); + + if (dump_file) + { + fprintf (dump_file, "%s: for writer gimple:\n", __func__); + dump_vnode_ipa_ref (write); + } + + /* Get the write function's body. */ + load_function_body_of_ipa_ref (write); + + gimple *stmt = write->stmt; + + /* Verify that we have an assignment. */ + if (gimple_code (stmt) != GIMPLE_ASSIGN) + { + if (dump_file) + fprintf (dump_file, "Write stmt not assignment -> skipping.\n"); + return NULL_TREE; + } + + /* Check if write's LHS (vnode) is not volatile. */ + tree lhs = gimple_assign_lhs (stmt); + if (TREE_THIS_VOLATILE (TREE_TYPE (lhs))) + { + if (dump_file) + fprintf (dump_file, "Variable volatile -> skipping.\n"); + return NULL_TREE; + } + + /* Check if RHS of write stmt is constant. */ + if (!gimple_assign_is_single_const (stmt)) + { + if (dump_file) + { + fprintf (dump_file, "Found non-const operands.\n"); + } + return NULL_TREE; + } + + /* Extract the constant. */ + tree write_val = gimple_op (stmt, 1); + + if (dump_file) + { + fprintf (dump_file, "Const op:\n"); + dump_node (write_val, TDF_DETAILS, dump_file); + } + + return write_val; + } + + static void + ipa_initcall_cp_execute_for_var (varpool_node *vnode, + hash_set *update_functions) + { + ipa_ref_vec writes = vNULL; + ipa_ref_vec reads = vNULL; + struct ipa_ref *write; + tree write_val; + gimple_stmt_iterator gsi; + bool remove_permanently; + + if (dump_file) + fprintf (dump_file, "%s for variable '%s'.\n", __func__, vnode->name ()); + + if (dump_file) + { + dump_node (vnode->decl, TDF_DETAILS, dump_file); + } + + /* Variable must not be externally visible. */ + if (vnode->externally_visible_p ()) + { + if (dump_file) + fprintf (dump_file, "\texternally visible " + "-> skipping\n"); + return; + } + + /* All refs must be explicit. */ + if (!vnode->all_refs_explicit_p ()) + { + if (dump_file) + fprintf (dump_file, "Not explicit variable refs " + "-> skipping.\n"); + return; + } + + /* Check if any ref->use is IPA_REF_ALIAS. */ + if (vnode->has_aliases_p ()) + { + if (dump_file) + fprintf (dump_file, "Found IPA_REF_ALIAS -> skipping.\n"); + return; + } + + /* Check if any ref->use is IPA_REF_ADDR. */ + if (symtab_node_address_matters_p (vnode)) + { + if (dump_file) + fprintf (dump_file, "Found IPA_REF_ADDR -> skipping.\n"); + return; + } + + /* We don't touch arrays. */ + if (TREE_CODE (TREE_TYPE (vnode->decl)) == ARRAY_TYPE) + { + if (dump_file) + fprintf (dump_file, "Variable is array -> skipping.\n"); + return; + } + + /* We don't touch structs. */ + if (TREE_CODE (TREE_TYPE (vnode->decl)) == RECORD_TYPE) + { + if (dump_file) + fprintf (dump_file, "Variable is struct -> skipping.\n"); + return; + } + + /* We don't touch unions. */ + if (TREE_CODE (TREE_TYPE (vnode->decl)) == UNION_TYPE) + { + if (dump_file) + fprintf (dump_file, "Variable is union -> skipping.\n"); + return; + } + + /* Get all refs (must be REF_STORE or REF_LOAD). 
*/ + bool continue_work + = ipa_initcall_get_writes_and_reads (vnode, &writes, &reads); + if (!continue_work) + { + if (dump_file) + fprintf (dump_file, "Static initialization -> skipping.\n"); + writes.release (); + reads.release (); + return; + } + + if (writes.length () > 1) + { + /* More than one writer. */ + if (dump_file) + fprintf (dump_file, "More than one writer -> skipping.\n"); + writes.release (); + reads.release (); + return; + } + else if (writes.length () < 1) + { + /* No writer. */ + if (dump_file) + fprintf (dump_file, "No writer -> skipping.\n"); + writes.release (); + reads.release (); + return; + } + + /* + * Note: + * Limiting ourselfs to only one write is not necessary in general, + * but good enough to address the typical init () case. + * Big advantage is, that it makes the following code much easier. + * + * TODO: I believe that if we create clones, we might get two writes + * Investigate + */ + + /* Get (only) write ref. */ + write = writes.pop (); + + /* Check if write's RHS is a constant and get it. */ + write_val = extract_constant_from_initcall_write (write); + if (write_val == NULL_TREE) + { + if (dump_file) + fprintf (dump_file, "Write's RHS is not constant" + " -> skipping.\n"); + writes.release (); + reads.release (); + return; + } + + /* Assure all reads are after the write. */ + if (!write_before_all_reads (write, reads)) + { + if (dump_file) + fprintf (dump_file, "Write not guaranteed " + "to be before read -> skipping.\n"); + writes.release (); + reads.release (); + return; + } + + /* Propagate constant to reads. */ + if (!flag_initcall_cp_dryrun) + propagate_constant_to_reads (write_val, reads, update_functions); + + /* Finally remove the write. */ + gsi = gsi_for_stmt (write->stmt); + remove_permanently = false; // XXX: fails with true? 
+ // gsi_remove (&gsi, remove_permanently); + + if (dump_file) + fprintf (dump_file, "Eliminated variable '%s'.\n\n", vnode->name ()); + + writes.release (); + reads.release (); + } + + bool + update_callgraph (cgraph_node *const &r_cnode, cgraph_node **clone_ptr, void *) + { + vec callers = r_cnode->collect_callers (); + cgraph_node *clone = *clone_ptr; + cgraph_edge *e; + int i; + profile_count count = r_cnode->count; + + FOR_EACH_VEC_ELT (callers, i, e) + e->redirect_callee (clone); + + /* + for (e = r_cnode->callees; e; e=e->next_callee) + e->clone (clone, e->call_stmt, e->lto_stmt_uid, count, count, true); + + for (e = r_cnode->indirect_calls; e; e=e->next_callee) + e->clone (clone, e->call_stmt, e->lto_stmt_uid, count, count, true); + */ + for (e = clone->callers; e; e = e->next_caller) + { + e->caller->get_untransformed_body (); + function *inner_function = DECL_STRUCT_FUNCTION (e->caller->decl); + gimple_call_set_fndecl (e->call_stmt, clone->decl); + maybe_clean_eh_stmt_fn (inner_function, e->call_stmt); + } + + r_cnode->skip_ipa_cp = true; + // r_cnode->remove (); + push_cfun (DECL_STRUCT_FUNCTION (r_cnode->decl)); + + if (dump_file) + fprintf (dump_file, "dumping function %s\n", r_cnode->dump_asm_name ()); + function *func = DECL_STRUCT_FUNCTION (r_cnode->decl); + basic_block bb; + FOR_EACH_BB_FN (bb, func) + { + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + gimple *stmt = gsi_stmt (gsi); + if (dump_file) + print_gimple_stmt (dump_file, stmt, 0, TDF_NONE); + } + } + if (dom_info_available_p (CDI_DOMINATORS)) + free_dominance_info (CDI_DOMINATORS); + pop_cfun (); + return true; + } + + static unsigned int + ipa_initcall_cp_execute (void) + { + varpool_node *vnode; + + clone_num_suffixes1 = new hash_map; + hash_set update_functions; + func_to_clone = new hash_map; + FOR_EACH_VARIABLE (vnode) + { + ipa_initcall_cp_execute_for_var (vnode, &update_functions); + } + + if (!flag_initcall_cp_dryrun) + func_to_clone->traverse (NULL); + + delete clone_num_suffixes1; + delete func_to_clone; + + return 0; + } + + namespace { + + const pass_data pass_data_ipa_initcall_cp = { + IPA_PASS, + "initcall_cp", + OPTGROUP_NONE, + TV_NONE, + (PROP_cfg | PROP_ssa), + 0, + 0, + 0, + (TODO_update_ssa | TODO_cleanup_cfg | TODO_dump_symtab + | TODO_rebuild_cgraph_edges | TODO_discard_function), + }; + + class pass_ipa_initcall_cp : public ipa_opt_pass_d + { + public: + pass_ipa_initcall_cp (gcc::context *ctxt) + : ipa_opt_pass_d (pass_data_ipa_initcall_cp, ctxt, NULL, NULL, NULL, NULL, + NULL, NULL, 0, NULL, NULL) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return flag_initcall_cp || flag_initcall_cp_dryrun; + } + + virtual unsigned int execute (function *) + { + return ipa_initcall_cp_execute (); + } + + }; // class pass_ipa_initcall_cp + + } // namespace + + ipa_opt_pass_d * + make_pass_ipa_initcall_cp (gcc::context *ctxt) + { + return new pass_ipa_initcall_cp (ctxt); + } diff --git a/gcc/passes.def b/gcc/passes.def index eea4d780887..4694b3e05b5 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -151,6 +151,7 @@ along with GCC; see the file COPYING3. 
If not see
       NEXT_PASS (pass_ipa_profile);
       NEXT_PASS (pass_ipa_icf);
       NEXT_PASS (pass_ipa_devirt);
+      NEXT_PASS (pass_ipa_initcall_cp);
       NEXT_PASS (pass_ipa_cp);
       NEXT_PASS (pass_ipa_sra);
       NEXT_PASS (pass_ipa_cdtor_merge);
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 187f1a85cd3..97a9a2a126c 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -501,6 +501,8 @@ extern ipa_opt_pass_d *make_pass_ipa_fn_summary (gcc::context *ctxt);
 extern ipa_opt_pass_d *make_pass_ipa_inline (gcc::context *ctxt);
 extern simple_ipa_opt_pass *make_pass_ipa_free_lang_data (gcc::context *ctxt);
 extern simple_ipa_opt_pass *make_pass_ipa_free_fn_summary (gcc::context *ctxt);
+extern ipa_opt_pass_d *make_pass_ipa_initcall_cp (gcc::context *ctxt);
+
 extern ipa_opt_pass_d *make_pass_ipa_cp (gcc::context *ctxt);
 extern ipa_opt_pass_d *make_pass_ipa_sra (gcc::context *ctxt);
 extern ipa_opt_pass_d *make_pass_ipa_icf (gcc::context *ctxt);
--
Gitee
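Usage sketch (an assumption inferred from the options and pass placement
above, not documented in the patch): -finitcall-cp enables the
transformation, while -finitcall-cp-dryrun performs only the analysis and
leaves the call graph untouched.  Because the soundness checks rely on
whole-program visibility, a plausible invocation is:

    gcc -O2 -flto -fwhole-program -finitcall-cp main.c init.c -o app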