From 65e65a9d81c0c89da3e32f1f3d28a378e798fb36 Mon Sep 17 00:00:00 2001 From: benniaobufeijiushiji Date: Thu, 27 Oct 2022 10:26:34 +0800 Subject: [PATCH 1/2] [Loop-distribution] Insert temp arrays built from isomorphic stmts Use option -ftree-slp-transpose-vectorize Build temp arrays for isomorphic stmt and regard them as new seed_stmts for loop distribution. --- gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c | 67 +++ gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c | 17 + gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c | 19 + gcc/tree-loop-distribution.c | 577 +++++++++++++++++++- 4 files changed, 663 insertions(+), 17 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c new file mode 100644 index 00000000000..6494636477c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-1.c @@ -0,0 +1,67 @@ +/* { dg-do compile { target { aarch64*-*-linux* } } } */ +/* { dg-do run { target { aarch64*-*-linux* } } } */ +/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details -save-temps" } */ + +#include +#include + +static unsigned inline abs2 (unsigned a) +{ + unsigned s = ((a>>15)&0x10001)*0xffff; + return (a+s)^s; +} + +int foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib) +{ + unsigned tmp[4][4]; + unsigned a0, a1, a2, a3; + int sum = 0; + for (int i = 0; i < 4; i++, oxa += ia, oxb += ib) + { + a0 = (oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16); + a1 = (oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16); + a2 = (oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16); + a3 = (oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16); + int t0 = a0 + a1; + int t1 = a0 - a1; + int t2 = a2 + a3; + int t3 = a2 - a3; + tmp[i][0] = t0 + t2; + tmp[i][2] = t0 - t2; + tmp[i][1] = t1 + t3; + tmp[i][3] = t1 - t3; + } + for (int i = 0; i 
< 4; i++) + { + int t0 = tmp[0][i] + tmp[1][i]; + int t1 = tmp[0][i] - tmp[1][i]; + int t2 = tmp[2][i] + tmp[3][i]; + int t3 = tmp[2][i] - tmp[3][i]; + a0 = t0 + t2; + a2 = t0 - t2; + a1 = t1 + t3; + a3 = t1 - t3; + sum += abs2 (a0) + abs2 (a1) + abs2 (a2) + abs2 (a3); + } + return (((unsigned short) sum) + ((unsigned) sum >>16)) >> 1; +} + +int main () +{ + unsigned char oxa[128] = {0}; + unsigned char oxb[128] = {0}; + for (int i = 0; i < 128; i++) + { + oxa[i] += i * 3; + oxb[i] = i * 2; + } + int sum = foo (oxa, 16, oxb, 32); + if (sum != 736) + { + abort (); + } + return 0; +} + +/* { dg-final { scan-tree-dump-times "Insertion done: 4 temp arrays inserted" 1 "ldist" } } */ +/* { dg-final { scan-tree-dump-times "distributed: split to 2 loops" 1 "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c new file mode 100644 index 00000000000..1b50fd27d6a --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-2.c @@ -0,0 +1,17 @@ +/* { dg-do compile { target { aarch64*-*-linux* } } } */ +/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details" } */ + +unsigned a0[4], a1[4], a2[4], a3[4]; + +void foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib) +{ + for (int i = 0; i < 4; i++, oxa += ia, oxb += ib) + { + a0[i] = (oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16); + a1[i] = (oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16); + a2[i] = (oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16); + a3[i] = (oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16); + } +} + +/* { dg-final { scan-tree-dump-times "Loop 1 not distributed." 
1 "ldist" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c new file mode 100644 index 00000000000..94b992b050d --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/ins-ldist-3.c @@ -0,0 +1,19 @@ +/* { dg-do compile { target { aarch64*-*-linux* } } } */ +/* { dg-options "-O3 -ftree-slp-transpose-vectorize -fdump-tree-ldist-all-details" } */ + +unsigned a0[4], a1[4], a2[4], a3[4]; + +void foo (unsigned char *oxa, int ia, unsigned char *oxb, int ib) +{ + for (int i = 0; i < 4; i++, oxa += ia, oxb += ib) + { + a0[i] = ((oxa[0] - oxb[0]) + ((oxa[4] - oxb[4]) << 16)) + 1; + a1[i] = ((oxa[1] - oxb[1]) + ((oxa[5] - oxb[5]) << 16)) - 2; + a2[i] = ((oxa[2] - oxb[2]) + ((oxa[6] - oxb[6]) << 16)) * 3; + a3[i] = ((oxa[3] - oxb[3]) + ((oxa[7] - oxb[7]) << 16)) / 4; + } +} + +/* { dg-final { scan-tree-dump-times "Insertion done: 4 temp arrays inserted" 1 "ldist" } } */ +/* { dg-final { scan-tree-dump-times "Insertion removed" 1 "ldist" } } */ +/* { dg-final { scan-tree-dump-times "Loop 1 not distributed." 1 "ldist" } } */ \ No newline at end of file diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c index c08af656244..88b56379c84 100644 --- a/gcc/tree-loop-distribution.c +++ b/gcc/tree-loop-distribution.c @@ -36,6 +36,47 @@ along with GCC; see the file COPYING3. If not see | D(I) = A(I-1)*E |ENDDO + If an unvectorizable loop has grouped loads, and calculations from grouped + loads are isomorphic, build temp arrays using stmts where isomorphic + calculations end. Afer distribution, the partition built from temp + arrays can be vectorized in pass SLP after loop unrolling. 
For example, + + |DO I = 1, N + | A = FOO (ARG_1); + | B = FOO (ARG_2); + | C = BAR_0 (A); + | D = BAR_1 (B); + |ENDDO + + is transformed to + + |DO I = 1, N + | J = FOO (ARG_1); + | K = FOO (ARG_2); + | X[I] = J; + | Y[I] = K; + | A = X[I]; + | B = Y[I]; + | C = BAR_0 (A); + | D = BAR_1 (B); + |ENDDO + + and is then distributed to + + |DO I = 1, N + | J = FOO (ARG_1); + | K = FOO (ARG_2); + | X[I] = J; + | Y[I] = K; + |ENDDO + + |DO I = 1, N + | A = X[I]; + | B = Y[I]; + | C = BAR_0 (A); + | D = BAR_1 (B); + |ENDDO + Loop distribution is the dual of loop fusion. It separates statements of a loop (or loop nest) into multiple loops (or loop nests) with the same loop header. The major goal is to separate statements which may @@ -44,7 +85,9 @@ along with GCC; see the file COPYING3. If not see 1) Seed partitions with specific type statements. For now we support two types seed statements: statement defining variable used outside - of loop; statement storing to memory. + of loop; statement storing to memory. Moreover, for unvectorizable + loops, we try to find isomorphic stmts from grouped load and build + temp arrays as new seed statements. 2) Build reduced dependence graph (RDG) for loop to be distributed. The vertices (RDG:V) model all statements in the loop and the edges (RDG:E) model flow and control dependencies between statements. @@ -643,7 +686,8 @@ class loop_distribution /* Returns true when PARTITION1 and PARTITION2 access the same memory object in RDG. */ bool share_memory_accesses (struct graph *rdg, - partition *partition1, partition *partition2); + partition *partition1, partition *partition2, + hash_set *excluded_arrays); /* For each seed statement in STARTING_STMTS, this function builds partition for it by adding depended statements according to RDG. @@ -686,8 +730,9 @@ class loop_distribution /* Fuse PARTITIONS of LOOP if necessary before finalizing distribution. ALIAS_DDRS contains ddrs which need runtime alias check. 
*/ - void finalize_partitions (class loop *loop, vec - *partitions, vec *alias_ddrs); + void finalize_partitions (class loop *loop, + vec *partitions, + vec *alias_ddrs, bitmap producers); /* Analyze loop form and if it's vectorizable to decide if we need to insert temp arrays to distribute it. */ @@ -701,6 +746,28 @@ class loop_distribution inline void rebuild_rdg (loop_p loop, struct graph *&rdg, control_dependences *cd); + + /* If loop is not distributed, remove inserted temp arrays. */ + void remove_insertion (loop_p loop, struct graph *flow_only_rdg, + bitmap producers, struct partition *partition); + + /* Insert temp arrays if isomorphic computation exists. Temp arrays will be + regarded as SEED_STMTS for building partitions in succeeding processes. */ + bool insert_temp_arrays (loop_p loop, vec seed_stmts, + hash_set *tmp_array_vars, bitmap producers); + + void build_producers (loop_p loop, bitmap producers, + vec &transformed); + + void do_insertion (loop_p loop, struct graph *flow_only_rdg, tree iv, + bitmap cut_points, hash_set *tmp_array_vars, + bitmap producers); + + /* Fuse PARTITIONS built from inserted temp arrays into one partition, + fuse the rest into another. */ + void merge_remaining_partitions (vec *partitions, + bitmap producers); + /* Distributes the code from LOOP in such a way that producer statements are placed before consumer statements. Tries to separate only the statements from STMTS into separate loops. 
Returns the number of @@ -1913,7 +1980,8 @@ loop_distribution::classify_partition (loop_p loop, bool loop_distribution::share_memory_accesses (struct graph *rdg, - partition *partition1, partition *partition2) + partition *partition1, partition *partition2, + hash_set *excluded_arrays) { unsigned i, j; bitmap_iterator bi, bj; @@ -1947,7 +2015,10 @@ loop_distribution::share_memory_accesses (struct graph *rdg, if (operand_equal_p (DR_BASE_ADDRESS (dr1), DR_BASE_ADDRESS (dr2), 0) && operand_equal_p (DR_OFFSET (dr1), DR_OFFSET (dr2), 0) && operand_equal_p (DR_INIT (dr1), DR_INIT (dr2), 0) - && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0)) + && operand_equal_p (DR_STEP (dr1), DR_STEP (dr2), 0) + /* An exception, if PARTITION1 and PARTITION2 contain the + temp array we inserted, do not merge them. */ + && !excluded_arrays->contains (DR_REF (dr1))) return true; } } @@ -2909,13 +2980,47 @@ fuse_memset_builtins (vec *partitions) } } +void +loop_distribution::merge_remaining_partitions + (vec *partitions, + bitmap producers) +{ + struct partition *partition = NULL; + struct partition *p1 = NULL, *p2 = NULL; + for (unsigned i = 0; partitions->iterate (i, &partition); i++) + { + if (bitmap_intersect_p (producers, partition->stmts)) + { + if (p1 == NULL) + { + p1 = partition; + continue; + } + partition_merge_into (NULL, p1, partition, FUSE_FINALIZE); + } + else + { + if (p2 == NULL) + { + p2 = partition; + continue; + } + partition_merge_into (NULL, p2, partition, FUSE_FINALIZE); + } + partitions->unordered_remove (i); + partition_free (partition); + i--; + } +} + void loop_distribution::finalize_partitions (class loop *loop, vec *partitions, - vec *alias_ddrs) + vec *alias_ddrs, + bitmap producers) { unsigned i; - struct partition *partition, *a; + struct partition *partition; if (partitions->length () == 1 || alias_ddrs->length () > 0) @@ -2947,13 +3052,7 @@ loop_distribution::finalize_partitions (class loop *loop, || (loop->inner == NULL && i >= NUM_PARTITION_THRESHOLD 
&& num_normal > num_builtin)) { - a = (*partitions)[0]; - for (i = 1; partitions->iterate (i, &partition); ++i) - { - partition_merge_into (NULL, a, partition, FUSE_FINALIZE); - partition_free (partition); - } - partitions->truncate (1); + merge_remaining_partitions (partitions, producers); } /* Fuse memset builtins if possible. */ @@ -3758,6 +3857,404 @@ find_isomorphic_stmts (loop_vec_info vinfo, vec &stmts) return decide_stmts_by_profit (candi_stmts, stmts); } +/* Get iv from SEED_STMTS and make sure each seed_stmt has only one iv as index + and all indices are the same. */ + +static tree +find_index (vec seed_stmts) +{ + if (seed_stmts.length () == 0) + return NULL; + bool found_index = false; + tree index = NULL; + unsigned ui = 0; + for (ui = 0; ui < seed_stmts.length (); ui++) + { + if (!gimple_vdef (seed_stmts[ui])) + return NULL; + tree lhs = gimple_assign_lhs (seed_stmts[ui]); + unsigned num_index = 0; + while (TREE_CODE (lhs) == ARRAY_REF) + { + if (TREE_CODE (TREE_OPERAND (lhs, 1)) == SSA_NAME) + { + num_index++; + if (num_index > 1) + return NULL; + if (index == NULL) + { + index = TREE_OPERAND (lhs, 1); + found_index = true; + } + else if (index != TREE_OPERAND (lhs, 1)) + return NULL; + } + lhs = TREE_OPERAND (lhs, 0); + } + if (!found_index) + return NULL; + } + return index; +} + +/* Check if expression of phi is an increament of a const. 
*/ + +static void +check_phi_inc (struct vertex *v_phi, struct graph *rdg, bool &found_inc) +{ + struct graph_edge *e_phi; + for (e_phi = v_phi->succ; e_phi; e_phi = e_phi->succ_next) + { + struct vertex *v_inc = &(rdg->vertices[e_phi->dest]); + if (!is_gimple_assign (RDGV_STMT (v_inc)) + || gimple_expr_code (RDGV_STMT (v_inc)) != PLUS_EXPR) + continue; + tree rhs1 = gimple_assign_rhs1 (RDGV_STMT (v_inc)); + tree rhs2 = gimple_assign_rhs2 (RDGV_STMT (v_inc)); + if (!(integer_onep (rhs1) || integer_onep (rhs2))) + continue; + struct graph_edge *e_inc; + /* find cycle with only two vertices inc and phi: inc <--> phi. */ + bool found_cycle = false; + for (e_inc = v_inc->succ; e_inc; e_inc = e_inc->succ_next) + { + if (e_inc->dest == e_phi->src) + { + found_cycle = true; + break; + } + } + if (!found_cycle) + continue; + found_inc = true; + } +} + +/* Check if phi satisfies form like PHI <0, i>. */ + +static inline bool +iv_check_phi_stmt (gimple *phi_stmt) +{ + return gimple_phi_num_args (phi_stmt) == 2 + && (integer_zerop (gimple_phi_arg_def (phi_stmt, 0)) + || integer_zerop (gimple_phi_arg_def (phi_stmt, 1))); +} + +/* Make sure the iteration varible is a phi. */ + +static tree +get_iv_from_seed (struct graph *flow_only_rdg, vec seed_stmts) +{ + tree index = find_index (seed_stmts); + if (index == NULL) + return NULL; + for (int i = 0; i < flow_only_rdg->n_vertices; i++) + { + struct vertex *v = &(flow_only_rdg->vertices[i]); + if (RDGV_STMT (v) != seed_stmts[0]) + continue; + struct graph_edge *e; + bool found_phi = false; + for (e = v->pred; e; e = e->pred_next) + { + struct vertex *v_phi = &(flow_only_rdg->vertices[e->src]); + gimple *phi_stmt = RDGV_STMT (v_phi); + if (gimple_code (phi_stmt) != GIMPLE_PHI + || gimple_phi_result (phi_stmt) != index) + continue; + if (!iv_check_phi_stmt (phi_stmt)) + return NULL; + /* find inc expr in succ of phi. 
*/ + bool found_inc = false; + check_phi_inc (v_phi, flow_only_rdg, found_inc); + if (!found_inc) + return NULL; + found_phi = true; + break; + } + if (!found_phi) + return NULL; + break; + } + return index; +} + +/* Do not distribute loop if vertexes in ROOT_MAP have antidependence with in + FLOW_ONLY_RDG. */ + +static bool +check_no_dependency (struct graph *flow_only_rdg, bitmap root_map) +{ + bitmap_iterator bi; + unsigned ui; + auto_vec visited_nodes; + auto_bitmap visited_map; + EXECUTE_IF_SET_IN_BITMAP (root_map, 0, ui, bi) + visited_nodes.safe_push (ui); + for (ui = 0; ui < visited_nodes.length (); ui++) + { + struct vertex *v = &(flow_only_rdg->vertices[visited_nodes[ui]]); + struct graph_edge *e; + for (e = v->succ; e; e = e->succ_next) + { + if (bitmap_bit_p (root_map, e->dest)) + return false; + if (bitmap_bit_p (visited_map, e->dest)) + continue; + visited_nodes.safe_push (e->dest); + bitmap_set_bit (visited_map, e->dest); + } + } + return true; +} + +/* Find isomorphic stmts from GROUPED_LOADS in VINFO and make sure + there is no dependency among those STMT we found. */ + +static unsigned +get_cut_points (struct graph *flow_only_rdg, bitmap cut_points, + loop_vec_info vinfo) +{ + unsigned n_stmts = 0; + + /* STMTS that may be CUT_POINTS. 
*/ + auto_vec stmts; + if (!find_isomorphic_stmts (vinfo, stmts)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "No temp array insertion: no isomorphic stmts" + " were found.\n"); + return 0; + } + + for (int i = 0; i < flow_only_rdg->n_vertices; i++) + { + if (stmts.contains (RDG_STMT (flow_only_rdg, i))) + bitmap_set_bit (cut_points, i); + } + n_stmts = bitmap_count_bits (cut_points); + + bool succ = check_no_dependency (flow_only_rdg, cut_points); + if (!succ) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "No temp array inserted: data dependency" + " among isomorphic stmts.\n"); + return 0; + } + return n_stmts; +} + +static void +build_temp_array (struct vertex *v, gimple_stmt_iterator &gsi, + poly_uint64 array_extent, tree iv, + hash_set *tmp_array_vars, vec *transformed) +{ + gimple *stmt = RDGV_STMT (v); + tree lhs = gimple_assign_lhs (stmt); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "original stmt:\t"); + print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS|TDF_MEMSYMS); + } + tree var_ssa = duplicate_ssa_name (lhs, stmt); + gimple_assign_set_lhs (stmt, var_ssa); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "changed to:\t"); + print_gimple_stmt (dump_file, stmt, 0, TDF_VOPS | TDF_MEMSYMS); + } + gimple_set_uid (gsi_stmt (gsi), -1); + tree vect_elt_type = TREE_TYPE (lhs); + tree array_type = build_array_type_nelts (vect_elt_type, array_extent); + tree array = create_tmp_var (array_type); + tree array_ssa = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL); + tmp_array_vars->add (array_ssa); + gimple *store = gimple_build_assign (array_ssa, var_ssa); + tree new_vdef = make_ssa_name (gimple_vop (cfun), store); + gsi_insert_after (&gsi, store, GSI_NEW_STMT); + gimple_set_vdef (store, new_vdef); + transformed->safe_push (store); + gimple_set_uid (gsi_stmt (gsi), -1); + tree array_ssa2 = build4 (ARRAY_REF, vect_elt_type, array, iv, NULL, NULL); + 
tmp_array_vars->add (array_ssa2); + gimple *load = gimple_build_assign (lhs, array_ssa2); + if (dump_file && (dump_flags & TDF_DETAILS)) + { + fprintf (dump_file, "insert stmt:\t"); + print_gimple_stmt (dump_file, store, 0, TDF_VOPS|TDF_MEMSYMS); + fprintf (dump_file, " and stmt:\t"); + print_gimple_stmt (dump_file, load, 0, TDF_VOPS|TDF_MEMSYMS); + } + gimple_set_vuse (load, new_vdef); + gsi_insert_after (&gsi, load, GSI_NEW_STMT); + gimple_set_uid (gsi_stmt (gsi), -1); +} + +/* Set bitmap PRODUCERS based on vec TRANSFORMED. */ + +void +loop_distribution::build_producers (loop_p loop, bitmap producers, + vec &transformed) +{ + auto_vec stmts; + stmts_from_loop (loop, &stmts); + int i = 0; + gimple *stmt = NULL; + + FOR_EACH_VEC_ELT (stmts, i, stmt) + gimple_set_uid (stmt, i); + i = 0; + FOR_EACH_VEC_ELT (transformed, i, stmt) + bitmap_set_bit (producers, stmt->uid); +} + +/* Transform stmt + + A = FOO (ARG_1); + + to + + STMT_1: A1 = FOO (ARG_1); + STMT_2: X[I] = A1; + STMT_3: A = X[I]; + + Producer is STMT_2 who defines the temp array and consumer is + STMT_3 who uses the temp array. */ + +void +loop_distribution::do_insertion (loop_p loop, struct graph *flow_only_rdg, + tree iv, bitmap cut_points, + hash_set *tmp_array_vars, + bitmap producers) +{ + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "=== do insertion ===\n"); + + auto_vec transformed; + + /* Execution times of loop. */ + poly_uint64 array_extent + = tree_to_poly_uint64 (number_of_latch_executions (loop)) + 1; + + basic_block *bbs = get_loop_body_in_custom_order (loop, this, + bb_top_order_cmp_r); + + for (int i = 0; i < int (loop->num_nodes); i++) + { + basic_block bb = bbs[i]; + + /* Find all cut points in bb and transform them. 
*/ + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + unsigned j = gimple_uid (gsi_stmt (gsi)); + if (bitmap_bit_p (cut_points, j)) + { + struct vertex *v = &(flow_only_rdg->vertices[j]); + build_temp_array (v, gsi, array_extent, iv, tmp_array_vars, + &transformed); + } + } + } + build_producers (loop, producers, transformed); + update_ssa (TODO_update_ssa); + free (bbs); +} + +/* After temp array insertion, given stmts + STMT_1: M = FOO (ARG_1); + STMT_2: X[I] = M; + STMT_3: A = X[I]; + STMT_2 is the producer, STMT_1 is its prev and STMT_3 is its next. + Replace M with A, and remove STMT_2 and STMT_3. */ + +static void +reset_gimple_assign (struct graph *flow_only_rdg, struct partition *partition, + gimple_stmt_iterator &gsi, int j) +{ + struct vertex *v = &(flow_only_rdg->vertices[j]); + gimple *stmt = RDGV_STMT (v); + gimple *prev = stmt->prev; + gimple *next = stmt->next; + tree n_lhs = gimple_assign_lhs (next); + gimple_assign_set_lhs (prev, n_lhs); + unlink_stmt_vdef (stmt); + if (partition) + bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi))); + gsi_remove (&gsi, true); + release_defs (stmt); + if (partition) + bitmap_clear_bit (partition->stmts, gimple_uid (gsi_stmt (gsi))); + gsi_remove (&gsi, true); +} + +void +loop_distribution::remove_insertion (loop_p loop, struct graph *flow_only_rdg, + bitmap producers, struct partition *partition) +{ + basic_block *bbs = get_loop_body_in_custom_order (loop, this, + bb_top_order_cmp_r); + for (int i = 0; i < int (loop->num_nodes); i++) + { + basic_block bb = bbs[i]; + for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); + gsi_next (&gsi)) + { + unsigned j = gimple_uid (gsi_stmt (gsi)); + if (bitmap_bit_p (producers, j)) + reset_gimple_assign (flow_only_rdg, partition, gsi, j); + } + } + update_ssa (TODO_update_ssa); + free (bbs); +} + +/* Insert temp arrays if isomorphic computation exists. 
Temp arrays will be + regarded as SEED_STMTS for building partitions in succeeding processes. */ + +bool +loop_distribution::insert_temp_arrays (loop_p loop, vec seed_stmts, + hash_set *tmp_array_vars, bitmap producers) +{ + struct graph *flow_only_rdg = build_rdg (loop, NULL); + gcc_checking_assert (flow_only_rdg != NULL); + tree iv = get_iv_from_seed (flow_only_rdg, seed_stmts); + if (iv == NULL) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Loop %d no temp array insertion: failed to get" + " iteration variable.\n", loop->num); + free_rdg (flow_only_rdg); + return false; + } + auto_bitmap cut_points; + loop_vec_info vinfo = loop_vec_info_for_loop (loop); + unsigned n_cut_points = get_cut_points (flow_only_rdg, cut_points, vinfo); + delete vinfo; + loop->aux = NULL; + if (n_cut_points == 0) + { + if (dump_file && (dump_flags & TDF_DETAILS)) + fprintf (dump_file, "Loop %d no temp array insertion: no cut points" + " found.\n", loop->num); + free_rdg (flow_only_rdg); + return false; + } + do_insertion (loop, flow_only_rdg, iv, cut_points, tmp_array_vars, producers); + if (dump_enabled_p ()) + { + dump_user_location_t loc = find_loop_location (loop); + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion done:" + " %d temp arrays inserted in Loop %d.\n", + n_cut_points, loop->num); + } + free_rdg (flow_only_rdg); + return true; +} + +static bool find_seed_stmts_for_distribution (class loop *, vec *); + /* Distributes the code from LOOP in such a way that producer statements are placed before consumer statements. Tries to separate only the statements from STMTS into separate loops. Returns the number of @@ -3814,6 +4311,34 @@ loop_distribution::distribute_loop (class loop *loop, vec stmts, return 0; } + /* Try to distribute LOOP if LOOP is simple enough and unable to vectorize. 
+ If LOOP has grouped loads, recursively find isomorphic stmts and insert + temp arrays, rebuild RDG and call find_seed_stmts_for_distribution + to replace STMTS. */ + + hash_set tmp_array_vars; + + /* STMTs that define those inserted TMP_ARRAYs. */ + auto_bitmap producers; + + /* New SEED_STMTS after insertion. */ + auto_vec work_list; + bool insert_success = false; + if (may_insert_temp_arrays (loop, rdg, cd)) + { + if (insert_temp_arrays (loop, stmts, &tmp_array_vars, producers)) + { + if (find_seed_stmts_for_distribution (loop, &work_list)) + { + insert_success = true; + stmts = work_list; + } + else + remove_insertion (loop, rdg, producers, NULL); + rebuild_rdg (loop, rdg, cd); + } + } + data_reference_p dref; for (i = 0; datarefs_vec.iterate (i, &dref); ++i) dref->aux = (void *) (uintptr_t) i; @@ -3894,7 +4419,7 @@ loop_distribution::distribute_loop (class loop *loop, vec stmts, for (int j = i + 1; partitions.iterate (j, &partition); ++j) { - if (share_memory_accesses (rdg, into, partition)) + if (share_memory_accesses (rdg, into, partition, &tmp_array_vars)) { partition_merge_into (rdg, into, partition, FUSE_SHARE_REF); partitions.unordered_remove (j); @@ -3944,7 +4469,7 @@ loop_distribution::distribute_loop (class loop *loop, vec stmts, } } - finalize_partitions (loop, &partitions, &alias_ddrs); + finalize_partitions (loop, &partitions, &alias_ddrs, producers); /* If there is a reduction in all partitions make sure the last one is not classified for builtin code generation. */ @@ -3962,6 +4487,24 @@ loop_distribution::distribute_loop (class loop *loop, vec stmts, } nbp = partitions.length (); + + /* If we have inserted TMP_ARRAYs but there is only one partition left in + the succeeding processes, remove those inserted TMP_ARRAYs back to the + original version. 
*/ + + if (nbp == 1 && insert_success) + { + struct partition *partition = NULL; + partitions.iterate (0, &partition); + remove_insertion (loop, rdg, producers, partition); + if (dump_enabled_p ()) + { + dump_user_location_t loc = find_loop_location (loop); + dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc, "Insertion removed:" + " unable to distribute loop %d.\n", loop->num); + } + } + if (nbp == 0 || (nbp == 1 && !partition_builtin_p (partitions[0])) || (nbp > 1 && partition_contains_all_rw (rdg, partitions))) -- Gitee From 133e53c976ae4afb0444cf5d86455367e3c4f360 Mon Sep 17 00:00:00 2001 From: benniaobufeijiushiji Date: Wed, 30 Nov 2022 22:42:35 +0800 Subject: [PATCH 2/2] [Struct reorg] Add struct-semi-relayout optimize Add support for structs with multi-allocation which is escaped in complete-relayout. Add flag -fipa-struct-reorg=6 and parameter semi-relayout-level. --- gcc/common.opt | 7 +- gcc/ipa-struct-reorg/ipa-struct-reorg.c | 916 +++++++++++++++++- gcc/ipa-struct-reorg/ipa-struct-reorg.h | 8 + gcc/params.opt | 4 + .../gcc.dg/struct/semi_relayout_rewrite.c | 86 ++ gcc/testsuite/gcc.dg/struct/struct-reorg.exp | 4 + 6 files changed, 992 insertions(+), 33 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/struct/semi_relayout_rewrite.c diff --git a/gcc/common.opt b/gcc/common.opt index 384595f1635..588e194009b 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1889,9 +1889,10 @@ Common Report Var(flag_ipa_struct_reorg) Init(0) Optimization Perform structure layout optimizations. fipa-struct-reorg= -Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 5) --fipa-struct-reorg=[0,1,2,3,4,5] adding none, struct-reorg, reorder-fields, -dfe, safe-pointer-compression, unsafe-pointer-compression optimizations. 
+Common RejectNegative Joined UInteger Var(struct_layout_optimize_level) Init(0) IntegerRange(0, 6) +-fipa-struct-reorg=[0,1,2,3,4,5,6] adding none, struct-reorg, reorder-fields, +dfe, safe-pointer-compression, unsafe-pointer-compression, semi-relayout +optimizations. fipa-extend-auto-profile Common Report Var(flag_ipa_extend_auto_profile) diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c index ee4893dfba1..4751711fed6 100644 --- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c +++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c @@ -265,7 +265,8 @@ enum struct_layout_opt_level STRUCT_REORDER_FIELDS = 1 << 2, DEAD_FIELD_ELIMINATION = 1 << 3, POINTER_COMPRESSION_SAFE = 1 << 4, - POINTER_COMPRESSION_UNSAFE = 1 << 5 + POINTER_COMPRESSION_UNSAFE = 1 << 5, + SEMI_RELAYOUT = 1 << 6 }; /* Defines the target pointer size of compressed pointer, which should be 8, @@ -280,6 +281,7 @@ void get_base (tree &base, tree expr); static unsigned int current_layout_opt_level; hash_map replace_type_map; +hash_map semi_relayout_map; /* Return true if one of these types is created by struct-reorg. */ @@ -398,7 +400,9 @@ srtype::srtype (tree type) visited (false), pc_candidate (false), has_legal_alloc_num (false), - has_alloc_array (0) + has_alloc_array (0), + semi_relayout (false), + bucket_parts (0) { for (int i = 0; i < max_split; i++) newtype[i] = NULL_TREE; @@ -883,6 +887,66 @@ srfield::create_new_optimized_fields (tree newtype[max_split], newfield[0] = field; } +/* Given a struct s whose fields has already reordered by size, we try to + combine fields less than 8 bytes together to 8 bytes. Example: + struct s { + uint64_t a, + uint32_t b, + uint32_t c, + uint32_t d, + uint16_t e, + uint8_t f + } + + We allocate memory for arrays of struct S, before semi-relayout, their + layout in memory is shown as below: + [a,b,c,d,e,f,padding;a,b,c,d,e,f,padding;...] + + During semi-relayout, we put a number of structs into a same region called + bucket. 
The number is determined by param realyout-bucket-capacity-level. + Using 1024 here as example. After semi-relayout, the layout in a bucket is + shown as below: + part1 [a;a;a...] + part2 [b,c;b,c;b,c;...] + part3 [d,e,f,pad;d,e,f,pad;d,e,f,pad;...] + + In the last bucket, if the amount of rest structs is less than the capacity + of a bucket, the rest of allcated memory will be wasted as padding. */ + +unsigned +srtype::calculate_bucket_size () +{ + unsigned parts = 0; + unsigned bit_sum = 0; + unsigned relayout_offset = 0; + /* Currently, limit each 8 bytes with less than 2 fields. */ + unsigned curr_part_num = 0; + unsigned field_num = 0; + for (tree f = TYPE_FIELDS (newtype[0]); f; f = DECL_CHAIN (f)) + { + unsigned size = TYPE_PRECISION (TREE_TYPE (f)); + bit_sum += size; + field_num++; + if (++curr_part_num > 2 || bit_sum > 64) + { + bit_sum = size; + parts++; + relayout_offset = relayout_part_size * parts; + curr_part_num = 1; + } + else + { + relayout_offset = relayout_part_size * parts + (bit_sum - size) / 8; + } + new_field_offsets.put (f, relayout_offset); + } + /* Donnot relayout a struct with only one field after DFE. */ + if (field_num == 1) + return 0; + bucket_parts = ++parts; + return parts * relayout_part_size; +} + /* Create the new TYPE corresponding to THIS type. 
*/ bool @@ -994,6 +1058,15 @@ srtype::create_new_type (void) if (pc_candidate && pc_gptr == NULL_TREE) create_global_ptr_for_pc (); + if (semi_relayout) + { + bucket_size = calculate_bucket_size (); + if (bucket_size == 0) + return false; + if (semi_relayout_map.get (this->newtype[0]) == NULL) + semi_relayout_map.put (this->newtype[0], this->type); + } + if (dump_file && (dump_flags & TDF_DETAILS)) { fprintf (dump_file, "Created %d types:\n", maxclusters); @@ -1393,7 +1466,7 @@ public: bool should_create = false, bool can_escape = false); bool wholeaccess (tree expr, tree base, tree accesstype, srtype *t); - void check_alloc_num (gimple *stmt, srtype *type); + void check_alloc_num (gimple *stmt, srtype *type, bool ptrptr); void check_definition_assign (srdecl *decl, vec &worklist); void check_definition_call (srdecl *decl, vec &worklist); void check_definition (srdecl *decl, vec&); @@ -1440,6 +1513,33 @@ public: tree &); basic_block create_bb_for_compress_nullptr (basic_block, tree &); basic_block create_bb_for_decompress_nullptr (basic_block, tree, tree &); + + // Semi-relayout methods: + bool is_semi_relayout_candidate (tree); + srtype *get_semi_relayout_candidate_type (tree); + void check_and_prune_struct_for_semi_relayout (void); + tree rewrite_pointer_diff (gimple_stmt_iterator *, tree, tree, srtype *); + tree rewrite_pointer_plus_integer (gimple *, gimple_stmt_iterator *, tree, + tree, srtype *); + tree build_div_expr (gimple_stmt_iterator *, tree, tree); + tree get_true_pointer_base (gimple_stmt_iterator *, tree, srtype *); + tree get_real_allocated_ptr (tree, gimple_stmt_iterator *); + tree set_ptr_for_use (tree, gimple_stmt_iterator *); + void record_allocated_size (tree, gimple_stmt_iterator *, tree); + tree read_allocated_size (tree, gimple_stmt_iterator *); + gimple *create_aligned_alloc (gimple_stmt_iterator *, srtype *, tree, + tree &); + void create_memset_zero (tree, gimple_stmt_iterator *, tree); + void create_memcpy (tree, tree, tree, 
gimple_stmt_iterator *); + void create_free (tree, gimple_stmt_iterator *); + void copy_to_lhs (tree, tree, gimple_stmt_iterator *); + srtype *get_relayout_candidate_type (tree); + long unsigned int get_true_field_offset (srfield *, srtype *); + tree rewrite_address (tree, srfield *, srtype *, gimple_stmt_iterator *); + bool check_sr_copy (gimple *); + void relayout_field_copy (gimple_stmt_iterator *, gimple *, tree, tree, + tree&, tree &); + void do_semi_relayout (gimple_stmt_iterator *, gimple *, tree &, tree &); }; struct ipa_struct_relayout @@ -4528,7 +4628,7 @@ ipa_struct_reorg::check_type_and_push (tree newdecl, srdecl *decl, } void -ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) +ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type, bool ptrptr) { if (current_layout_opt_level >= COMPLETE_STRUCT_RELAYOUT && handled_allocation_stmt (stmt)) @@ -4536,6 +4636,14 @@ ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) tree arg0 = gimple_call_arg (stmt, 0); basic_block bb = gimple_bb (stmt); cgraph_node *node = current_function->node; + if (!ptrptr && current_layout_opt_level >= SEMI_RELAYOUT + && gimple_call_builtin_p (stmt, BUILT_IN_MALLOC)) + { + /* Malloc is commonly used for allocations of a single struct + and semi-relayout will waste a mess of memory, so we skip it. */ + type->has_alloc_array = -4; + return; + } if (integer_onep (arg0)) { /* Actually NOT an array, but may ruin other array. */ @@ -4544,6 +4652,10 @@ ipa_struct_reorg::check_alloc_num (gimple *stmt, srtype *type) else if (bb->loop_father != NULL && loop_outer (bb->loop_father) != NULL) { + /* For semi-relayout, do not escape realloc. */ + if (current_layout_opt_level & SEMI_RELAYOUT + && gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) + return; /* The allocation is in a loop. 
*/ type->has_alloc_array = -2; } @@ -4635,6 +4747,13 @@ ipa_struct_reorg::check_definition_assign (srdecl *decl, vec &worklist) return; } + if (semi_relayout_map.get (type->type) != NULL) + { + if (current_layout_opt_level != COMPLETE_STRUCT_RELAYOUT) + type->mark_escape (escape_unhandled_rewrite, stmt); + return; + } + /* d) if the name is from a cast/assignment, make sure it is used as that type or void* i) If void* then push the ssa_name into worklist. */ @@ -4679,7 +4798,8 @@ ipa_struct_reorg::check_definition_call (srdecl *decl, vec &worklist) } } - check_alloc_num (stmt, type); + bool ptrptr = isptrptr (decl->orig_type); + check_alloc_num (stmt, type, ptrptr); return; } @@ -6249,6 +6369,53 @@ ipa_struct_reorg::pc_candidate_tree_p (tree xhs) return false; } +srtype * +ipa_struct_reorg::get_semi_relayout_candidate_type (tree xhs) +{ + if (xhs == NULL) + return NULL; + if (TREE_CODE (xhs) == SSA_NAME || TREE_CODE (xhs) == COMPONENT_REF) + { + srtype *access_type = find_type (inner_type (TREE_TYPE (xhs))); + if (access_type != NULL && access_type->semi_relayout) + return access_type; + } + return NULL; +} + +bool +ipa_struct_reorg::is_semi_relayout_candidate (tree xhs) +{ + if (xhs == NULL) + return false; + + if (TREE_CODE (xhs) == SSA_NAME) + xhs = TREE_TYPE (xhs); + + if (TREE_CODE (xhs) == POINTER_TYPE) + { + srtype *var_type = find_type (TREE_TYPE (xhs)); + if (!var_type || var_type->has_escaped ()) + return false; + if (var_type->semi_relayout) + return true; + } + + if (TREE_CODE (xhs) == COMPONENT_REF) + { + tree mem = TREE_OPERAND (xhs, 0); + if (TREE_CODE (mem) == MEM_REF) + { + tree type = TREE_TYPE (mem); + srtype *old_type = get_relayout_candidate_type (type); + if (old_type && types_compatible_p (type, old_type->type) + && old_type->semi_relayout) + return true; + } + } + return false; +} + /* True if xhs is a component_ref that base has escaped but uses a compression candidate type. 
*/ @@ -6782,6 +6949,404 @@ ipa_struct_reorg::try_rewrite_with_pointer_compression (gassign *stmt, } } +tree +ipa_struct_reorg::rewrite_pointer_diff (gimple_stmt_iterator *gsi, tree ptr1, + tree ptr2, srtype *type) +{ + tree shifts = build_int_cst (long_integer_type_node, semi_relayout_align); + tree pointer_type = build_pointer_type (unsigned_char_type_node); + /* addr_high_1 = (intptr_t)ptr1 >> shifts */ + tree ptr1_cvt = fold_convert (pointer_type, ptr1); + tree addr_high_1 = gimplify_build2 (gsi, RSHIFT_EXPR, pointer_type, + ptr1_cvt, shifts); + /* addr_high_2 = (intptr_t)ptr2 >> shifts */ + tree ptr2_cvt = fold_convert (pointer_type, ptr2); + tree addr_high_2 = gimplify_build2 (gsi, RSHIFT_EXPR, pointer_type, + ptr2_cvt, shifts); + /* off1 = (intptr_t)ptr1 - (addr_high_1 << shifts) */ + tree bucket_start_1 = gimplify_build2 (gsi, LSHIFT_EXPR, pointer_type, + addr_high_1, shifts); + tree off1 = gimplify_build2 (gsi, MINUS_EXPR, long_integer_type_node, + ptr1_cvt, bucket_start_1); + /* off2 = (intptr_t)ptr2 - (addr_high_2 << shifts) */ + tree bucket_start_2 = gimplify_build2 (gsi, LSHIFT_EXPR, pointer_type, + addr_high_2, shifts); + tree off2 = gimplify_build2 (gsi, MINUS_EXPR, long_integer_type_node, + ptr2_cvt, bucket_start_2); + /* group_diff = (addr_high_1 - addr_high_2) / bucket_parts */ + tree bucket_sub = gimplify_build2 (gsi, MINUS_EXPR, long_integer_type_node, + addr_high_1, addr_high_2); + tree bucket_parts = build_int_cst (long_integer_type_node, + type->bucket_parts); + tree group_diff = gimplify_build2 (gsi, TRUNC_DIV_EXPR, + long_integer_type_node, + bucket_sub, bucket_parts); + /* off_addr_diff = off1 - off2 */ + tree off_addr_diff = gimplify_build2 (gsi, MINUS_EXPR, long_integer_type_node, + off1, off2); + /* res = group_diff * bucket_capacity + off_diff / 8 */ + tree capacity = build_int_cst (long_integer_type_node, + relayout_part_size / 8); + tree unit_size = build_int_cst (long_integer_type_node, 8); + tree bucket_index_diff = gimplify_build2 
(gsi, MULT_EXPR, + long_integer_type_node, + group_diff, capacity); + tree off_index = gimplify_build2 (gsi, TRUNC_DIV_EXPR, + long_integer_type_node, + off_addr_diff, unit_size); + tree res = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node, + bucket_index_diff, off_index); + return res; +} + +basic_block +create_bb_for_group_diff_eq_0 (basic_block last_bb, tree phi, tree new_granule) +{ + basic_block new_bb = create_empty_bb (last_bb); + if (last_bb->loop_father != NULL) + { + add_bb_to_loop (new_bb, last_bb->loop_father); + loops_state_set (LOOPS_NEED_FIXUP); + } + /* Emit res = new_granule; */ + gimple_stmt_iterator gsi = gsi_last_bb (new_bb); + gimple *new_stmt = gimple_build_assign (phi, new_granule); + gsi_insert_after (&gsi, new_stmt, GSI_NEW_STMT); + return new_bb; +} + +basic_block +create_bb_for_group_diff_ne_0 (basic_block new_bb, tree &phi, tree ptr, + tree group_diff, tree off_times_8, srtype *type) +{ + tree shifts = build_int_cst (long_unsigned_type_node, semi_relayout_align); + gimple_stmt_iterator gsi = gsi_last_bb (new_bb); + gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT); + /* curr_group_start = (ptr >> shifts) << shifts; */ + tree ptr_r_1 = gimplify_build2 (&gsi, RSHIFT_EXPR, long_integer_type_node, + ptr, shifts); + tree curr_group_start = gimplify_build2 (&gsi, LSHIFT_EXPR, long_integer_type_node, + ptr_r_1, shifts); + /* curr_off_from_group = ptr - curr_group_start; */ + tree curr_off_from_group = gimplify_build2 (&gsi, MINUS_EXPR, + long_integer_type_node, + ptr, curr_group_start); + /* res = curr_group_start + ((group_diff * parts) << shifts) + + ((curr_off_from_group + off_times_8) % shifts); */ + tree step1 = gimplify_build2 (&gsi, MULT_EXPR, long_integer_type_node, + group_diff, build_int_cst ( + long_integer_type_node, type->bucket_parts)); + tree step2 = gimplify_build2 (&gsi, LSHIFT_EXPR, long_integer_type_node, + step1, shifts); + tree step3 = gimplify_build2 (&gsi, PLUS_EXPR, long_integer_type_node, + 
curr_off_from_group, off_times_8); + tree step4 = gimplify_build2 (&gsi, TRUNC_MOD_EXPR, long_integer_type_node, + step3, build_int_cst ( + long_integer_type_node, relayout_part_size)); + tree step5 = gimplify_build2 (&gsi, PLUS_EXPR, long_integer_type_node, + step2, step4); + tree res_phi1 = gimplify_build2 (&gsi, PLUS_EXPR, long_integer_type_node, + curr_group_start, step5); + /* if (group_diff < 0) */ + gcond *cond = gimple_build_cond (LT_EXPR, group_diff, + build_int_cst (long_integer_type_node, 0), + NULL_TREE, NULL_TREE); + gsi_insert_before (&gsi, cond, GSI_SAME_STMT); + /* remove nop */ + gsi_remove (&gsi, true); + /* res += shifts */ + basic_block true_bb = create_empty_bb (new_bb); + if (new_bb->loop_father != NULL) + { + add_bb_to_loop (true_bb, new_bb->loop_father); + loops_state_set (LOOPS_NEED_FIXUP); + } + gimple_stmt_iterator true_gsi = gsi_last_bb (true_bb); + tree res_phi2 = make_ssa_name (long_integer_type_node); + gimple *new_stmt + = gimple_build_assign (res_phi2, PLUS_EXPR, res_phi1, + build_int_cst (long_integer_type_node, relayout_part_size)); + gsi_insert_after (&true_gsi, new_stmt, GSI_NEW_STMT); + /* create phi bb */ + basic_block res_bb = create_empty_bb (true_bb); + if (new_bb->loop_father != NULL) + { + add_bb_to_loop (res_bb, new_bb->loop_father); + loops_state_set (LOOPS_NEED_FIXUP); + } + /* rebuild cfg */ + edge etrue = make_edge (new_bb, true_bb, EDGE_TRUE_VALUE); + etrue->probability = profile_probability::unlikely (); + true_bb->count = etrue->count (); + + edge efalse = make_edge (new_bb, res_bb, EDGE_FALSE_VALUE); + efalse->probability = profile_probability::likely (); + res_bb->count = efalse->count (); + + edge efall = make_single_succ_edge (true_bb, res_bb, EDGE_FALLTHRU); + + phi = make_ssa_name (long_integer_type_node); + gphi *phi_node = create_phi_node (phi, res_bb); + add_phi_arg (phi_node, res_phi2, efall, UNKNOWN_LOCATION); + add_phi_arg (phi_node, res_phi1, efalse, UNKNOWN_LOCATION); + + if (dom_info_available_p 
(CDI_DOMINATORS)) + { + set_immediate_dominator (CDI_DOMINATORS, true_bb, new_bb); + set_immediate_dominator (CDI_DOMINATORS, res_bb, new_bb); + } + return res_bb; +} + +tree +ipa_struct_reorg::rewrite_pointer_plus_integer (gimple *stmt, + gimple_stmt_iterator *gsi, + tree ptr, tree offset, + srtype *type) +{ + gcc_assert (type->semi_relayout); + tree off = fold_convert (long_integer_type_node, offset); + tree num_8 = build_int_cst (integer_type_node, 8); + tree shifts = build_int_cst (integer_type_node, semi_relayout_align); + /* off_times_8 = off * 8; */ + tree off_times_8 = gimplify_build2 (gsi, MULT_EXPR, long_integer_type_node, + off, num_8); + /* new_granule = ptr + off * 8; */ + tree ptr_int = fold_convert (long_integer_type_node, ptr); + tree new_granule = gimplify_build2 (gsi, PLUS_EXPR, long_integer_type_node, + ptr_int, off_times_8); + /* group_diff = (new_granule >> shifts) - (ptr >> shifts); */ + tree group_diff_rhs_1 = gimplify_build2 (gsi, RSHIFT_EXPR, + long_integer_type_node, + new_granule, shifts); + tree group_diff_rhs_2 = gimplify_build2 (gsi, RSHIFT_EXPR, + long_integer_type_node, + ptr, shifts); + tree group_diff = gimplify_build2 (gsi, MINUS_EXPR, long_integer_type_node, + group_diff_rhs_1, group_diff_rhs_2); + /* if (group_diff == 0) */ + gcond *cond = gimple_build_cond (EQ_EXPR, group_diff, + build_int_cst (long_integer_type_node, 0), + NULL_TREE, NULL_TREE); + gimple_set_location (cond, UNKNOWN_LOCATION); + gsi_insert_before (gsi, cond, GSI_SAME_STMT); + + gimple *curr_stmt = as_a (cond); + edge e = split_block (curr_stmt->bb, curr_stmt); + basic_block split_src_bb = e->src; + basic_block split_dst_bb = e->dest; + remove_edge_raw (e); + /* if (group_diff == 0) + res = new_granule; */ + tree res_phi_1 = make_ssa_name (long_integer_type_node); + basic_block true_bb = create_bb_for_group_diff_eq_0 (split_src_bb, res_phi_1, + new_granule); + /* else */ + tree res_phi_2 = NULL_TREE; + basic_block false_bb = create_empty_bb (split_src_bb); + if 
(split_src_bb->loop_father != NULL) + { + add_bb_to_loop (false_bb, split_src_bb->loop_father); + loops_state_set (LOOPS_NEED_FIXUP); + } + + edge etrue = make_edge (split_src_bb, true_bb, EDGE_TRUE_VALUE); + etrue->probability = profile_probability::very_likely (); + true_bb->count = etrue->count (); + + edge efalse = make_edge (split_src_bb, false_bb, EDGE_FALSE_VALUE); + efalse->probability = profile_probability::unlikely (); + false_bb->count = efalse->count (); + basic_block res_bb = create_bb_for_group_diff_ne_0 (false_bb, res_phi_2, + ptr_int, group_diff, + off_times_8, type); + /* rebuild cfg */ + edge e_true_fall = make_single_succ_edge (true_bb, split_dst_bb, + EDGE_FALLTHRU); + edge e_false_fall = make_single_succ_edge (res_bb, split_dst_bb, + EDGE_FALLTHRU); + tree res_int = make_ssa_name (long_integer_type_node); + gphi *phi_node = create_phi_node (res_int, split_dst_bb); + add_phi_arg (phi_node, res_phi_1, e_true_fall, UNKNOWN_LOCATION); + add_phi_arg (phi_node, res_phi_2, e_false_fall, UNKNOWN_LOCATION); + if (dom_info_available_p (CDI_DOMINATORS)) + { + set_immediate_dominator (CDI_DOMINATORS, split_dst_bb, split_src_bb); + set_immediate_dominator (CDI_DOMINATORS, true_bb, split_src_bb); + set_immediate_dominator (CDI_DOMINATORS, false_bb, split_src_bb); + } + *gsi = gsi_start_bb (split_dst_bb); + tree pointer_type = build_pointer_type (unsigned_char_type_node); + tree res = gimplify_build1 (gsi, NOP_EXPR, pointer_type, res_int); + return res; +} + +tree +ipa_struct_reorg::build_div_expr (gimple_stmt_iterator *gsi, + tree expr, tree orig_size) +{ + tree div_expr = build2 (TRUNC_DIV_EXPR, long_unsigned_type_node, + expr, orig_size); + tree num = make_ssa_name (long_unsigned_type_node); + gimple *g = gimple_build_assign (num, div_expr); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + return num; +} + +srtype * +ipa_struct_reorg::get_relayout_candidate_type (tree type) +{ + if (type == NULL) + return NULL; + if (TREE_CODE (type) != RECORD_TYPE) + return 
NULL; + return find_type (inner_type (type)); +} + +long unsigned int +ipa_struct_reorg::get_true_field_offset (srfield *field, srtype *type) +{ + unsigned HOST_WIDE_INT new_offset; + new_offset = *(type->new_field_offsets.get (field->newfield[0])); + return new_offset; +} + +tree +ipa_struct_reorg::get_true_pointer_base (gimple_stmt_iterator *gsi, + tree mem_ref, srtype *type) +{ + tree ptr = TREE_OPERAND (mem_ref, 0); + tree off_bytes = TREE_OPERAND (mem_ref, 1); + unsigned num = tree_to_shwi (off_bytes); + if (num == 0) + return ptr; + tree orig_size = TYPE_SIZE_UNIT (TREE_TYPE (mem_ref)); + tree off = build_int_cst (long_integer_type_node, + num / tree_to_uhwi (orig_size)); + gimple *stmt = gsi_stmt (*gsi); + tree new_pointer_base = rewrite_pointer_plus_integer (stmt, gsi, ptr, + off, type); + return new_pointer_base; +} + +tree +ipa_struct_reorg::rewrite_address (tree pointer_base, srfield *field, + srtype *type, gimple_stmt_iterator *gsi) +{ + unsigned HOST_WIDE_INT field_offset = get_true_field_offset (field, type); + + tree pointer_ssa = fold_convert (long_unsigned_type_node, pointer_base); + tree step1 = gimplify_build1 (gsi, NOP_EXPR, long_unsigned_type_node, + pointer_ssa); + tree new_offset_ssa = build_int_cst (long_unsigned_type_node, field_offset); + tree step2 = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node, step1, + new_offset_ssa); + tree field_ssa = fold_convert ( + build_pointer_type (TREE_TYPE (field->newfield[0])), step2); + tree step3 = gimplify_build1 (gsi, NOP_EXPR, + TREE_TYPE (field_ssa), field_ssa); + + tree new_mem_ref = fold_build2 (MEM_REF, TREE_TYPE (field->newfield[0]), + step3, build_int_cst (TREE_TYPE (field_ssa), 0)); + return new_mem_ref; +} + +bool +ipa_struct_reorg::check_sr_copy (gimple *stmt) +{ + tree lhs = gimple_assign_lhs (stmt); + tree rhs = gimple_assign_rhs1 (stmt); + + if (TREE_CODE (lhs) != MEM_REF || TREE_CODE (rhs) != MEM_REF) + return false; + srtype *t1 = get_relayout_candidate_type (TREE_TYPE (lhs)); 
+ srtype *t2 = get_relayout_candidate_type (TREE_TYPE (rhs)); + if (!t1 || !t2 || !t1->semi_relayout || !t2->semi_relayout || t1 != t2) + return false; + tree pointer1 = TREE_OPERAND (lhs, 0); + tree pointer2 = TREE_OPERAND (rhs, 0); + if (TREE_CODE (TREE_TYPE (pointer1)) != POINTER_TYPE + || TREE_CODE (TREE_TYPE (pointer2)) != POINTER_TYPE) + return false; + + tree type1 = TREE_TYPE (TREE_TYPE (pointer1)); + tree type2 = TREE_TYPE (TREE_TYPE (pointer2)); + + srtype *t3 = get_relayout_candidate_type (type1); + srtype *t4 = get_relayout_candidate_type (type2); + + if (t3 != t4 || t3 != t1) + return false; + + return true; +} + +void +ipa_struct_reorg::relayout_field_copy (gimple_stmt_iterator *gsi, gimple *stmt, + tree lhs, tree rhs, + tree &newlhs, tree &newrhs) +{ + srtype *type = get_relayout_candidate_type (TREE_TYPE (lhs)); + tree lhs_base_pointer = get_true_pointer_base (gsi, newlhs, type); + tree rhs_base_pointer = get_true_pointer_base (gsi, newrhs, type); + tree new_l_mem_ref = NULL_TREE; + tree new_r_mem_ref = NULL_TREE; + srfield *field = NULL; + unsigned i = 0; + FOR_EACH_VEC_ELT (type->fields, i, field) + { + if (!field->newfield[0]) + continue; + new_l_mem_ref = rewrite_address (lhs_base_pointer, field, type, gsi); + new_r_mem_ref = rewrite_address (rhs_base_pointer, field, type, gsi); + gimple *new_stmt = gimple_build_assign (new_l_mem_ref, new_r_mem_ref); + gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); + } + newlhs = new_l_mem_ref; + newrhs = new_r_mem_ref; +} + +void +ipa_struct_reorg::do_semi_relayout (gimple_stmt_iterator *gsi, gimple *stmt, + tree &newlhs, tree &newrhs) +{ + tree lhs = gimple_assign_lhs (stmt); + tree rhs = gimple_assign_rhs1 (stmt); + + bool l = TREE_CODE (lhs) == COMPONENT_REF ? is_semi_relayout_candidate (lhs) + : false; + bool r = TREE_CODE (rhs) == COMPONENT_REF ? 
is_semi_relayout_candidate (rhs) + : false; + + gcc_assert (!(l && r)); + + if (!l && !r) + { + if (check_sr_copy (stmt)) + relayout_field_copy (gsi, stmt, lhs, rhs, newlhs, newrhs); + } + else if (l) + { + srtype *type = get_relayout_candidate_type ( + TREE_TYPE (TREE_OPERAND (lhs, 0))); + srfield *new_field = type->find_field ( + int_byte_position (TREE_OPERAND (lhs, 1))); + tree pointer_base = get_true_pointer_base ( + gsi, TREE_OPERAND (newlhs, 0), type); + newlhs = rewrite_address (pointer_base, new_field, type, gsi); + } + else if (r) + { + srtype *type = get_relayout_candidate_type ( + TREE_TYPE (TREE_OPERAND (rhs, 0))); + srfield *new_field = type->find_field ( + int_byte_position (TREE_OPERAND (rhs, 1))); + tree pointer_base = get_true_pointer_base ( + gsi, TREE_OPERAND (newrhs, 0), type); + newrhs = rewrite_address (pointer_base, new_field, type, gsi); + } +} + bool ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) { @@ -6876,7 +7441,8 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) tree size = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (lhs))); tree num; /* Check if rhs2 is a multiplication of the size of the type. */ - if (!is_result_of_mult (rhs2, &num, size)) + if (!is_result_of_mult (rhs2, &num, size) + && !(current_layout_opt_level & SEMI_RELAYOUT)) internal_error ("the rhs of pointer was not a multiplicate and it slipped through."); /* Add the judgment of num, support for POINTER_DIFF_EXPR. 
@@ -6898,11 +7464,34 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) tree newsize = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (newlhs[i]))); newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, newsize); + if (current_layout_opt_level >= SEMI_RELAYOUT) + { + if (is_semi_relayout_candidate (lhs)) + { + srtype *type = get_semi_relayout_candidate_type (lhs); + newrhs[i] = rewrite_pointer_plus_integer (stmt, gsi, + newrhs[i], num, type); + newsize = build_int_cst (long_unsigned_type_node, 0); + } + } new_stmt = gimple_build_assign (newlhs[i], POINTER_PLUS_EXPR, newrhs[i], newsize); } else { + /* rhs2 is not a const integer */ + if (current_layout_opt_level >= SEMI_RELAYOUT) + { + if (is_semi_relayout_candidate (lhs)) + { + num = build_div_expr (gsi, rhs2, + build_int_cst (long_unsigned_type_node, 1)); + srtype *type = get_semi_relayout_candidate_type (lhs); + newrhs[i] = rewrite_pointer_plus_integer (stmt, + gsi, newrhs[i], num, type); + rhs2 = build_int_cst (long_unsigned_type_node, 0); + } + } new_stmt = gimple_build_assign (newlhs[i], POINTER_PLUS_EXPR, newrhs[i], rhs2); } @@ -6952,13 +7541,32 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) return false; /* The two operands always have pointer/reference type. 
*/ - for (unsigned i = 0; i < max_split && newrhs1[i] && newrhs2[i]; i++) + if (current_layout_opt_level >= SEMI_RELAYOUT + && (is_semi_relayout_candidate (rhs1) + || is_semi_relayout_candidate (rhs2))) { - gimple_assign_set_rhs1 (stmt, newrhs1[i]); - gimple_assign_set_rhs2 (stmt, newrhs2[i]); - update_stmt (stmt); + for (unsigned i = 0; i < max_split && newrhs1[i] && newrhs2[i]; i++) + { + srtype *type = get_semi_relayout_candidate_type (rhs1); + if (!type) + type = get_semi_relayout_candidate_type (rhs2); + gcc_assert (type != NULL); + tree res = rewrite_pointer_diff (gsi, newrhs1[i], + newrhs2[i], type); + gimple *g = gimple_build_assign (gimple_assign_lhs (stmt), res); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + } + remove = true; + } + else + { + for (unsigned i = 0; i < max_split && newrhs1[i] && newrhs2[i]; i++) + { + gimple_assign_set_rhs1 (stmt, newrhs1[i]); + gimple_assign_set_rhs2 (stmt, newrhs2[i]); + update_stmt (stmt); + } } - remove = false; return remove; } @@ -6985,6 +7593,8 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) fprintf (dump_file, "replaced with:\n"); for (unsigned i = 0; i < max_split && (newlhs[i] || newrhs[i]); i++) { + if (current_layout_opt_level & SEMI_RELAYOUT) + do_semi_relayout (gsi, stmt, newlhs[i], newrhs[i]); if (current_layout_opt_level >= POINTER_COMPRESSION_SAFE) try_rewrite_with_pointer_compression (stmt, gsi, lhs, rhs, newlhs[i], newrhs[i]); @@ -7003,6 +7613,108 @@ ipa_struct_reorg::rewrite_assign (gassign *stmt, gimple_stmt_iterator *gsi) return remove; } +tree +ipa_struct_reorg::get_real_allocated_ptr (tree ptr, gimple_stmt_iterator *gsi) +{ + tree ptr_to_int = fold_convert (long_unsigned_type_node, ptr); + tree align = build_int_cst (long_unsigned_type_node, relayout_part_size); + tree real_addr = gimplify_build2 (gsi, MINUS_EXPR, long_unsigned_type_node, + ptr_to_int, align); + tree res = gimplify_build1 (gsi, NOP_EXPR, + build_pointer_type (long_unsigned_type_node), real_addr); + 
return res; +} + +tree +ipa_struct_reorg::set_ptr_for_use (tree ptr, gimple_stmt_iterator *gsi) +{ + tree ptr_to_int = fold_convert (long_unsigned_type_node, ptr); + tree align = build_int_cst (long_unsigned_type_node, relayout_part_size); + tree ptr_int = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node, + ptr_to_int, align); + tree res = gimplify_build1 (gsi, NOP_EXPR, + build_pointer_type (long_unsigned_type_node), ptr_int); + return res; +} + +void +ipa_struct_reorg::record_allocated_size (tree ptr, gimple_stmt_iterator *gsi, + tree size) +{ + tree to_type = build_pointer_type (long_unsigned_type_node); + tree type_cast = fold_convert (to_type, ptr); + tree lhs = fold_build2 (MEM_REF, long_unsigned_type_node, ptr, + build_int_cst (build_pointer_type (long_unsigned_type_node), 0)); + gimple *stmt = gimple_build_assign (lhs, size); + gsi_insert_before (gsi, stmt, GSI_SAME_STMT); +} + +tree +ipa_struct_reorg::read_allocated_size (tree ptr, gimple_stmt_iterator *gsi) +{ + tree to_type = build_pointer_type (long_unsigned_type_node); + tree off = build_int_cst (to_type, 0); + tree size = gimplify_build2 (gsi, MEM_REF, long_unsigned_type_node, + ptr, off); + return size; +} + +gimple * +ipa_struct_reorg::create_aligned_alloc (gimple_stmt_iterator *gsi, + srtype *type, tree num, tree &size) +{ + tree fn = builtin_decl_implicit (BUILT_IN_ALIGNED_ALLOC); + + tree align = build_int_cst (long_unsigned_type_node, relayout_part_size); + unsigned bucket_size = type->bucket_size; + + tree nbuckets = gimplify_build2 (gsi, CEIL_DIV_EXPR, long_unsigned_type_node, + num, build_int_cst (long_unsigned_type_node, + relayout_part_size / 8)); + tree use_size = gimplify_build2 (gsi, MULT_EXPR, long_unsigned_type_node, + nbuckets, build_int_cst ( + long_unsigned_type_node, bucket_size)); + size = gimplify_build2 (gsi, PLUS_EXPR, long_unsigned_type_node, + use_size, align); + gimple *g = gimple_build_call (fn, 2, align, size); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + return 
g; +} + +void +ipa_struct_reorg::create_memset_zero (tree ptr, gimple_stmt_iterator *gsi, + tree size) +{ + tree fn = builtin_decl_implicit (BUILT_IN_MEMSET); + tree val = build_int_cst (long_unsigned_type_node, 0); + gimple *g = gimple_build_call (fn, 3, ptr, val, size); + gsi_insert_before (gsi, g, GSI_SAME_STMT); +} + +void +ipa_struct_reorg::create_memcpy (tree src, tree dst, tree size, + gimple_stmt_iterator *gsi) +{ + tree fn = builtin_decl_implicit (BUILT_IN_MEMCPY); + gimple *g = gimple_build_call (fn, 3, dst, src, size); + gsi_insert_before (gsi, g, GSI_SAME_STMT); +} + +void +ipa_struct_reorg::create_free (tree ptr, gimple_stmt_iterator *gsi) +{ + tree fn = builtin_decl_implicit (BUILT_IN_FREE); + gimple *g = gimple_build_call (fn, 1, ptr); + gsi_insert_before (gsi, g, GSI_SAME_STMT); +} + +void +ipa_struct_reorg::copy_to_lhs (tree lhs, tree new_lhs, gimple_stmt_iterator *gsi) +{ + gimple *g = gimple_build_assign (lhs, new_lhs); + gsi_insert_before (gsi, g, GSI_SAME_STMT); +} + /* Rewrite function call statement STMT. Return TRUE if the statement is to be removed. */ @@ -7044,24 +7756,77 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) ? TYPE_SIZE_UNIT (decl->orig_type) : TYPE_SIZE_UNIT (type->newtype[i]); gimple *g; - /* Every allocation except for calloc needs the size multiplied out. 
*/ - if (!gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) - newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, num, newsize); - - if (gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) - || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA)) - g = gimple_build_call (gimple_call_fndecl (stmt), - 1, newsize); - else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) - g = gimple_build_call (gimple_call_fndecl (stmt), - 2, num, newsize); - else if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) - g = gimple_build_call (gimple_call_fndecl (stmt), - 2, newrhs1[i], newsize); - else - gcc_assert (false); - gimple_call_set_lhs (g, decl->newdecl[i]); - gsi_insert_before (gsi, g, GSI_SAME_STMT); + bool rewrite = false; + if (current_layout_opt_level >= SEMI_RELAYOUT + && type->semi_relayout) + { + if (gimple_call_builtin_p (stmt, BUILT_IN_MALLOC)) + ; + else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) + { + tree rhs2 = gimple_call_arg (stmt, 1); + if (tree_to_uhwi (rhs2) == tree_to_uhwi ( + TYPE_SIZE_UNIT (type->type))) + { + rewrite = true; + tree size = NULL_TREE; + g = create_aligned_alloc (gsi, type, num, size); + tree real_ptr = make_ssa_name ( + build_pointer_type (unsigned_char_type_node)); + gimple_set_lhs (g, real_ptr); + create_memset_zero (real_ptr, gsi, size); + record_allocated_size (real_ptr, gsi, size); + tree lhs_use = set_ptr_for_use (real_ptr, gsi); + copy_to_lhs (decl->newdecl[i], lhs_use, gsi); + } + } + else if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) + { + rewrite = true; + tree size = NULL_TREE; + g = create_aligned_alloc (gsi, type, num, size); + tree real_ptr = make_ssa_name ( + build_pointer_type (unsigned_char_type_node)); + gimple_set_lhs (g, real_ptr); + create_memset_zero (real_ptr, gsi, size); + tree src = get_real_allocated_ptr (newrhs1[i], gsi); + tree old_size = read_allocated_size (src, gsi); + create_memcpy (src, real_ptr, old_size, gsi); + record_allocated_size (real_ptr, gsi, size); + tree lhs_use = set_ptr_for_use (real_ptr, 
gsi); + create_free (src, gsi); + copy_to_lhs (decl->newdecl[i], lhs_use, gsi); + } + else + { + gcc_assert (false); + internal_error ("unsupported type for semi-relayout."); + } + } + if (!rewrite + && (current_layout_opt_level >= STRUCT_REORDER_FIELDS + || current_layout_opt_level == STRUCT_SPLIT)) + { + /* Every allocation except for calloc needs the size + multiplied out. */ + if (!gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) + newsize = gimplify_build2 (gsi, MULT_EXPR, sizetype, + num, newsize); + if (gimple_call_builtin_p (stmt, BUILT_IN_MALLOC) + || gimple_call_builtin_p (stmt, BUILT_IN_ALLOCA)) + g = gimple_build_call (gimple_call_fndecl (stmt), + 1, newsize); + else if (gimple_call_builtin_p (stmt, BUILT_IN_CALLOC)) + g = gimple_build_call (gimple_call_fndecl (stmt), + 2, num, newsize); + else if (gimple_call_builtin_p (stmt, BUILT_IN_REALLOC)) + g = gimple_build_call (gimple_call_fndecl (stmt), + 2, newrhs1[i], newsize); + else + gcc_assert (false); + gimple_call_set_lhs (g, decl->newdecl[i]); + gsi_insert_before (gsi, g, GSI_SAME_STMT); + } if (type->pc_candidate) { /* Init global header for pointer compression. 
*/ @@ -7081,8 +7846,11 @@ ipa_struct_reorg::rewrite_call (gcall *stmt, gimple_stmt_iterator *gsi) if (!rewrite_expr (expr, newexpr)) return false; + srtype *t = find_type (TREE_TYPE (TREE_TYPE (expr))); if (newexpr[1] == NULL) { + if (t && t->semi_relayout) + newexpr[0] = get_real_allocated_ptr (newexpr[0], gsi); gimple_call_set_arg (stmt, 0, newexpr[0]); update_stmt (stmt); return false; @@ -7789,6 +8557,85 @@ ipa_struct_reorg::check_and_prune_struct_for_pointer_compression (void) } } +void +ipa_struct_reorg::check_and_prune_struct_for_semi_relayout (void) +{ + unsigned relayout_transform = 0; + for (unsigned i = 0; i < types.length (); i++) + { + srtype *type = types[i]; + if (dump_file) + { + print_generic_expr (dump_file, type->type); + } + if (type->has_escaped ()) + { + if (dump_file) + { + fprintf (dump_file, " has escaped by %s, skip relayout.\n", + type->escape_reason ()); + } + continue; + } + if (TYPE_FIELDS (type->type) == NULL) + { + if (dump_file) + { + fprintf (dump_file, " has zero field, skip relayout.\n"); + } + continue; + } + if (type->chain_type) + { + if (dump_file) + { + fprintf (dump_file, " is chain_type, skip relayout.\n"); + } + continue; + } + if (type->has_alloc_array == 0 || type->has_alloc_array == 1 + || type->has_alloc_array == -1 || type->has_alloc_array == -3 + || type->has_alloc_array == -4) + { + if (dump_file) + { + fprintf (dump_file, " has alloc number: %d, skip relayout.\n", + type->has_alloc_array); + } + continue; + } + if (get_type_name (type->type) == NULL) + { + if (dump_file) + { + fprintf (dump_file, " has empty struct name," + " skip relayout.\n"); + } + continue; + } + relayout_transform++; + type->semi_relayout = true; + if (dump_file) + { + fprintf (dump_file, " attempts to do semi-relayout.\n"); + } + } + + if (dump_file) + { + if (relayout_transform) + { + fprintf (dump_file, "\nNumber of structures to transform in " + "semi-relayout is %d\n", relayout_transform); + } + else + { + fprintf (dump_file, "\nNo 
structures to transform in " + "semi-relayout.\n"); + } + } +} + /* Init pointer size from parameter param_pointer_compression_size. */ static void @@ -7829,7 +8676,8 @@ ipa_struct_reorg::execute (unsigned int opt) } if (opt >= POINTER_COMPRESSION_SAFE) check_and_prune_struct_for_pointer_compression (); - + if (opt >= SEMI_RELAYOUT) + check_and_prune_struct_for_semi_relayout (); ret = rewrite_functions (); } else // do COMPLETE_STRUCT_RELAYOUT @@ -7881,6 +8729,8 @@ public: unsigned int level = 0; switch (struct_layout_optimize_level) { + case 6: level |= SEMI_RELAYOUT; + // FALLTHRU case 5: level |= POINTER_COMPRESSION_UNSAFE; // FALLTHRU case 4: level |= POINTER_COMPRESSION_SAFE; @@ -7900,6 +8750,12 @@ public: if (level & POINTER_COMPRESSION_SAFE) init_pointer_size_for_pointer_compression (); + if (level & SEMI_RELAYOUT) + { + semi_relayout_align = semi_relayout_level; + relayout_part_size = 1 << semi_relayout_level; + } + /* Preserved for backward compatibility, reorder fields needs run before struct split and complete struct relayout. 
*/ if (flag_ipa_reorder_fields && level < STRUCT_REORDER_FIELDS) diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.h b/gcc/ipa-struct-reorg/ipa-struct-reorg.h index d8879998251..982f43e5804 100644 --- a/gcc/ipa-struct-reorg/ipa-struct-reorg.h +++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.h @@ -25,6 +25,9 @@ namespace struct_reorg { const int max_split = 2; +unsigned semi_relayout_align = semi_relayout_level; +unsigned relayout_part_size = 1 << semi_relayout_level; + template struct auto_vec_del : auto_vec { @@ -127,6 +130,10 @@ public: bool pc_candidate; bool has_legal_alloc_num; int has_alloc_array; + bool semi_relayout; + hash_map new_field_offsets; + unsigned bucket_parts; + unsigned bucket_size; // Constructors srtype(tree type); @@ -148,6 +155,7 @@ public: bool has_dead_field (void); void mark_escape (escape_type, gimple *stmt); void create_global_ptr_for_pc (); + unsigned calculate_bucket_size (); bool has_escaped (void) { return escapes != does_not_escape; diff --git a/gcc/params.opt b/gcc/params.opt index 1d355819ce5..83fd705eed7 100644 --- a/gcc/params.opt +++ b/gcc/params.opt @@ -988,4 +988,8 @@ Threshold functions of cache miss counts to be analyzed in prefetching. Common Joined UInteger Var(param_pointer_compression_size) Init(32) IntegerRange(8, 32) Param Optimization Target size of compressed pointer, which should be 8, 16 or 32. +-param=semi-relayout-level= +Common Joined UInteger Var(semi_relayout_level) Init(13) IntegerRange(11, 15) Param Optimization +Set the capacity of each bucket for semi-relayout to (1 << semi-relayout-level) / 8. + ; This comment is to ensure we retain the blank line above. 
diff --git a/gcc/testsuite/gcc.dg/struct/semi_relayout_rewrite.c b/gcc/testsuite/gcc.dg/struct/semi_relayout_rewrite.c new file mode 100644 index 00000000000..87c756c79d5 --- /dev/null +++ b/gcc/testsuite/gcc.dg/struct/semi_relayout_rewrite.c @@ -0,0 +1,86 @@ +// Check simplify rewrite chance for semi-relayout +/* { dg-do compile } */ + +#include +#include + +typedef struct node node_t; +typedef struct node *node_p; + +typedef struct arc arc_t; +typedef struct arc *arc_p; + +typedef struct network +{ + arc_p arcs; + arc_p sorted_arcs; + int x; + node_p nodes; + node_p stop_nodes; +} network_t; + +struct node +{ + int64_t potential; + int orientation; + node_p child; + node_p pred; + node_p sibling; + node_p sibling_prev; + arc_p basic_arc; + arc_p firstout; + arc_p firstin; + arc_p arc_tmp; + int64_t flow; + int64_t depth; + int number; + int time; +}; + +struct arc +{ + int id; + int64_t cost; + node_p tail; + node_p head; + short ident; + arc_p nextout; + arc_p nextin; + int64_t flow; + int64_t org_cost; + network_t* net_add; +}; + + +const int MAX = 100; +network_t* net; +node_p node; +arc_p arc; + +int +main () +{ + net = (network_t*) calloc (1, sizeof(network_t)); + net->arcs = (arc_p) calloc (MAX, sizeof (arc_t)); + net->sorted_arcs = (arc_p) calloc (MAX, sizeof (arc_t)); + net->nodes = (node_p) calloc (MAX, sizeof (node_t)); + net->arcs->id = 100; + + node = net->nodes; + arc = net->arcs; + + for (unsigned i = 0; i < MAX; i++) + { + arc->head = node; + arc->head->child = node; + node->potential = i + 1; + arc->cost = arc->head->potential; + arc->tail = node->sibling; + node = node + 1; + arc = arc + 1; + } + + return 0; +} + +/* { dg-final { scan-ipa-dump "Number of structures to transform in semi-relayout is 1" "struct_reorg" } } */ \ No newline at end of file diff --git a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp index d7367ed9601..281046b48ae 100644 --- a/gcc/testsuite/gcc.dg/struct/struct-reorg.exp +++ 
b/gcc/testsuite/gcc.dg/struct/struct-reorg.exp @@ -93,6 +93,10 @@ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pc*.c]] \ gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/pc*.c]] \ "" "-fipa-struct-reorg=5 -fdump-ipa-all -flto-partition=one -fwhole-program" +# -fipa-struct-reorg=6 +gcc-dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/semi_relayout*.c]] \ + "" "-fipa-struct-reorg=6 -fdump-ipa-all -flto-partition=one -fwhole-program" + # All done. torture-finish dg-finish -- Gitee