diff --git a/gcc/common.opt b/gcc/common.opt
index b5ea3c7a12793e4f6e866a7f90b0f140cb84a797..5e6e32b8f4affee4cc1c657deec227cde40f3378 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3511,4 +3511,12 @@ fipa-ra
 Common Report Var(flag_ipa_ra) Optimization
 Use caller save register across calls if possible.
 
+flim-count-check
+Common Report Var(flag_lim_count_check) Init(0) Optimization
+Check profile counts before moving a loop invariant out of a cold basic block.
+
+favoid-propagating-conflicts
+Common Report Var(flag_avoid_propagating_conflicts) Init(0) Optimization
+Avoid propagating conflicts in IRA.
+
 ; This comment is to ensure we retain the blank line above.
diff --git a/gcc/ira-build.c b/gcc/ira-build.c
index 0bbdb4d0c4b93fe55c9b2c6f1636074210ec21d3..734e89fe8702b3cf339a77cd82c7cbec6421865e 100644
--- a/gcc/ira-build.c
+++ b/gcc/ira-build.c
@@ -499,6 +499,7 @@ ira_create_allocno (int regno, bool cap_p,
   bitmap_set_bit (loop_tree_node->all_allocnos, ALLOCNO_NUM (a));
   ALLOCNO_NREFS (a) = 0;
   ALLOCNO_FREQ (a) = 0;
+  ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (a) = false;
   ALLOCNO_HARD_REGNO (a) = -1;
   ALLOCNO_CALL_FREQ (a) = 0;
   ALLOCNO_CALLS_CROSSED_NUM (a) = 0;
@@ -1990,6 +1991,37 @@ propagate_modified_regnos (ira_loop_tree_node_t loop_tree_node)
 			    loop_tree_node->modified_regnos);
 }
 
+/* Propagate ALLOCNO_HARD_REG_COSTS from A to PARENT_A.  Use SPILL_COST
+   as the cost of spilling a register throughout A (which we have to do
+   for PARENT_A allocations that conflict with A).  */
+static void
+ira_propagate_hard_reg_costs (ira_allocno_t parent_a, ira_allocno_t a,
+			      int spill_cost)
+{
+  HARD_REG_SET conflicts = ira_total_conflict_hard_regs (a);
+  if (ira_caller_save_loop_spill_p (parent_a, a, spill_cost))
+    conflicts |= ira_need_caller_save_regs (a);
+  conflicts &= ~ira_total_conflict_hard_regs (parent_a);
+
+  auto costs = ALLOCNO_HARD_REG_COSTS (a);
+  if (!hard_reg_set_empty_p (conflicts))
+    ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (a) = true;
+  else if (!costs)
+    return;
+
+  auto aclass = ALLOCNO_CLASS (a);
+  ira_allocate_and_set_costs (&ALLOCNO_HARD_REG_COSTS (parent_a),
+			      aclass, ALLOCNO_CLASS_COST (parent_a));
+  auto parent_costs = ALLOCNO_HARD_REG_COSTS (parent_a);
+  for (int i = 0; i < ira_class_hard_regs_num[aclass]; ++i)
+    if (TEST_HARD_REG_BIT (conflicts, ira_class_hard_regs[aclass][i]))
+      parent_costs[i] += spill_cost;
+    else if (costs)
+      /* The cost to A of allocating this register to PARENT_A can't
+	 be more than the cost of spilling the register throughout A.  */
+      parent_costs[i] += MIN (costs[i], spill_cost);
+}
+
 /* Propagate new info about allocno A (see comments about accumulated
    info in allocno definition) to the corresponding allocno on upper
    loop tree level.  So allocnos on upper levels accumulate
@@ -2018,11 +2050,37 @@ propagate_allocno_info (void)
 	    && bitmap_bit_p (ALLOCNO_LOOP_TREE_NODE (a)->border_allocnos,
 			     ALLOCNO_NUM (a)))
 	  {
+	    int spill_cost = INT_MAX;
+	    if (flag_avoid_propagating_conflicts)
+	      {
+		/* Calculate the cost of storing to memory on entry to A's
+		   loop, referencing as memory within A's loop, and
+		   restoring from memory on exit from A's loop.  */
+		ira_loop_border_costs border_costs (a);
+		if (ira_subloop_allocnos_can_differ_p (parent_a))
+		  spill_cost = (border_costs.spill_inside_loop_cost ()
+				+ ALLOCNO_MEMORY_COST (a));
+	      }
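+	    /* Example with illustrative numbers only: if the loop border
+	       gives a store-on-entry/load-on-exit cost of 40 and the
+	       references inside the loop add a memory cost of 24, then
+	       SPILL_COST is 64.  The propagation below never charges
+	       PARENT_A more than this for a register that conflicts
+	       inside A's loop.  */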
 	    if (! ALLOCNO_BAD_SPILL_P (a))
 	      ALLOCNO_BAD_SPILL_P (parent_a) = false;
 	    ALLOCNO_NREFS (parent_a) += ALLOCNO_NREFS (a);
 	    ALLOCNO_FREQ (parent_a) += ALLOCNO_FREQ (a);
-	    ALLOCNO_CALL_FREQ (parent_a) += ALLOCNO_CALL_FREQ (a);
+
+	    /* If A's allocation can differ from PARENT_A's, we can if
+	       necessary spill PARENT_A on entry to A's loop and restore
+	       it afterwards.  Doing that has cost SPILL_COST.  */
+	    if (flag_avoid_propagating_conflicts
+		&& !ira_subloop_allocnos_can_differ_p (parent_a))
+	      merge_hard_reg_conflicts (a, parent_a, true);
+
+	    if (!flag_avoid_propagating_conflicts)
+	      {
+		ALLOCNO_CALL_FREQ (parent_a) += ALLOCNO_CALL_FREQ (a);
 	    merge_hard_reg_conflicts (a, parent_a, true);
 	    ALLOCNO_CALLS_CROSSED_NUM (parent_a)
 	      += ALLOCNO_CALLS_CROSSED_NUM (a);
@@ -2046,7 +2104,39 @@ propagate_allocno_info (void)
 	    ALLOCNO_CLASS_COST (parent_a)
 	      += ALLOCNO_CLASS_COST (a);
 	    ALLOCNO_MEMORY_COST (parent_a) += ALLOCNO_MEMORY_COST (a);
+	      }
+	    else
+	      {
+		if (!ira_caller_save_loop_spill_p (parent_a, a, spill_cost))
+		  {
+		    ALLOCNO_CALL_FREQ (parent_a) += ALLOCNO_CALL_FREQ (a);
+		    ALLOCNO_CALLS_CROSSED_NUM (parent_a)
+		      += ALLOCNO_CALLS_CROSSED_NUM (a);
+		    ALLOCNO_CHEAP_CALLS_CROSSED_NUM (parent_a)
+		      += ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a);
+		    ALLOCNO_CROSSED_CALLS_ABIS (parent_a)
+		      |= ALLOCNO_CROSSED_CALLS_ABIS (a);
+		    ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (parent_a)
+		      |= ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a);
+		  }
+		ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (parent_a)
+		  += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a);
+		aclass = ALLOCNO_CLASS (a);
+		ira_assert (aclass == ALLOCNO_CLASS (parent_a));
+		ira_propagate_hard_reg_costs (parent_a, a, spill_cost);
+		ira_allocate_and_accumulate_costs
+		  (&ALLOCNO_CONFLICT_HARD_REG_COSTS (parent_a),
+		   aclass,
+		   ALLOCNO_CONFLICT_HARD_REG_COSTS (a));
+		/* The cost to A of allocating a register to PARENT_A can't
+		   be more than the cost of spilling the register
+		   throughout A.  */
+		ALLOCNO_CLASS_COST (parent_a)
+		  += MIN (ALLOCNO_CLASS_COST (a), spill_cost);
+		ALLOCNO_MEMORY_COST (parent_a) += ALLOCNO_MEMORY_COST (a);
+	      }
 	  }
 }
 
 /* Create allocnos corresponding to pseudo-registers in the current
diff --git a/gcc/ira-color.c b/gcc/ira-color.c
index b0fc159a84933f1b8afdd969fab4539a0e35664c..263de6fae8fcc69b814b66c6d980834d3f6e5c06 100644
--- a/gcc/ira-color.c
+++ b/gcc/ira-color.c
@@ -36,6 +36,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "reload.h"
 #include "cfgloop.h"
 
+/* To prevent soft conflict detection becoming quadratic in the
+   loop depth.  Only for very pathological cases, so it hardly
+   seems worth a --param.  */
+const int max_soft_conflict_loop_depth = 64;
+
 typedef struct allocno_hard_regs *allocno_hard_regs_t;
 
 /* The structure contains information about hard registers can be
@@ -1698,6 +1703,167 @@ calculate_saved_nregs (int hard_regno, machine_mode mode)
   return nregs;
 }
 
+/* Allocnos A1 and A2 are known to conflict.  Check whether, in some loop L
+   that is either the current loop or a nested subloop, the conflict is of
+   the following form:
+
+   - One allocno (X) is a cap allocno for some non-cap allocno X2.
+
+   - X2 belongs to some loop L2.
+
+   - The other allocno (Y) is a non-cap allocno.
+
+   - Y is an ancestor of some allocno Y2 in L2.  (Note that such a Y2
+     must exist, given that X and Y conflict.)
+
+   - Y2 is not referenced in L2 (that is, ALLOCNO_NREFS (Y2) == 0).
+
+   - Y can use a different allocation from Y2.
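+
+   (For instance, given:
+
+	y = ...;
+	for (...)	// loop L2
+	  { x2 = ...; use (x2); }
+	... = y;
+
+    the allocno Y2 for "y" in L2 has no references, so "x2" can reuse
+    "y"'s register if Y2 is spilled around L2.  The names follow the
+    description above; the snippet is only an illustration.)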
The conflict is therefore + only "soft", in that it can easily be avoided by spilling Y2 inside L2 + without affecting any insn references. + + If the conflict does have this form, return the Y2 that would need to be + spilled in order to allow X and Y (and thus A1 and A2) to use the same + register. Return null otherwise. Returning null is conservatively correct; + any nonnnull return value is an optimization. */ +ira_allocno_t +ira_soft_conflict (ira_allocno_t a1, ira_allocno_t a2) +{ + /* Search for the loop L and its associated allocnos X and Y. */ + int search_depth = 0; + while (ALLOCNO_CAP_MEMBER (a1) && ALLOCNO_CAP_MEMBER (a2)) + { + a1 = ALLOCNO_CAP_MEMBER (a1); + a2 = ALLOCNO_CAP_MEMBER (a2); + if (search_depth++ > max_soft_conflict_loop_depth) + return nullptr; + } + /* This must be true if A1 and A2 conflict. */ + ira_assert (ALLOCNO_LOOP_TREE_NODE (a1) == ALLOCNO_LOOP_TREE_NODE (a2)); + + /* Make A1 the cap allocno (X in the comment above) and A2 the + non-cap allocno (Y in the comment above). */ + if (ALLOCNO_CAP_MEMBER (a2)) + std::swap (a1, a2); + if (!ALLOCNO_CAP_MEMBER (a1)) + return nullptr; + + /* Search for the real allocno that A1 caps (X2 in the comment above). */ + do + { + a1 = ALLOCNO_CAP_MEMBER (a1); + if (search_depth++ > max_soft_conflict_loop_depth) + return nullptr; + } + while (ALLOCNO_CAP_MEMBER (a1)); + + /* Find the associated allocno for A2 (Y2 in the comment above). */ + auto node = ALLOCNO_LOOP_TREE_NODE (a1); + auto local_a2 = node->regno_allocno_map[ALLOCNO_REGNO (a2)]; + + /* Find the parent of LOCAL_A2/Y2. LOCAL_A2 must be a descendant of A2 + for the conflict query to make sense, so this parent lookup must succeed. + + If the parent allocno has no references, it is usually cheaper to + spill at that loop level instead. Keep searching until we find + a parent allocno that does have references (but don't look past + the starting allocno). */ + ira_allocno_t local_parent_a2; + for (;;) + { + local_parent_a2 = ira_parent_allocno (local_a2); + if (local_parent_a2 == a2 || ALLOCNO_NREFS (local_parent_a2) != 0) + break; + local_a2 = local_parent_a2; + } + if (CHECKING_P) + { + /* Sanity check to make sure that the conflict we've been given + makes sense. */ + auto test_a2 = local_parent_a2; + while (test_a2 != a2) + { + test_a2 = ira_parent_allocno (test_a2); + ira_assert (test_a2); + } + } + if (local_a2 + && ALLOCNO_NREFS (local_a2) == 0 + && ira_subloop_allocnos_can_differ_p (local_parent_a2)) + return local_a2; + return nullptr; +} + +/* The caller has decided to allocate HREGNO to A and has proved that + this is safe. However, the allocation might require the kind of + spilling described in the comment above ira_soft_conflict. + The caller has recorded that: + + - The allocnos in ALLOCNOS_TO_SPILL are the ones that would need + to be spilled to satisfy soft conflicts for at least one allocation + (not necessarily HREGNO). + + - The soft conflicts apply only to A allocations that overlap + SOFT_CONFLICT_REGS. + + If allocating HREGNO is subject to any soft conflicts, record the + subloop allocnos that need to be spilled. */ +static void +spill_soft_conflicts (ira_allocno_t a, bitmap allocnos_to_spill, + HARD_REG_SET soft_conflict_regs, int hregno) +{ + auto nregs = hard_regno_nregs (hregno, ALLOCNO_MODE (a)); + bitmap_iterator bi; + unsigned int i; + EXECUTE_IF_SET_IN_BITMAP (allocnos_to_spill, 0, i, bi) + { + /* SPILL_A needs to be spilled for at least one allocation + (not necessarily this one). 
+
+      /* Find the corresponding allocno for this loop.  */
+      auto conflict_a = spill_a;
+      do
+	{
+	  conflict_a = ira_parent_or_cap_allocno (conflict_a);
+	  ira_assert (conflict_a);
+	}
+      while (ALLOCNO_LOOP_TREE_NODE (conflict_a)->level
+	     > ALLOCNO_LOOP_TREE_NODE (a)->level);
+
+      ira_assert (ALLOCNO_LOOP_TREE_NODE (conflict_a)
+		  == ALLOCNO_LOOP_TREE_NODE (a));
+
+      if (conflict_a == a)
+	{
+	  /* SPILL_A is a descendant of A.  We don't know (and don't need
+	     to know) which cap allocnos have a soft conflict with A.
+	     All we need to do is test whether the soft conflict applies
+	     to the chosen allocation.  */
+	  if (ira_hard_reg_set_intersection_p (hregno, ALLOCNO_MODE (a),
+					       soft_conflict_regs))
+	    ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (spill_a) = true;
+	}
+      else
+	{
+	  /* SPILL_A is a descendant of CONFLICT_A, which has a soft conflict
+	     with A.  Test whether the soft conflict applies to the current
+	     allocation.  */
+	  ira_assert (ira_soft_conflict (a, conflict_a) == spill_a);
+	  auto conflict_hregno = ALLOCNO_HARD_REGNO (conflict_a);
+	  ira_assert (conflict_hregno >= 0);
+	  auto conflict_nregs = hard_regno_nregs (conflict_hregno,
+						  ALLOCNO_MODE (conflict_a));
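+	  /* The two allocations overlap iff each one starts before the
+	     other ends; e.g. HREGNO 0 with NREGS 2 overlaps
+	     CONFLICT_HREGNO 1 with CONFLICT_NREGS 1, since 0 + 2 > 1
+	     and 1 + 1 > 0.  */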
+	  if (hregno + nregs > conflict_hregno
+	      && conflict_hregno + conflict_nregs > hregno)
+	    ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (spill_a) = true;
+	}
+    }
+}
+
 /* Choose a hard register for allocno A.  If RETRY_P is TRUE, it means
    that the function called from function
    `ira_reassign_conflict_allocnos' and `allocno_reload_assign'.  In
@@ -1737,6 +1903,8 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
 #ifdef STACK_REGS
   bool no_stack_reg_p;
 #endif
+  auto_bitmap allocnos_to_spill;
+  HARD_REG_SET soft_conflict_regs = {};
 
   ira_assert (! ALLOCNO_ASSIGNED_P (a));
   get_conflict_and_start_profitable_regs (a, retry_p,
@@ -1824,23 +1992,56 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
 		  mode = ALLOCNO_MODE (conflict_a);
 		  conflict_nregs = hard_regno_nregs (hard_regno, mode);
-		  if (conflict_nregs == n_objects && conflict_nregs > 1)
+		  auto spill_a = (retry_p
+				  ? nullptr
+				  : ira_soft_conflict (a, conflict_a));
+		  if (spill_a)
 		    {
-		      int num = OBJECT_SUBWORD (conflict_obj);
-
-		      if (REG_WORDS_BIG_ENDIAN)
-			SET_HARD_REG_BIT (conflicting_regs[word],
-					  hard_regno + n_objects - num - 1);
-		      else
-			SET_HARD_REG_BIT (conflicting_regs[word],
-					  hard_regno + num);
+		      if (bitmap_set_bit (allocnos_to_spill,
+					  ALLOCNO_NUM (spill_a)))
+			{
+			  ira_loop_border_costs border_costs (spill_a);
+			  auto cost = border_costs.spill_inside_loop_cost ();
+			  auto note_conflict = [&](int r)
+			    {
+			      SET_HARD_REG_BIT (soft_conflict_regs, r);
+			      auto hri = ira_class_hard_reg_index[aclass][r];
+			      if (hri >= 0)
+				{
+				  costs[hri] += cost;
+				  full_costs[hri] += cost;
+				}
+			    };
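+			  /* Charge the spill cost to every start register
+			     whose allocation to A would overlap the
+			     conflicting object: scan downwards from
+			     HARD_REGNO for registers whose MODE-sized
+			     allocation reaches it, then upwards over the
+			     remaining registers it covers.  */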
+			  for (int r = hard_regno;
+			       r >= 0 && (int) end_hard_regno (mode, r) > hard_regno;
+			       r--)
+			    note_conflict (r);
+			  for (int r = hard_regno + 1;
+			       r < hard_regno + conflict_nregs;
+			       r++)
+			    note_conflict (r);
+			}
 		    }
 		  else
-		    conflicting_regs[word]
-		      |= ira_reg_mode_hard_regset[hard_regno][mode];
-		  if (hard_reg_set_subset_p (profitable_hard_regs,
-					     conflicting_regs[word]))
-		    goto fail;
+		    {
+		      if (conflict_nregs == n_objects && conflict_nregs > 1)
+			{
+			  int num = OBJECT_SUBWORD (conflict_obj);
+
+			  if (REG_WORDS_BIG_ENDIAN)
+			    SET_HARD_REG_BIT (conflicting_regs[word],
+					      hard_regno + n_objects - num - 1);
+			  else
+			    SET_HARD_REG_BIT (conflicting_regs[word],
+					      hard_regno + num);
+			}
+		      else
+			conflicting_regs[word]
+			  |= ira_reg_mode_hard_regset[hard_regno][mode];
+		      if (hard_reg_set_subset_p (profitable_hard_regs,
+						 conflicting_regs[word]))
+			goto fail;
+		    }
 		}
 	    }
 	  else if (! retry_p
@@ -1951,6 +2152,8 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
     {
       for (i = hard_regno_nregs (best_hard_regno, mode) - 1; i >= 0; i--)
 	allocated_hardreg_p[best_hard_regno + i] = true;
+      spill_soft_conflicts (a, allocnos_to_spill, soft_conflict_regs,
+			    best_hard_regno);
     }
   if (! retry_p)
     restore_costs_from_copies (a);
@@ -2549,13 +2752,23 @@ ira_loop_edge_freq (ira_loop_tree_node_t loop_node, int regno, bool exit_p)
   return REG_FREQ_FROM_EDGE_FREQ (freq);
 }
 
+/* Construct an object that describes the boundary between A and its
+   parent allocno.  */
+ira_loop_border_costs::ira_loop_border_costs (ira_allocno_t a)
+  : m_mode (ALLOCNO_MODE (a)),
+    m_class (ALLOCNO_CLASS (a)),
+    m_entry_freq (ira_loop_edge_freq (ALLOCNO_LOOP_TREE_NODE (a),
+				      ALLOCNO_REGNO (a), false)),
+    m_exit_freq (ira_loop_edge_freq (ALLOCNO_LOOP_TREE_NODE (a),
+				     ALLOCNO_REGNO (a), true))
+{
+}
+
 /* Calculate and return the cost of putting allocno A into memory.  */
 static int
 calculate_allocno_spill_cost (ira_allocno_t a)
 {
   int regno, cost;
-  machine_mode mode;
-  enum reg_class rclass;
   ira_allocno_t parent_allocno;
   ira_loop_tree_node_t parent_node, loop_node;
 
@@ -2568,24 +2781,12 @@ calculate_allocno_spill_cost (ira_allocno_t a)
     return cost;
   if ((parent_allocno = parent_node->regno_allocno_map[regno]) == NULL)
     return cost;
-  mode = ALLOCNO_MODE (a);
-  rclass = ALLOCNO_CLASS (a);
+  ira_loop_border_costs border_costs (a);
   if (ALLOCNO_HARD_REGNO (parent_allocno) < 0)
-    cost -= (ira_memory_move_cost[mode][rclass][0]
-	     * ira_loop_edge_freq (loop_node, regno, true)
-	     + ira_memory_move_cost[mode][rclass][1]
-	     * ira_loop_edge_freq (loop_node, regno, false));
+    cost -= border_costs.spill_outside_loop_cost ();
   else
-    {
-      ira_init_register_move_cost_if_necessary (mode);
-      cost += ((ira_memory_move_cost[mode][rclass][1]
-		* ira_loop_edge_freq (loop_node, regno, true)
-		+ ira_memory_move_cost[mode][rclass][0]
-		* ira_loop_edge_freq (loop_node, regno, false))
-	       - (ira_register_move_cost[mode][rclass][rclass]
-		  * (ira_loop_edge_freq (loop_node, regno, false)
-		     + ira_loop_edge_freq (loop_node, regno, true))));
-    }
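+    /* The parent allocno has a hard register: spilling A adds a store
+       on entry to the loop and a load on exit, but saves the register
+       moves that a (possibly different) register allocation for A
+       would need at the border.  */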
+    cost += (border_costs.spill_inside_loop_cost ()
+	     - border_costs.move_between_loops_cost ());
   return cost;
 }
 
@@ -2948,6 +3149,8 @@ improve_allocation (void)
 	   assigning hard register to allocno A even without spilling
 	   conflicting allocnos.  */
 	continue;
+      auto_bitmap allocnos_to_spill;
+      HARD_REG_SET soft_conflict_regs = {};
       mode = ALLOCNO_MODE (a);
       nwords = ALLOCNO_NUM_OBJECTS (a);
       /* Process each allocno conflicting with A and update the cost
@@ -2973,31 +3176,49 @@ improve_allocation (void)
 	    ALLOCNO_COLOR_DATA (conflict_a)->temp = check;
 	    if ((conflict_hregno = ALLOCNO_HARD_REGNO (conflict_a)) < 0)
 	      continue;
-	    spill_cost = ALLOCNO_UPDATED_MEMORY_COST (conflict_a);
-	    k = (ira_class_hard_reg_index
-		 [ALLOCNO_CLASS (conflict_a)][conflict_hregno]);
-	    ira_assert (k >= 0);
-	    if ((allocno_costs = ALLOCNO_HARD_REG_COSTS (conflict_a))
-		!= NULL)
-	      spill_cost -= allocno_costs[k];
+	    auto spill_a = ira_soft_conflict (a, conflict_a);
+	    if (spill_a)
+	      {
+		if (!bitmap_set_bit (allocnos_to_spill,
+				     ALLOCNO_NUM (spill_a)))
+		  continue;
+		ira_loop_border_costs border_costs (spill_a);
+		spill_cost = border_costs.spill_inside_loop_cost ();
+	      }
 	    else
-	      spill_cost -= ALLOCNO_UPDATED_CLASS_COST (conflict_a);
-	    spill_cost
-	      += allocno_copy_cost_saving (conflict_a, conflict_hregno);
+	      {
+		spill_cost = ALLOCNO_UPDATED_MEMORY_COST (conflict_a);
+		k = (ira_class_hard_reg_index
+		     [ALLOCNO_CLASS (conflict_a)][conflict_hregno]);
+		ira_assert (k >= 0);
+		if ((allocno_costs = ALLOCNO_HARD_REG_COSTS (conflict_a))
+		    != NULL)
+		  spill_cost -= allocno_costs[k];
+		else
+		  spill_cost -= ALLOCNO_UPDATED_CLASS_COST (conflict_a);
+		spill_cost
+		  += allocno_copy_cost_saving (conflict_a, conflict_hregno);
+	      }
 	    conflict_nregs = hard_regno_nregs (conflict_hregno,
 					       ALLOCNO_MODE (conflict_a));
+	    auto note_conflict = [&](int r)
+	      {
+		if (check_hard_reg_p (a, r,
+				      conflicting_regs, profitable_hard_regs))
+		  {
+		    if (spill_a)
+		      SET_HARD_REG_BIT (soft_conflict_regs, r);
+		    costs[r] += spill_cost;
+		  }
+	      };
 	    for (r = conflict_hregno;
 		 r >= 0 && (int) end_hard_regno (mode, r) > conflict_hregno;
 		 r--)
-	      if (check_hard_reg_p (a, r,
-				    conflicting_regs, profitable_hard_regs))
-		costs[r] += spill_cost;
+	      note_conflict (r);
 	    for (r = conflict_hregno + 1;
 		 r < conflict_hregno + conflict_nregs;
 		 r++)
-	      if (check_hard_reg_p (a, r,
-				    conflicting_regs, profitable_hard_regs))
-		costs[r] += spill_cost;
+	      note_conflict (r);
 	  }
       }
       min_cost = INT_MAX;
@@ -3020,6 +3241,7 @@ improve_allocation (void)
 	   by spilling some conflicting allocnos does not improve
 	   the allocation cost.  */
 	continue;
+      spill_soft_conflicts (a, allocnos_to_spill, soft_conflict_regs, best);
       nregs = hard_regno_nregs (best, mode);
       /* Now spill conflicting allocnos which contain a hard register
 	 of A when we assign the best chosen hard register to it.  */
@@ -3306,7 +3528,7 @@ static void
 color_pass (ira_loop_tree_node_t loop_tree_node)
 {
   int regno, hard_regno, index = -1, n;
-  int cost, exit_freq, enter_freq;
+  int cost;
   unsigned int j;
   bitmap_iterator bi;
   machine_mode mode;
@@ -3355,10 +3577,12 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
 	/* Remove from processing in the next loop.  */
 	bitmap_clear_bit (consideration_allocno_bitmap, j);
 	rclass = ALLOCNO_CLASS (a);
+
+	pclass = ira_pressure_class_translate[rclass];
 	if (flag_ira_region == IRA_REGION_MIXED
 	    && (loop_tree_node->reg_pressure[pclass]
-		<= ira_class_hard_regs_num[pclass]))
+		<= ira_class_hard_regs_num[pclass])
+	    && !flag_avoid_propagating_conflicts)
 	  {
 	    mode = ALLOCNO_MODE (a);
 	    hard_regno = ALLOCNO_HARD_REGNO (a);
@@ -3377,7 +3601,28 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
 	      update_costs_from_copies (subloop_allocno, true, true);
 	    /* We don't need updated costs anymore.  */
 	    ira_free_allocno_updated_costs (subloop_allocno);
+	  }
+	else
+	  {
+	    subloop_allocno = ALLOCNO_CAP_MEMBER (a);
+	    subloop_node = ALLOCNO_LOOP_TREE_NODE (subloop_allocno);
+	    if (ira_single_region_allocno_p (a, subloop_allocno)
+		&& flag_avoid_propagating_conflicts)
+	      {
+		mode = ALLOCNO_MODE (a);
+		hard_regno = ALLOCNO_HARD_REGNO (a);
+		if (hard_regno >= 0)
+		  {
+		    index = ira_class_hard_reg_index[rclass][hard_regno];
+		    ira_assert (index >= 0);
+		  }
+		regno = ALLOCNO_REGNO (a);
+		ira_assert (!ALLOCNO_ASSIGNED_P (subloop_allocno));
+		ALLOCNO_HARD_REGNO (subloop_allocno) = hard_regno;
+		ALLOCNO_ASSIGNED_P (subloop_allocno) = true;
+		if (hard_regno >= 0)
+		  update_costs_from_copies (subloop_allocno, true, true);
+		/* We don't need updated costs anymore.  */
+		ira_free_allocno_updated_costs (subloop_allocno);
 	  }
+	  }
       }
   /* Update costs of the corresponding allocnos (not caps) in the
      subloops.  */
@@ -3392,7 +3637,9 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
 	  ira_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
 	  mode = ALLOCNO_MODE (a);
 	  rclass = ALLOCNO_CLASS (a);
-	  pclass = ira_pressure_class_translate[rclass];
+
+	  pclass = ira_pressure_class_translate[rclass];
+
 	  hard_regno = ALLOCNO_HARD_REGNO (a);
 	  /* Use hard register class here.  ??? */
 	  if (hard_regno >= 0)
@@ -3409,32 +3656,16 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
 	      ira_assert (ALLOCNO_CLASS (subloop_allocno) == rclass);
 	      ira_assert (bitmap_bit_p (subloop_node->all_allocnos,
 					ALLOCNO_NUM (subloop_allocno)));
-	      if ((flag_ira_region == IRA_REGION_MIXED
+	      if (((ira_single_region_allocno_p (a, subloop_allocno)
+		    || !ira_subloop_allocnos_can_differ_p (a, hard_regno >= 0))
+		   && flag_avoid_propagating_conflicts)
+		  || (!flag_avoid_propagating_conflicts
+		      && (flag_ira_region == IRA_REGION_MIXED
 		   && (loop_tree_node->reg_pressure[pclass]
 		       <= ira_class_hard_regs_num[pclass]))
-		  || (pic_offset_table_rtx != NULL
-		      && regno == (int) REGNO (pic_offset_table_rtx))
-		  /* Avoid overlapped multi-registers. Moves between them
-		     might result in wrong code generation.  */
-		  || (hard_regno >= 0
-		      && ira_reg_class_max_nregs[pclass][mode] > 1))
-		{
-		  if (! ALLOCNO_ASSIGNED_P (subloop_allocno))
-		    {
-		      ALLOCNO_HARD_REGNO (subloop_allocno) = hard_regno;
-		      ALLOCNO_ASSIGNED_P (subloop_allocno) = true;
-		      if (hard_regno >= 0)
-			update_costs_from_copies (subloop_allocno, true, true);
-		      /* We don't need updated costs anymore.  */
-		      ira_free_allocno_updated_costs (subloop_allocno);
-		    }
-		  continue;
-		}
-	      exit_freq = ira_loop_edge_freq (subloop_node, regno, true);
-	      enter_freq = ira_loop_edge_freq (subloop_node, regno, false);
-	      ira_assert (regno < ira_reg_equiv_len);
-	      if (ira_equiv_no_lvalue_p (regno))
+		      || !ira_subloop_allocnos_can_differ_p (a,
+							     hard_regno >= 0)))
 		{
+		  if (flag_avoid_propagating_conflicts)
+		    gcc_assert (!ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P
+				(subloop_allocno));
 		  if (! ALLOCNO_ASSIGNED_P (subloop_allocno))
 		    {
 		      ALLOCNO_HARD_REGNO (subloop_allocno) = hard_regno;
@@ -3447,16 +3678,23 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
 		}
 	      else if (hard_regno < 0)
 		{
+		  /* If we allocate a register to SUBLOOP_ALLOCNO, we'll need
+		     to load the register on entry to the subloop and store
+		     the register back on exit from the subloop.  This incurs
+		     a fixed cost for all registers.  Since UPDATED_MEMORY_COST
+		     is (and should only be) used relative to the register costs
+		     for the same allocno, we can subtract this shared register
+		     cost from the memory cost.  */
+		  ira_loop_border_costs border_costs (subloop_allocno);
 		  ALLOCNO_UPDATED_MEMORY_COST (subloop_allocno)
-		    -= ((ira_memory_move_cost[mode][rclass][1] * enter_freq)
-			+ (ira_memory_move_cost[mode][rclass][0] * exit_freq));
+		    -= border_costs.spill_outside_loop_cost ();
 		}
 	      else
 		{
+		  ira_loop_border_costs border_costs (subloop_allocno);
 		  aclass = ALLOCNO_CLASS (subloop_allocno);
 		  ira_init_register_move_cost_if_necessary (mode);
-		  cost = (ira_register_move_cost[mode][rclass][rclass]
-			  * (exit_freq + enter_freq));
+		  cost = border_costs.move_between_loops_cost ();
 		  ira_allocate_and_set_or_copy_costs
 		    (&ALLOCNO_UPDATED_HARD_REG_COSTS (subloop_allocno),
 		     aclass, ALLOCNO_UPDATED_CLASS_COST (subloop_allocno),
@@ -3471,9 +3709,11 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
 		      > ALLOCNO_UPDATED_HARD_REG_COSTS (subloop_allocno)[index])
 		    ALLOCNO_UPDATED_CLASS_COST (subloop_allocno)
 		      = ALLOCNO_UPDATED_HARD_REG_COSTS (subloop_allocno)[index];
+		  /* If we spill SUBLOOP_ALLOCNO, we'll need to store HARD_REGNO
+		     on entry to the subloop and restore HARD_REGNO on exit from
+		     the subloop.  */
 		  ALLOCNO_UPDATED_MEMORY_COST (subloop_allocno)
-		    += (ira_memory_move_cost[mode][rclass][0] * enter_freq
-			+ ira_memory_move_cost[mode][rclass][1] * exit_freq);
+		    += border_costs.spill_inside_loop_cost ();
 		}
 	    }
 	}
@@ -3514,7 +3754,6 @@ move_spill_restore (void)
 {
   int cost, regno, hard_regno, hard_regno2, index;
   bool changed_p;
-  int enter_freq, exit_freq;
   machine_mode mode;
   enum reg_class rclass;
   ira_allocno_t a, parent_allocno, subloop_allocno;
@@ -3562,45 +3801,88 @@ move_spill_restore (void)
 	      if (subloop_allocno == NULL)
 		continue;
 	      ira_assert (rclass == ALLOCNO_CLASS (subloop_allocno));
+	      ira_loop_border_costs border_costs (subloop_allocno);
+
+	      if (!flag_avoid_propagating_conflicts)
+		{
-	      /* We have accumulated cost.  To get the real cost of
-		 allocno usage in the loop we should subtract costs of
-		 the subloop allocnos.  */
-	      cost -= (ALLOCNO_MEMORY_COST (subloop_allocno)
+		  cost -= (ALLOCNO_MEMORY_COST (subloop_allocno)
 			   - (ALLOCNO_HARD_REG_COSTS (subloop_allocno) == NULL
 			      ? ALLOCNO_CLASS_COST (subloop_allocno)
 			      : ALLOCNO_HARD_REG_COSTS (subloop_allocno)[index]));
-	      exit_freq = ira_loop_edge_freq (subloop_node, regno, true);
-	      enter_freq = ira_loop_edge_freq (subloop_node, regno, false);
+		}
+	      else
+		{
+		  /* We have accumulated cost.  To get the real cost of
+		     allocno usage in the loop we should subtract the costs
+		     added by propagate_allocno_info for the subloop
+		     allocnos.  */
+		  int reg_cost
+		    = (ALLOCNO_HARD_REG_COSTS (subloop_allocno) == NULL
+		       ? ALLOCNO_CLASS_COST (subloop_allocno)
+		       : ALLOCNO_HARD_REG_COSTS (subloop_allocno)[index]);
+
+		  int spill_cost
+		    = (border_costs.spill_inside_loop_cost ()
+		       + ALLOCNO_MEMORY_COST (subloop_allocno));
+
+		  /* If HARD_REGNO conflicts with SUBLOOP_ALLOCNO then
+		     propagate_allocno_info will have propagated
+		     the cost of spilling HARD_REGNO in SUBLOOP_NODE.
+		     (ira_subloop_allocnos_can_differ_p must be true
+		     in that case.)  If HARD_REGNO is a caller-saved
+		     register, we might have modelled it in the same way.
+
+		     Otherwise, SPILL_COST acted as a cap on the propagated
+		     register cost, in cases where the allocations can
+		     differ.  */
+		  auto conflicts = ira_total_conflict_hard_regs (subloop_allocno);
+		  if (TEST_HARD_REG_BIT (conflicts, hard_regno)
+		      || (ira_need_caller_save_p (subloop_allocno, hard_regno)
+			  && ira_caller_save_loop_spill_p (a, subloop_allocno,
+							   spill_cost)))
+		    reg_cost = spill_cost;
+		  else if (ira_subloop_allocnos_can_differ_p (a))
+		    reg_cost = MIN (reg_cost, spill_cost);
+
+		  cost -= ALLOCNO_MEMORY_COST (subloop_allocno) - reg_cost;
+		}
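+	      /* Example with illustrative numbers only: if the subloop's
+		 own cost for HARD_REGNO is 45 and spilling throughout the
+		 subloop would cost 60, propagate_allocno_info charged the
+		 parent MIN (45, 60) = 45, so REG_COST recomputes the same
+		 45 here and the subtraction stays consistent with what
+		 was propagated.  */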
 	      if ((hard_regno2 = ALLOCNO_HARD_REGNO (subloop_allocno)) < 0)
-		cost -= (ira_memory_move_cost[mode][rclass][0] * exit_freq
-			 + ira_memory_move_cost[mode][rclass][1] * enter_freq);
+		/* The register was spilled in the subloop.  If we spill
+		   it in the outer loop too then we'll no longer need to
+		   save the register on entry to the subloop and restore
+		   the register on exit from the subloop.  */
+		cost -= border_costs.spill_inside_loop_cost ();
 	      else
 		{
-		  cost
-		    += (ira_memory_move_cost[mode][rclass][0] * exit_freq
-			+ ira_memory_move_cost[mode][rclass][1] * enter_freq);
+		  /* The register was also allocated in the subloop.  If we
+		     spill it in the outer loop then we'll need to load the
+		     register on entry to the subloop and store the register
+		     back on exit from the subloop.  */
+		  cost += border_costs.spill_outside_loop_cost ();
 		  if (hard_regno2 != hard_regno)
-		    cost -= (ira_register_move_cost[mode][rclass][rclass]
-			     * (exit_freq + enter_freq));
+		    cost -= border_costs.move_between_loops_cost ();
 		}
 	    }
 	  if ((parent = loop_node->parent) != NULL
 	      && (parent_allocno = parent->regno_allocno_map[regno]) != NULL)
 	    {
 	      ira_assert (rclass == ALLOCNO_CLASS (parent_allocno));
-	      exit_freq = ira_loop_edge_freq (loop_node, regno, true);
-	      enter_freq = ira_loop_edge_freq (loop_node, regno, false);
+	      ira_loop_border_costs border_costs (a);
 	      if ((hard_regno2 = ALLOCNO_HARD_REGNO (parent_allocno)) < 0)
-		cost -= (ira_memory_move_cost[mode][rclass][0] * exit_freq
-			 + ira_memory_move_cost[mode][rclass][1] * enter_freq);
+		/* The register was spilled in the parent loop.  If we spill
+		   it in this loop too then we'll no longer need to load the
+		   register on entry to this loop and save the register back
+		   on exit from this loop.  */
+		cost -= border_costs.spill_outside_loop_cost ();
 	      else
 		{
-		  cost
-		    += (ira_memory_move_cost[mode][rclass][1] * exit_freq
-			+ ira_memory_move_cost[mode][rclass][0] * enter_freq);
+		  /* The register was also allocated in the parent loop.
+		     If we spill it in this loop then we'll need to save
+		     the register on entry to this loop and restore the
+		     register on exit from this loop.  */
+		  cost += border_costs.spill_inside_loop_cost ();
 		  if (hard_regno2 != hard_regno)
-		    cost -= (ira_register_move_cost[mode][rclass][rclass]
-			     * (exit_freq + enter_freq));
+		    cost -= border_costs.move_between_loops_cost ();
 		}
 	    }
 	  if (cost < 0)
diff --git a/gcc/ira-costs.c b/gcc/ira-costs.c
index 6891156b5aaa6cdc7d43e536a74e077e216eef0c..4ac2b7888921491eb23ee2f24518cfe32f6d2247 100644
--- a/gcc/ira-costs.c
+++ b/gcc/ira-costs.c
@@ -781,7 +781,8 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops,
 
 	    case CT_SPECIAL_MEMORY:
 	      insn_allows_mem[i] = allows_mem[i] = 1;
-	      if (MEM_P (op) && constraint_satisfied_p (op, cn))
+	      if (MEM_P (extract_mem_from_operand (op))
+		  && constraint_satisfied_p (op, cn))
 		win = 1;
 	      break;
 
@@ -1397,15 +1398,16 @@ record_operand_costs (rtx_insn *insn, enum reg_class *pref)
      commutative.  */
   for (i = 0; i < recog_data.n_operands; i++)
     {
+      rtx op_mem = extract_mem_from_operand (recog_data.operand[i]);
       memcpy (op_costs[i], init_cost, struct_costs_size);
 
       if (GET_CODE (recog_data.operand[i]) == SUBREG)
 	recog_data.operand[i] = SUBREG_REG (recog_data.operand[i]);
 
-      if (MEM_P (recog_data.operand[i]))
-	record_address_regs (GET_MODE (recog_data.operand[i]),
-			     MEM_ADDR_SPACE (recog_data.operand[i]),
-			     XEXP (recog_data.operand[i], 0),
+      if (MEM_P (op_mem))
+	record_address_regs (GET_MODE (op_mem),
+			     MEM_ADDR_SPACE (op_mem),
+			     XEXP (op_mem, 0),
 			     0, MEM, SCRATCH, frequency * 2);
       else if (constraints[i][0] == 'p'
 	       || (insn_extra_address_constraint
@@ -2326,7 +2328,7 @@ ira_tune_allocno_costs (void)
 {
   int j, n, regno;
   int cost, min_cost, *reg_costs;
-  enum reg_class aclass, rclass;
+  enum reg_class aclass;
   machine_mode mode;
   ira_allocno_t a;
   ira_allocno_iterator ai;
@@ -2365,12 +2367,9 @@ ira_tune_allocno_costs (void)
 	    }
 	  if (skip_p)
 	    continue;
-	  rclass = REGNO_REG_CLASS (regno);
 	  cost = 0;
 	  if (ira_need_caller_save_p (a, regno))
-	    cost += (ALLOCNO_CALL_FREQ (a)
-		     * (ira_memory_move_cost[mode][rclass][0]
-			+ ira_memory_move_cost[mode][rclass][1]));
+	    cost += ira_caller_save_cost (a);
 #ifdef IRA_HARD_REGNO_ADD_COST_MULTIPLIER
 	  cost += ((ira_memory_move_cost[mode][rclass][0]
 		    + ira_memory_move_cost[mode][rclass][1])
diff --git a/gcc/ira-int.h b/gcc/ira-int.h
index 4bee4eec66eee26951e888e193d5adeb1e559c63..121147cbb1e915f2ed35d3b0886316de74a9e3f2 100644
--- a/gcc/ira-int.h
+++ b/gcc/ira-int.h
@@ -314,6 +314,13 @@ struct ira_allocno
      vector where a bit with given index represents allocno with the
      same number.  */
   unsigned int conflict_vec_p : 1;
+  /* True if the parent loop has an allocno for the same register and
+     if the parent allocno's assignment might not be valid in this loop.
+     This means that we cannot merge this allocno and the parent allocno
+     together.
+
+     This is only ever true for non-cap allocnos.  */
+  unsigned int might_conflict_with_parent_p : 1;
   /* Hard register assigned to given allocno.  Negative value means
     that memory was allocated to the allocno.  During the reload,
     spilled allocno has value equal to the corresponding stack slot
@@ -423,6 +430,8 @@ struct ira_allocno
 #define ALLOCNO_CAP_MEMBER(A) ((A)->cap_member)
 #define ALLOCNO_NREFS(A) ((A)->nrefs)
 #define ALLOCNO_FREQ(A) ((A)->freq)
+#define ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P(A) \
+  ((A)->might_conflict_with_parent_p)
 #define ALLOCNO_HARD_REGNO(A) ((A)->hard_regno)
 #define ALLOCNO_CALL_FREQ(A) ((A)->call_freq)
 #define ALLOCNO_CALLS_CROSSED_NUM(A) ((A)->calls_crossed_num)
@@ -1058,6 +1067,7 @@ extern void ira_debug_conflicts (bool);
 extern void ira_build_conflicts (void);
 
 /* ira-color.c */
+extern ira_allocno_t ira_soft_conflict (ira_allocno_t, ira_allocno_t);
 extern void ira_debug_hard_regs_forest (void);
 extern int ira_loop_edge_freq (ira_loop_tree_node_t, int, bool);
 extern void ira_reassign_conflict_allocnos (int);
@@ -1539,4 +1549,155 @@ ira_need_caller_save_p (ira_allocno_t a, unsigned int regno)
 				    ALLOCNO_MODE (a), regno);
 }
 
+/* Represents the boundary between an allocno in one loop and its parent
+   allocno in the enclosing loop.  It is usually possible to change a
+   register's allocation on this boundary; the class provides routines
+   for calculating the cost of such changes.  */
+class ira_loop_border_costs
+{
+public:
+  ira_loop_border_costs (ira_allocno_t);
+
+  int move_between_loops_cost () const;
+  int spill_outside_loop_cost () const;
+  int spill_inside_loop_cost () const;
+
+private:
+  /* The mode and class of the child allocno.  */
+  machine_mode m_mode;
+  reg_class m_class;
+
+  /* Sums the frequencies of the entry edges and the exit edges.  */
+  int m_entry_freq, m_exit_freq;
+};
+
+/* Return the cost of storing the register on entry to the loop and
+   loading it back on exit from the loop.  This is the cost to use if
+   the register is spilled within the loop but is successfully allocated
+   in the parent loop.  */
+inline int
+ira_loop_border_costs::spill_inside_loop_cost () const
+{
+  return (m_entry_freq * ira_memory_move_cost[m_mode][m_class][0]
+	  + m_exit_freq * ira_memory_move_cost[m_mode][m_class][1]);
+}
+
+/* Return the cost of loading the register on entry to the loop and
+   storing it back on exit from the loop.  This is the cost to use if
+   the register is successfully allocated within the loop but is spilled
+   in the parent loop.  */
+inline int
+ira_loop_border_costs::spill_outside_loop_cost () const
+{
+  return (m_entry_freq * ira_memory_move_cost[m_mode][m_class][1]
+	  + m_exit_freq * ira_memory_move_cost[m_mode][m_class][0]);
+}
+
+/* Return the cost of moving the pseudo register between different hard
+   registers on entry and exit from the loop.  This is the cost to use
+   if the register is successfully allocated within both this loop and
+   the parent loop, but the allocations for the loops differ.  */
+inline int
+ira_loop_border_costs::move_between_loops_cost () const
+{
+  ira_init_register_move_cost_if_necessary (m_mode);
+  auto move_cost = ira_register_move_cost[m_mode][m_class][m_class];
+  return move_cost * (m_entry_freq + m_exit_freq);
+}
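+
+/* Example with illustrative numbers only: with entry frequency 3, exit
+   frequency 5, a store cost of 4 and a load cost of 6 for the mode and
+   class, spill_inside_loop_cost is 3*4 + 5*6 = 42 and
+   spill_outside_loop_cost is 3*6 + 5*4 = 38; with a register move cost
+   of 2, move_between_loops_cost is 2 * (3 + 5) = 16.  */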
+
+/* Return true if subloops that contain allocnos for A's register can
+   use a different assignment from A.  ALLOCATED_P is true for the case
+   in which allocation succeeded for A.  */
+inline bool
+ira_subloop_allocnos_can_differ_p (ira_allocno_t a, bool allocated_p = true)
+{
+  auto regno = ALLOCNO_REGNO (a);
+
+  if (pic_offset_table_rtx != NULL
+      && regno == (int) REGNO (pic_offset_table_rtx))
+    return false;
+
+  ira_assert (regno < ira_reg_equiv_len);
+  if (ira_equiv_no_lvalue_p (regno))
+    return false;
+
+  /* Avoid overlapping multi-registers.  Moves between them might result
+     in wrong code generation.  */
+  if (allocated_p)
+    {
+      auto pclass = ira_pressure_class_translate[ALLOCNO_CLASS (a)];
+      if (ira_reg_class_max_nregs[pclass][ALLOCNO_MODE (a)] > 1)
+	return false;
+    }
+
+  return true;
+}
+
+/* Return true if we should treat A and SUBLOOP_A as belonging to a
+   single region.  */
+inline bool
+ira_single_region_allocno_p (ira_allocno_t a, ira_allocno_t subloop_a)
+{
+  if (flag_ira_region != IRA_REGION_MIXED)
+    return false;
+
+  if (ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (subloop_a))
+    return false;
+
+  auto rclass = ALLOCNO_CLASS (a);
+  auto pclass = ira_pressure_class_translate[rclass];
+  auto loop_used_regs = ALLOCNO_LOOP_TREE_NODE (a)->reg_pressure[pclass];
+  return loop_used_regs <= ira_class_hard_regs_num[pclass];
+}
+
+/* Return the set of all hard registers that conflict with A.  */
+inline HARD_REG_SET
+ira_total_conflict_hard_regs (ira_allocno_t a)
+{
+  auto obj_0 = ALLOCNO_OBJECT (a, 0);
+  HARD_REG_SET conflicts = OBJECT_TOTAL_CONFLICT_HARD_REGS (obj_0);
+  for (int i = 1; i < ALLOCNO_NUM_OBJECTS (a); i++)
+    conflicts |= OBJECT_TOTAL_CONFLICT_HARD_REGS (ALLOCNO_OBJECT (a, i));
+  return conflicts;
+}
+
+/* Return the cost of saving a caller-saved register before each call
+   in A's live range and restoring the same register after each call.  */
+inline int
+ira_caller_save_cost (ira_allocno_t a)
+{
+  auto mode = ALLOCNO_MODE (a);
+  auto rclass = ALLOCNO_CLASS (a);
+  return (ALLOCNO_CALL_FREQ (a)
+	  * (ira_memory_move_cost[mode][rclass][0]
+	     + ira_memory_move_cost[mode][rclass][1]));
+}
+
+/* A and SUBLOOP_A are allocnos for the same pseudo register, with A's
+   loop immediately enclosing SUBLOOP_A's loop.  If we allocate to A a
+   hard register R that is clobbered by a call in SUBLOOP_A, decide
+   which of the following approaches should be used for handling the
+   conflict:
+
+   (1) Spill R on entry to SUBLOOP_A's loop, assign memory to SUBLOOP_A,
+       and restore R on exit from SUBLOOP_A's loop.
+
+   (2) Spill R before each necessary call in SUBLOOP_A's live range and
+       restore R after each such call.
+
+   Return true if (1) is better than (2).  SPILL_COST is the cost of
+   doing (1).  */
+inline bool
+ira_caller_save_loop_spill_p (ira_allocno_t a, ira_allocno_t subloop_a,
+			      int spill_cost)
+{
+  if (!ira_subloop_allocnos_can_differ_p (a))
+    return false;
+
+  /* Calculate the cost of saving a call-clobbered register
+     before each call and restoring it afterwards.  */
+  int call_cost = ira_caller_save_cost (subloop_a);
+  return call_cost && call_cost >= spill_cost;
+}
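+
+/* Example with illustrative numbers only: if SUBLOOP_A crosses calls
+   with combined frequency 8 and one save/restore pair costs 10,
+   approach (2) costs 80; with SPILL_COST = 50 for storing R around the
+   subloop, approach (1) is cheaper and the function returns true.  */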
+
 #endif /* GCC_IRA_INT_H */
diff --git a/gcc/ira.c b/gcc/ira.c
index 681ec2f46f9d9c57b47ed740f6fbe78fb617216f..b7551c1c4e94d3dad3ec604f8ab820e90167a46d 100644
--- a/gcc/ira.c
+++ b/gcc/ira.c
@@ -1845,6 +1845,7 @@ ira_setup_alts (rtx_insn *insn)
 	      default:
 		{
 		  enum constraint_num cn = lookup_constraint (p);
+		  rtx mem = NULL;
 		  switch (get_constraint_type (cn))
 		    {
 		    case CT_REGISTER:
@@ -1867,8 +1868,12 @@ ira_setup_alts (rtx_insn *insn)
 		      goto op_success;
 
 		    case CT_MEMORY:
+		      mem = op;
+		      /* Fall through.  */
 		    case CT_SPECIAL_MEMORY:
-		      if (MEM_P (op))
+		      if (!mem)
+			mem = extract_mem_from_operand (op);
+		      if (MEM_P (mem))
 			goto op_success;
 		      win_p = true;
 		      break;
@@ -2526,6 +2531,8 @@ check_allocation (void)
 	      int conflict_hard_regno = ALLOCNO_HARD_REGNO (conflict_a);
 	      if (conflict_hard_regno < 0)
 		continue;
+	      if (ira_soft_conflict (a, conflict_a))
+		continue;
 
 	      conflict_nregs = hard_regno_nregs (conflict_hard_regno,
 						 ALLOCNO_MODE (conflict_a));
@@ -5130,8 +5137,201 @@ move_unallocated_pseudos (void)
 	      INSN_UID (newinsn), i);
 	  SET_REG_N_REFS (i, 0);
 	}
+
+  first_moveable_pseudo = last_moveable_pseudo = 0;
 }
 
+
+/* Code dealing with scratches (changing them onto
+   pseudos and restoring them from the pseudos).
+
+   We change scratches into pseudos at the beginning of IRA to
+   simplify dealing with them (conflicts, hard register assignments).
+
+   If the pseudo denoting scratch was spilled it means that we do not
+   need a hard register for it.  Such pseudos are transformed back to
+   scratches at the end of LRA.  */
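+
+/* For example, an insn with (clobber (scratch:SI)) is rewritten to
+   clobber a fresh pseudo, say (clobber (reg:SI 97)); if that pseudo
+   ends up without a hard register, the SCRATCH is put back at the end
+   of LRA.  (The register number is illustrative.)  */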
+
+/* Description of location of a former scratch operand.  */
+struct sloc
+{
+  rtx_insn *insn;	/* Insn where the scratch was.  */
+  int nop;		/* Number of the operand which was a scratch.  */
+  unsigned regno;	/* regno generated instead of scratch.  */
+  int icode;		/* Original icode from which scratch was removed.  */
+};
+
+typedef struct sloc *sloc_t;
+
+/* Locations of the former scratches.  */
+static vec<sloc_t> scratches;
+
+/* Bitmap of scratch regnos.  */
+static bitmap_head scratch_bitmap;
+
+/* Bitmap of scratch operands.  */
+static bitmap_head scratch_operand_bitmap;
+
+/* Return true if pseudo REGNO is made of SCRATCH.  */
+bool
+ira_former_scratch_p (int regno)
+{
+  return bitmap_bit_p (&scratch_bitmap, regno);
+}
+
+/* Return true if the operand NOP of INSN is a former scratch.  */
+bool
+ira_former_scratch_operand_p (rtx_insn *insn, int nop)
+{
+  return bitmap_bit_p (&scratch_operand_bitmap,
+		       INSN_UID (insn) * MAX_RECOG_OPERANDS + nop) != 0;
+}
+
+/* Register operand NOP in INSN as a former scratch.  It will be
+   changed to scratch back, if it is necessary, at the LRA end.  */
+void
+ira_register_new_scratch_op (rtx_insn *insn, int nop, int icode)
+{
+  rtx op = *recog_data.operand_loc[nop];
+  sloc_t loc = XNEW (struct sloc);
+  ira_assert (REG_P (op));
+  loc->insn = insn;
+  loc->nop = nop;
+  loc->regno = REGNO (op);
+  loc->icode = icode;
+  scratches.safe_push (loc);
+  bitmap_set_bit (&scratch_bitmap, REGNO (op));
+  bitmap_set_bit (&scratch_operand_bitmap,
+		  INSN_UID (insn) * MAX_RECOG_OPERANDS + nop);
+  add_reg_note (insn, REG_UNUSED, op);
+}
+
+/* Return true if string STR contains constraint 'X'.  */
+static bool
+contains_X_constraint_p (const char *str)
+{
+  int c;
+
+  while ((c = *str))
+    {
+      str += CONSTRAINT_LEN (c, str);
+      if (c == 'X')
+	return true;
+    }
+  return false;
+}
+
+/* Change INSN's scratches into pseudos and save their location.
+   Return true if we changed any scratch.  */
+bool
+ira_remove_insn_scratches (rtx_insn *insn, bool all_p, FILE *dump_file,
+			   rtx (*get_reg) (rtx original))
+{
+  int i;
+  bool insn_changed_p;
+  rtx reg, *loc;
+
+  extract_insn (insn);
+  insn_changed_p = false;
+  for (i = 0; i < recog_data.n_operands; i++)
+    {
+      loc = recog_data.operand_loc[i];
+      if (GET_CODE (*loc) == SCRATCH && GET_MODE (*loc) != VOIDmode)
+	{
+	  if (! all_p && contains_X_constraint_p (recog_data.constraints[i]))
+	    continue;
+	  insn_changed_p = true;
+	  *loc = reg = get_reg (*loc);
+	  ira_register_new_scratch_op (insn, i, INSN_CODE (insn));
+	  if (dump_file != NULL)
+	    fprintf (dump_file,
+		     "Removing SCRATCH to p%u in insn #%u (nop %d)\n",
+		     REGNO (reg), INSN_UID (insn), i);
+	}
+    }
+  return insn_changed_p;
+}
+
+/* Return new register of the same mode as ORIGINAL.  Used in
+   remove_scratches.  */
+static rtx
+get_scratch_reg (rtx original)
+{
+  return gen_reg_rtx (GET_MODE (original));
+}
+
+/* Change scratches into pseudos and save their location.  Return true
+   if we changed any scratch.  */
+static bool
+remove_scratches (void)
+{
+  bool change_p = false;
+  basic_block bb;
+  rtx_insn *insn;
+
+  scratches.create (get_max_uid ());
+  bitmap_initialize (&scratch_bitmap, &reg_obstack);
+  bitmap_initialize (&scratch_operand_bitmap, &reg_obstack);
+  FOR_EACH_BB_FN (bb, cfun)
+    FOR_BB_INSNS (bb, insn)
+      if (INSN_P (insn)
+	  && ira_remove_insn_scratches (insn, false, ira_dump_file,
+					get_scratch_reg))
+	{
+	  /* Because we might use DF, we need to keep DF info up to
+	     date.  */
+	  df_insn_rescan (insn);
+	  change_p = true;
+	}
+  return change_p;
+}
+
+/* Changes pseudos created by function remove_scratches onto scratches.  */
+void
+ira_restore_scratches (FILE *dump_file)
+{
+  int regno, n;
+  unsigned i;
+  rtx *op_loc;
+  sloc_t loc;
+
+  for (i = 0; scratches.iterate (i, &loc); i++)
+    {
+      /* Ignore already deleted insns.  */
+      if (NOTE_P (loc->insn)
+	  && NOTE_KIND (loc->insn) == NOTE_INSN_DELETED)
+	continue;
+      extract_insn (loc->insn);
+      if (loc->icode != INSN_CODE (loc->insn))
+	{
+	  /* The icode doesn't match, which means the insn has been
+	     modified (e.g. register elimination).  The scratch cannot
+	     be restored.  */
+	  continue;
+	}
+      op_loc = recog_data.operand_loc[loc->nop];
+      if (REG_P (*op_loc)
+	  && ((regno = REGNO (*op_loc)) >= FIRST_PSEUDO_REGISTER)
+	  && reg_renumber[regno] < 0)
+	{
+	  /* It should be only case when scratch register with chosen
+	     constraint 'X' did not get memory or hard register.  */
+	  ira_assert (ira_former_scratch_p (regno));
+	  *op_loc = gen_rtx_SCRATCH (GET_MODE (*op_loc));
+	  for (n = 0; n < recog_data.n_dups; n++)
+	    *recog_data.dup_loc[n]
+	      = *recog_data.operand_loc[(int) recog_data.dup_num[n]];
+	  if (dump_file != NULL)
+	    fprintf (dump_file, "Restoring SCRATCH in insn #%u(nop %d)\n",
+		     INSN_UID (loc->insn), loc->nop);
+	}
+    }
+  for (i = 0; scratches.iterate (i, &loc); i++)
+    free (loc);
+  scratches.release ();
+  bitmap_clear (&scratch_bitmap);
+  bitmap_clear (&scratch_operand_bitmap);
+}
+
+
 /* If the backend knows where to allocate pseudos for hard register
    initial values, register these allocations now.  */
 static void
@@ -5180,8 +5380,10 @@ allocate_initial_values (void)
 					  &hreg, &preg));
 	}
     }
+
+
 }
 
 /* True when we use LRA instead of reload pass for the current
    function.  */
 bool ira_use_lra_p;
@@ -5202,6 +5404,17 @@ ira (FILE *f)
   bool saved_flag_caller_saves = flag_caller_saves;
   enum ira_region saved_flag_ira_region = flag_ira_region;
 
+  if (flag_ira_verbose < 10)
+    {
+      internal_flag_ira_verbose = flag_ira_verbose;
+      ira_dump_file = f;
+    }
+  else
+    {
+      internal_flag_ira_verbose = flag_ira_verbose - 10;
+      ira_dump_file = stderr;
+    }
+
   clear_bb_flags ();
 
   /* Determine if the current function is a leaf before running IRA
@@ -5248,17 +5461,6 @@ ira (FILE *f)
   if (flag_caller_saves && !ira_use_lra_p)
     init_caller_save ();
 
-  if (flag_ira_verbose < 10)
-    {
-      internal_flag_ira_verbose = flag_ira_verbose;
-      ira_dump_file = f;
-    }
-  else
-    {
-      internal_flag_ira_verbose = flag_ira_verbose - 10;
-      ira_dump_file = stderr;
-    }
-
   setup_prohibited_mode_move_regs ();
   decrease_live_ranges_number ();
   df_note_add_problem ();
@@ -5303,9 +5505,6 @@ ira (FILE *f)
   if (warn_clobbered)
     generate_setjmp_warnings ();
 
-  if (resize_reg_info () && flag_ira_loop_pressure)
-    ira_set_pseudo_classes (true, ira_dump_file);
-
   init_alias_analysis ();
   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
   reg_equiv = XCNEWVEC (struct equivalence, max_reg_num ());
@@ -5329,6 +5528,30 @@ ira (FILE *f)
   end_alias_analysis ();
   free (reg_equiv);
 
+  /* Once max_regno changes, we need to free and re-init/re-compute
+     some data structures like regstat_n_sets_and_refs and reg_info_p.  */
+  auto regstat_recompute_for_max_regno = [] ()
+    {
+      regstat_free_n_sets_and_refs ();
+      regstat_free_ri ();
+      regstat_init_n_sets_and_refs ();
+      regstat_compute_ri ();
+    };
+
+  int max_regno_before_rm = max_reg_num ();
+  if (ira_use_lra_p && remove_scratches ())
+    {
+      ira_expand_reg_equiv ();
+      /* For now remove_scratches is supposed to create pseudos when it
+	 succeeds; assert this happens all the time.  Once it doesn't
+	 hold, we should guard the regstat recompute for the case
+	 max_regno changes.  */
+      gcc_assert (max_regno_before_rm != max_reg_num ());
+      regstat_recompute_for_max_regno ();
+    }
+
+  if (resize_reg_info () && flag_ira_loop_pressure)
+    ira_set_pseudo_classes (true, ira_dump_file);
+
   setup_reg_equiv ();
   grow_reg_equivs ();
   setup_reg_equiv_init ();
@@ -5451,12 +5674,7 @@ ira (FILE *f)
 #endif
 
   if (max_regno != max_regno_before_ira)
-    {
-      regstat_free_n_sets_and_refs ();
-      regstat_free_ri ();
-      regstat_init_n_sets_and_refs ();
-      regstat_compute_ri ();
-    }
+    regstat_recompute_for_max_regno ();
 
   overall_cost_before = ira_overall_cost;
   if (! ira_conflicts_p)
diff --git a/gcc/ira.h b/gcc/ira.h
index 09f40ef6a78a185e3af396f2881b55d3fa51da97..c30f36aeccaee86f7a7bf0a6e724ab93c2d26a92 100644
--- a/gcc/ira.h
+++ b/gcc/ira.h
@@ -207,6 +207,13 @@ extern bool ira_bad_reload_regno (int, rtx, rtx);
 
 extern void ira_adjust_equiv_reg_cost (unsigned, int);
 
+extern bool ira_former_scratch_p (int regno);
+extern bool ira_former_scratch_operand_p (rtx_insn *insn, int nop);
+extern void ira_register_new_scratch_op (rtx_insn *insn, int nop, int icode);
+extern bool ira_remove_insn_scratches (rtx_insn *insn, bool all_p,
+				       FILE *dump_file,
+				       rtx (*get_reg) (rtx original));
+extern void ira_restore_scratches (FILE *dump_file);
+
 /* ira-costs.c */
 extern void ira_costs_c_finalize (void);
 
diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c
index 24b9bcb11dce5b873a84ce1cd00c88d7c3ffe8b0..262683f0189f9091f30e4988e9d5e438909c4a20 100644
--- a/gcc/loop-invariant.c
+++ b/gcc/loop-invariant.c
@@ -1192,7 +1192,7 @@ find_invariants_bb (class loop *loop, basic_block bb, bool always_reached,
 
   /* Don't move insn of cold BB out of loop to preheader
      to reduce calculations and register live range in hot loop
     with cold BB.  */
-  if (!always_executed && preheader->count > bb->count)
+  if (!always_executed && preheader->count > bb->count && flag_lim_count_check)
     {
       if (dump_file)
 	fprintf (dump_file,
 		 "Don't move invariant from bb: %d out of loop %d\n",
diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index 7cc479b30422e8c849447b02e9478e0eb42c5c13..ce9294d71e1a68cc129fefba6bf5f855f845df2e 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -409,14 +409,34 @@ valid_address_p (rtx op, struct address_info *ad,
   return valid_address_p (ad->mode, *ad->outer, ad->as);
 }
 
+/* For a special_memory_operand, MEM_P (op) can be false even though a
+   memory reference is present, e.g. for bcst_mem_operand in the i386
+   back end.  Extract and return the real memory operand inside OP,
+   or OP itself if there is none.  */
+rtx
+extract_mem_from_operand (rtx op)
+{
+  for (rtx x = op;; x = XEXP (x, 0))
+    {
+      if (MEM_P (x))
+	return x;
+      if (GET_RTX_LENGTH (GET_CODE (x)) != 1
+	  || GET_RTX_FORMAT (GET_CODE (x))[0] != 'e')
+	break;
+    }
+  return op;
+}
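+
+/* For instance, i386's bcst_mem_operand accepts a broadcast wrapper
+   such as (vec_duplicate:V4SF (mem:SF ...)); peeling the single-operand
+   'e'-format wrappers above yields the inner (mem:SF ...).  */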
+
 /* Return true if the eliminated form of memory reference OP satisfies
    extra (special) memory constraint CONSTRAINT.  */
 static bool
 satisfies_memory_constraint_p (rtx op, enum constraint_num constraint)
 {
   struct address_info ad;
+  rtx mem = extract_mem_from_operand (op);
+  if (!MEM_P (mem))
+    return false;
 
-  decompose_mem_address (&ad, op);
+  decompose_mem_address (&ad, mem);
   address_eliminator eliminator (&ad);
   return constraint_satisfied_p (op, constraint);
 }
@@ -2386,8 +2406,7 @@ process_alt_operands (int only_alternative)
 		  break;
 
 		case CT_SPECIAL_MEMORY:
-		  if (MEM_P (op)
-		      && satisfies_memory_constraint_p (op, cn))
+		  if (satisfies_memory_constraint_p (op, cn))
 		    win = true;
 		  else if (spilled_pseudo_p (op))
 		    win = true;
@@ -2425,7 +2444,7 @@ process_alt_operands (int only_alternative)
 	  while ((p += len), c);
 
 	  scratch_p = (operand_reg[nop] != NULL_RTX
-		       && lra_former_scratch_p (REGNO (operand_reg[nop])));
+		       && ira_former_scratch_p (REGNO (operand_reg[nop])));
 	  /* Record which operands fit this alternative.  */
 	  if (win)
 	    {
@@ -4279,8 +4298,8 @@ curr_insn_transform (bool check_only_p)
 	     assigment pass and the scratch pseudo will be spilled.
 	     Spilled scratch pseudos are transformed back to scratches
 	     at the LRA end.  */
-	  && lra_former_scratch_operand_p (curr_insn, i)
-	  && lra_former_scratch_p (REGNO (op)))
+	  && ira_former_scratch_operand_p (curr_insn, i)
+	  && ira_former_scratch_p (REGNO (op)))
 	{
 	  int regno = REGNO (op);
 	  lra_change_class (regno, NO_REGS, "      Change to", true);
@@ -4301,7 +4320,7 @@ curr_insn_transform (bool check_only_p)
 	  && goal_alt[i] != NO_REGS && REG_P (op)
 	  && (regno = REGNO (op)) >= FIRST_PSEUDO_REGISTER
 	  && regno < new_regno_start
-	  && ! lra_former_scratch_p (regno)
+	  && ! ira_former_scratch_p (regno)
 	  && reg_renumber[regno] < 0
 	  /* Check that the optional reload pseudo will be able to
 	     hold given mode value.  */
diff --git a/gcc/lra-int.h b/gcc/lra-int.h
index 01fcbfa2664a24eea21bb31d46c4ed18d179614c..f9e99a28baac0e3d6b2f1348670c9b33ecbc4ed1 100644
--- a/gcc/lra-int.h
+++ b/gcc/lra-int.h
@@ -319,9 +319,6 @@ extern struct lra_insn_reg *lra_get_insn_regs (int);
 extern void lra_free_copies (void);
 extern void lra_create_copy (int, int, int);
 extern lra_copy_t lra_get_copy (int);
-extern bool lra_former_scratch_p (int);
-extern bool lra_former_scratch_operand_p (rtx_insn *, int);
-extern void lra_register_new_scratch_op (rtx_insn *, int, int);
 
 extern int lra_new_regno_start;
 extern int lra_constraint_new_regno_start;
diff --git a/gcc/lra-remat.c b/gcc/lra-remat.c
index 09c3975bc6c1e8489838ed983cb898735ca1fae8..4b6308bc0dc69449a7d822c9cef4e78b06f24076 100644
--- a/gcc/lra-remat.c
+++ b/gcc/lra-remat.c
@@ -1036,12 +1036,12 @@ update_scratch_ops (rtx_insn *remat_insn)
       if (! REG_P (*loc))
 	continue;
       int regno = REGNO (*loc);
-      if (! lra_former_scratch_p (regno))
+      if (! ira_former_scratch_p (regno))
 	continue;
       *loc = lra_create_new_reg (GET_MODE (*loc), *loc,
 				 lra_get_allocno_class (regno),
 				 "scratch pseudo copy");
-      lra_register_new_scratch_op (remat_insn, i, id->icode);
+      ira_register_new_scratch_op (remat_insn, i, id->icode);
     }
 }
 
diff --git a/gcc/lra-spills.c b/gcc/lra-spills.c
index 0caa4acd3b5c2672c4269d6c9754c2a20221be6e..8082a5b489f7abd575ac6cbcba632717207631fd 100644
--- a/gcc/lra-spills.c
+++ b/gcc/lra-spills.c
@@ -446,7 +446,7 @@ remove_pseudos (rtx *loc, rtx_insn *insn)
 	   it might result in an address reload for some targets.  In
 	   any case we transform such pseudos not getting hard registers
 	   into scratches back.  */
-	&& ! lra_former_scratch_p (i))
+	&& ! ira_former_scratch_p (i))
       {
 	if (lra_reg_info[i].nrefs == 0
 	    && pseudo_slots[i].mem == NULL && spill_hard_reg[i] == NULL)
@@ -494,7 +494,7 @@ spill_pseudos (void)
   for (i = FIRST_PSEUDO_REGISTER; i < regs_num; i++)
     {
       if (lra_reg_info[i].nrefs != 0 && lra_get_regno_hard_regno (i) < 0
-	  && ! lra_former_scratch_p (i))
+	  && ! ira_former_scratch_p (i))
 	{
 	  bitmap_set_bit (spilled_pseudos, i);
 	  bitmap_ior_into (changed_insns, &lra_reg_info[i].insn_bitmap);
@@ -578,7 +578,7 @@ lra_need_for_scratch_reg_p (void)
   for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
     if (lra_reg_info[i].nrefs != 0 && lra_get_regno_hard_regno (i) < 0
-	&& lra_former_scratch_p (i))
+	&& ira_former_scratch_p (i))
       return true;
   return false;
 }
@@ -591,7 +591,7 @@ lra_need_for_spills_p (void)
   for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
     if (lra_reg_info[i].nrefs != 0 && lra_get_regno_hard_regno (i) < 0
-	&& ! lra_former_scratch_p (i))
+	&& ! ira_former_scratch_p (i))
       return true;
   return false;
 }
@@ -612,7 +612,7 @@ lra_spill (void)
   for (n = 0, i = FIRST_PSEUDO_REGISTER; i < regs_num; i++)
     if (lra_reg_info[i].nrefs != 0 && lra_get_regno_hard_regno (i) < 0
 	/* We do not want to assign memory for former scratches.  */
-	&& ! lra_former_scratch_p (i))
+	&& ! ira_former_scratch_p (i))
       pseudo_regnos[n++] = i;
   lra_assert (n > 0);
   pseudo_slots = XNEWVEC (struct pseudo_slot, regs_num);
diff --git a/gcc/lra.c b/gcc/lra.c
index 3543ce3993c2fd78c336c0b21c0c6dfff12a82a8..f97bb8e077b0609715becd14070af959359eeafa 100644
--- a/gcc/lra.c
+++ b/gcc/lra.c
@@ -160,8 +160,6 @@ static void invalidate_insn_recog_data (int);
 static int get_insn_freq (rtx_insn *);
 static void invalidate_insn_data_regno_info (lra_insn_recog_data_t,
 					     rtx_insn *, int);
-static void remove_scratches_1 (rtx_insn *);
-
 /* Expand all regno related info needed for LRA.  */
 static void
 expand_reg_data (int old)
@@ -482,6 +480,8 @@ lra_emit_add (rtx x, rtx y, rtx z)
 /* The number of emitted reload insns so far.  */
 int lra_curr_reload_num;
 
+static void remove_insn_scratches (rtx_insn *insn);
+
 /* Emit x := y, processing special case when y = u + v or y = u + v
    * scale + w through emit_add (Y can be an address which is base +
    index reg * scale + displacement in general case).  X may be used
@@ -503,7 +503,7 @@ lra_emit_move (rtx x, rtx y)
       /* The move pattern may require scratch registers, so convert
 	 them into real registers now.  */
       if (insn != NULL_RTX)
-	remove_scratches_1 (insn);
+	remove_insn_scratches (insn);
       if (REG_P (x))
 	lra_reg_info[ORIGINAL_REGNO (x)].last_reload = ++lra_curr_reload_num;
       /* Function emit_move can create pseudos -- so expand the pseudo
@@ -1988,170 +1988,35 @@ lra_substitute_pseudo_within_insn (rtx_insn *insn, int old_regno,
 
 
-/* This page contains code dealing with scratches (changing them onto
-   pseudos and restoring them from the pseudos).
-
-   We change scratches into pseudos at the beginning of LRA to
-   simplify dealing with them (conflicts, hard register assignments).
-
-   If the pseudo denoting scratch was spilled it means that we do need
-   a hard register for it.  Such pseudos are transformed back to
-   scratches at the end of LRA.  */
-
-/* Description of location of a former scratch operand.  */
-struct sloc
+/* Return new register of the same mode as ORIGINAL of class ALL_REGS.
+   Used in remove_insn_scratches.  */
+static rtx
+get_scratch_reg (rtx original)
 {
-  rtx_insn *insn; /* Insn where the scratch was.  */
-  int nop;  /* Number of the operand which was a scratch.  */
-  int icode;  /* Original icode from which scratch was removed.  */
-};
-
-typedef struct sloc *sloc_t;
-
-/* Locations of the former scratches.  */
-static vec<sloc_t> scratches;
-
-/* Bitmap of scratch regnos.  */
-static bitmap_head scratch_bitmap;
-
-/* Bitmap of scratch operands.  */
-static bitmap_head scratch_operand_bitmap;
-
-/* Return true if pseudo REGNO is made of SCRATCH.  */
-bool
-lra_former_scratch_p (int regno)
-{
-  return bitmap_bit_p (&scratch_bitmap, regno);
+  return lra_create_new_reg (GET_MODE (original), original, ALL_REGS, NULL);
 }
 
-/* Return true if the operand NOP of INSN is a former scratch.  */
-bool
-lra_former_scratch_operand_p (rtx_insn *insn, int nop)
-{
-  return bitmap_bit_p (&scratch_operand_bitmap,
-		       INSN_UID (insn) * MAX_RECOG_OPERANDS + nop) != 0;
-}
-
-/* Register operand NOP in INSN as a former scratch.  It will be
-   changed to scratch back, if it is necessary, at the LRA end.  */
*/ -void -lra_register_new_scratch_op (rtx_insn *insn, int nop, int icode) -{ - lra_insn_recog_data_t id = lra_get_insn_recog_data (insn); - rtx op = *id->operand_loc[nop]; - sloc_t loc = XNEW (struct sloc); - lra_assert (REG_P (op)); - loc->insn = insn; - loc->nop = nop; - loc->icode = icode; - scratches.safe_push (loc); - bitmap_set_bit (&scratch_bitmap, REGNO (op)); - bitmap_set_bit (&scratch_operand_bitmap, - INSN_UID (insn) * MAX_RECOG_OPERANDS + nop); - add_reg_note (insn, REG_UNUSED, op); -} - -/* Change INSN's scratches into pseudos and save their location. */ +/* Remove all insn scratches in INSN. */ static void -remove_scratches_1 (rtx_insn *insn) +remove_insn_scratches (rtx_insn *insn) { - int i; - bool insn_changed_p; - rtx reg; - lra_insn_recog_data_t id; - struct lra_static_insn_data *static_id; - - id = lra_get_insn_recog_data (insn); - static_id = id->insn_static_data; - insn_changed_p = false; - for (i = 0; i < static_id->n_operands; i++) - if (GET_CODE (*id->operand_loc[i]) == SCRATCH - && GET_MODE (*id->operand_loc[i]) != VOIDmode) - { - insn_changed_p = true; - *id->operand_loc[i] = reg - = lra_create_new_reg (static_id->operand[i].mode, - *id->operand_loc[i], ALL_REGS, NULL); - lra_register_new_scratch_op (insn, i, id->icode); - if (lra_dump_file != NULL) - fprintf (lra_dump_file, - "Removing SCRATCH in insn #%u (nop %d)\n", - INSN_UID (insn), i); - } - if (insn_changed_p) - /* Because we might use DF right after caller-saves sub-pass - we need to keep DF info up to date. */ + if (ira_remove_insn_scratches (insn, true, lra_dump_file, get_scratch_reg)) df_insn_rescan (insn); } -/* Change scratches into pseudos and save their location. */ +/* Remove all insn scratches in the current function. */ static void remove_scratches (void) { basic_block bb; rtx_insn *insn; - scratches.create (get_max_uid ()); - bitmap_initialize (&scratch_bitmap, &reg_obstack); - bitmap_initialize (&scratch_operand_bitmap, &reg_obstack); FOR_EACH_BB_FN (bb, cfun) FOR_BB_INSNS (bb, insn) - if (INSN_P (insn)) - remove_scratches_1 (insn); -} - -/* Changes pseudos created by function remove_scratches onto scratches. */ -static void -restore_scratches (void) -{ - int regno; - unsigned i; - sloc_t loc; - rtx_insn *last = NULL; - lra_insn_recog_data_t id = NULL; - - for (i = 0; scratches.iterate (i, &loc); i++) - { - /* Ignore already deleted insns. */ - if (NOTE_P (loc->insn) - && NOTE_KIND (loc->insn) == NOTE_INSN_DELETED) - continue; - if (last != loc->insn) - { - last = loc->insn; - id = lra_get_insn_recog_data (last); - } - if (loc->icode != id->icode) - { - /* The icode doesn't match, which means the insn has been modified - (e.g. register elimination). The scratch cannot be restored. */ - continue; - } - if (REG_P (*id->operand_loc[loc->nop]) - && ((regno = REGNO (*id->operand_loc[loc->nop])) - >= FIRST_PSEUDO_REGISTER) - && lra_get_regno_hard_regno (regno) < 0) - { - /* It should be only case when scratch register with chosen - constraint 'X' did not get memory or hard register. 
*/ - lra_assert (lra_former_scratch_p (regno)); - *id->operand_loc[loc->nop] - = gen_rtx_SCRATCH (GET_MODE (*id->operand_loc[loc->nop])); - lra_update_dup (id, loc->nop); - if (lra_dump_file != NULL) - fprintf (lra_dump_file, "Restoring SCRATCH in insn #%u(nop %d)\n", - INSN_UID (loc->insn), loc->nop); - } - } - for (i = 0; scratches.iterate (i, &loc); i++) - free (loc); - scratches.release (); - bitmap_clear (&scratch_bitmap); - bitmap_clear (&scratch_operand_bitmap); + if (INSN_P (insn)) + remove_insn_scratches (insn); } - - /* Function checks RTL for correctness. If FINAL_P is true, it is done at the end of LRA and the check is more rigorous. */ static void @@ -2571,7 +2436,7 @@ lra (FILE *f) lra_bad_spill_regno_start = lra_constraint_new_regno_start; lra_assignment_iter_after_spill = 0; } - restore_scratches (); + ira_restore_scratches (lra_dump_file); lra_eliminate (true, false); lra_final_code_change (); lra_in_progress = 0; diff --git a/gcc/recog.c b/gcc/recog.c index 2720aaaac8595a723d2cf0efc4106e8860e3d405..6af343999584981874e5f70759631d64b84d1173 100644 --- a/gcc/recog.c +++ b/gcc/recog.c @@ -1778,6 +1778,7 @@ asm_operand_ok (rtx op, const char *constraint, const char **constraints) /* FALLTHRU */ default: cn = lookup_constraint (constraint); + rtx mem = NULL; switch (get_constraint_type (cn)) { case CT_REGISTER: @@ -1796,9 +1797,13 @@ asm_operand_ok (rtx op, const char *constraint, const char **constraints) break; case CT_MEMORY: + mem = op; + /* Fall through. */ case CT_SPECIAL_MEMORY: /* Every memory operand can be reloaded to fit. */ - result = result || memory_operand (op, VOIDmode); + if (!mem) + mem = extract_mem_from_operand (op); + result = result || memory_operand (mem, VOIDmode); break; case CT_ADDRESS: @@ -2584,7 +2589,9 @@ constrain_operands (int strict, alternative_mask alternatives) /* A unary operator may be accepted by the predicate, but it is irrelevant for matching constraints. */ - if (UNARY_P (op)) + /* For special_memory_operand, there could be a memory operand inside, + and it would cause a mismatch for constraint_satisfied_p. */ + if (UNARY_P (op) && op == extract_mem_from_operand (op)) op = XEXP (op, 0); if (GET_CODE (op) == SUBREG) diff --git a/gcc/rtl.h b/gcc/rtl.h index b29afca8d6b67db9870f00158a2b56e51e352a4e..35fb6ba73a02cf2a26e7b23e50ba6455983019a2 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -4323,6 +4323,7 @@ extern rtx gen_hard_reg_clobber (machine_mode, unsigned int); extern rtx get_reg_known_value (unsigned int); extern bool get_reg_known_equiv_p (unsigned int); extern rtx get_reg_base_value (unsigned int); +extern rtx extract_mem_from_operand (rtx); #ifdef STACK_REGS extern int stack_regs_mentioned (const_rtx insn); diff --git a/gcc/testsuite/gcc.target/aarch64/nospill.c b/gcc/testsuite/gcc.target/aarch64/nospill.c new file mode 100644 index 0000000000000000000000000000000000000000..968a4267e0d46192e4c59c098f787f5f4738a5e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/nospill.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ + +/* The pseudo for P is marked as moveable in the IRA pass. */ +float +func_0 (float a, float b, float c) +{ + float p = c / a; + + if (b > 1) + { + b /= p; + if (c > 2) + a /= 3; + } + + return b / c * a; +} + +/* If first_moveable_pseudo and last_moveable_pseudo are not reset correctly, + they will carry over and spill the pseudo for Q. 
*/ +float +func_1 (float a, float b, float c) +{ + float q = a + b; + + c *= a / (b + b); + if (a > 0) + c *= q; + + return a * b * c; +} + +/* We have plenty of spare registers, so check nothing has been spilled. */ +/* { dg-final { scan-assembler-not "\tstr\t" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c b/gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c new file mode 100644 index 0000000000000000000000000000000000000000..7991ba7982f767037224d9fc1998f1bb62363e38 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c @@ -0,0 +1,47 @@ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -favoid-propagating-conflicts" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#define PROB 0.1 + +struct L +{ + int data; + volatile struct L *next; + volatile struct L *inner; +}; + +/* The thing we're testing here is that the !head->inner path of the outer loop + body has no stack accesses. It's possible that we'll need to update this + pattern for unrelated code changes, but the test should be XFAILed rather + than changed if any new stack accesses occur on the !head->inner path. */ +/* +** foo: +** ... +** ldr (w[0-9]+), \[(x[0-9]+)\] +** add (w[0-9]+), (?:\3, \1|\1, \3) +** ldr (x[0-9]+), \[\2, #?16\] +** str \3, \[\2\] +** ldr \2, \[\2, #?8\] +** cbn?z \4, .* +** ... +** ret +*/ +void +foo (volatile struct L *head, int inc) +{ + while (head) + { + inc = head->data + inc; + volatile struct L *inner = head->inner; + head->data = inc; + head = head->next; + if (__builtin_expect_with_probability (inner != 0, 0, PROB)) + for (int i = 0; i < 1000; ++i) + /* Leave x30 for i. */ + asm volatile ("// foo" ::: + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28"); + } +} diff --git a/gcc/testsuite/gcc.target/aarch64/reg-alloc-3.c b/gcc/testsuite/gcc.target/aarch64/reg-alloc-3.c new file mode 100644 index 0000000000000000000000000000000000000000..ae5910ae35aa1dc48877d96ea52516b794b71ecf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/reg-alloc-3.c @@ -0,0 +1,65 @@ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -favoid-propagating-conflicts" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#define PROB 0.1 + +struct L +{ + int data; + volatile struct L *next; + volatile struct L *inner; +}; + +void ext(); + +/* The thing we're testing here is that the !head->inner path of the outer loop + body has no stack accesses. It's possible that we'll need to update this + pattern for unrelated code changes, but the test should be XFAILed rather + than changed if any new stack accesses creep into the !head->inner path. */ +/* +** foo: +** ... +** ldr (w[0-9]+), \[(x[0-9]+)\] +** add (w[0-9]+), (?:\3, \1|\1, \3) +** ldr (x[0-9]+), \[\2, #?16\] +** str \3, \[\2\] +** ldr \2, \[\2, #?8\] +** cbn?z \4, .* +** ... +** ret +*/ +void +foo (volatile struct L *head, int inc, double *ptr) +{ + double d = *ptr; + while (head) + { + /* Clobber all call-preserved GPRs, so that the loop has to use + call-clobbered GPRs if it is to avoid spilling. 
*/ + asm volatile ("" ::: + "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28"); + inc = head->data + inc; + volatile struct L *inner = head->inner; + head->data = inc; + head = head->next; + if (__builtin_expect_with_probability (inner != 0, 0, PROB)) + for (int i = 0; i < 1000; ++i) + { + ext (); + /* Hack to create high register pressure, so that IRA doesn't + collapse this loop into the parent loop. */ + d += 1; + asm volatile ("// foo" ::: + "d0", "d1", "d2", "d3", + "d4", "d5", "d6", "d7", + "d8", "d9", "d10", "d11", + "d12", "d13", "d14", "d15", + "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23", + "d24", "d25", "d26", "d27", + "d28", "d29", "d30", "d31"); + } + } + *ptr = d; +} diff --git a/gcc/testsuite/gcc.target/aarch64/reg-alloc-4.c b/gcc/testsuite/gcc.target/aarch64/reg-alloc-4.c new file mode 100644 index 0000000000000000000000000000000000000000..ceb6f50de2dc38c4e57f0d9d15526cd721591e74 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/reg-alloc-4.c @@ -0,0 +1,69 @@ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#define PROB 0.1 + +struct L +{ + int data; + volatile struct L *next; + volatile struct L *inner; +}; + +/* The thing we're testing here is that the !head->inner path of the outer loop + body has no stack accesses. It's possible that we'll need to update this + pattern for unrelated code changes, but the test should be XFAILed rather + than changed if any new stack accesses occur on the !head->inner path. */ +/* +** foo: +** ... +** ldr (w[0-9]+), \[(x[0-9]+)\] +** add (w[0-9]+), (?:\3, \1|\1, \3) +** ldr (x[0-9]+), \[\2, #?16\] +** str \3, \[\2\] +** ldr \2, \[\2, #?8\] +** cbn?z \4, .* +** ... +** ret +*/ +void +foo (volatile struct L *head, int inc) +{ + while (head) + { + /* Clobber all call-preserved GPRs, so that the loop has to use + call-clobbered GPRs if it is to avoid spilling. */ + asm volatile ("" ::: + "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28"); + inc = head->data + inc; + volatile struct L *inner = head->inner; + head->data = inc; + head = head->next; + if (__builtin_expect_with_probability (inner != 0, 0, PROB)) + for (int i = 0; i < 1000; ++i) + asm volatile ("" :: /* example allocation: */ + "r" (i), /* x0 */ + "r" (inner), /* x1 */ + "r" (inner->next), /* x2 */ + "r" (inner->next), /* x3 */ + "r" (inner->next), /* x4 */ + "r" (inner->next), /* x5 */ + "r" (inner->next), /* x6 */ + "r" (inner->next), /* x7 */ + "r" (inner->next), /* x8 */ + "r" (inner->next), /* x9 */ + "r" (inner->next), /* x10 */ + "r" (inner->next), /* x11 */ + "r" (inner->next), /* x12 */ + "r" (inner->next), /* x13 */ + "r" (inner->next), /* x14 */ + "r" (inner->next), /* x15 */ + "r" (inner->next), /* x16 */ + "r" (inner->next), /* x17 */ + "r" (inner->next), /* x18 */ + "r" (inner->next) : /* x30 */ + "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28"); + } +} diff --git a/gcc/testsuite/gcc.target/i386/pr97540.c b/gcc/testsuite/gcc.target/i386/pr97540.c new file mode 100644 index 0000000000000000000000000000000000000000..20f8717372cb4be2f0eae3b94e9580f5ac830cde --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr97540.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int mt7615_add_interface_dev_0; +int ffs(int x) { asm("" : : "rm"(x)); } +int mt7615_add_interface() { ffs(~mt7615_add_interface_dev_0); }
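As an illustrative sketch (not part of the patch itself): the -favoid-propagating-conflicts changes above target pseudos that are live across a cold inner loop but never referenced inside it, the same shape as the reg-alloc-*.c tests. The names sum and ext below are hypothetical, and whether IRA actually keeps total in a call-clobbered register on the hot path depends on the target and on this patch being applied; compiling with something like gcc -O2 -favoid-propagating-conflicts -S lets the resulting allocation be inspected.

/* Hypothetical example, not from the patch: TOTAL is live across the
   cold inner loop but not referenced there, so with the new behaviour
   the inner loop's call clobbers need not be propagated as conflicts
   to TOTAL's hot-path allocno.  */
extern int ext (int);

int
sum (int *list, int n, int rare)
{
  int total = 0;
  for (int i = 0; i < n; ++i)
    {
      total += list[i];
      if (__builtin_expect (rare != 0, 0))
	/* Cold inner loop: spilling TOTAL around it is cheaper than
	   keeping it out of call-clobbered registers everywhere.  */
	for (int j = 0; j < 1000; ++j)
	  rare = ext (rare);
    }
  return total;
}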