diff --git a/gcc/common.opt b/gcc/common.opt
index b5ea3c7a12793e4f6e866a7f90b0f140cb84a797..5e6e32b8f4affee4cc1c657deec227cde40f3378 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -3511,4 +3511,12 @@ fipa-ra
 Common Report Var(flag_ipa_ra) Optimization
 Use caller save register across calls if possible.
 
+flim-count-check
+Common Report Var(flag_lim_count_check) Init(0) Optimization
+Check profile counts before moving a loop invariant out of a cold basic block.
+
+favoid-propagating-conflicts
+Common Report Var(flag_avoid_propagating_conflicts) Init(0) Optimization
+Avoid propagating conflicts in IRA.
+
 ; This comment is to ensure we retain the blank line above.
diff --git a/gcc/ira-build.c b/gcc/ira-build.c
index 0bbdb4d0c4b93fe55c9b2c6f1636074210ec21d3..734e89fe8702b3cf339a77cd82c7cbec6421865e 100644
--- a/gcc/ira-build.c
+++ b/gcc/ira-build.c
@@ -499,6 +499,7 @@ ira_create_allocno (int regno, bool cap_p,
   bitmap_set_bit (loop_tree_node->all_allocnos, ALLOCNO_NUM (a));
   ALLOCNO_NREFS (a) = 0;
   ALLOCNO_FREQ (a) = 0;
+  ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (a) = false;
   ALLOCNO_HARD_REGNO (a) = -1;
   ALLOCNO_CALL_FREQ (a) = 0;
   ALLOCNO_CALLS_CROSSED_NUM (a) = 0;
@@ -1990,6 +1991,37 @@ propagate_modified_regnos (ira_loop_tree_node_t loop_tree_node)
 			    loop_tree_node->modified_regnos);
 }
 
+/* Propagate ALLOCNO_HARD_REG_COSTS from A to PARENT_A.  Use SPILL_COST
+   as the cost of spilling a register throughout A (which we have to do
+   for PARENT_A allocations that conflict with A).  */
+static void
+ira_propagate_hard_reg_costs (ira_allocno_t parent_a, ira_allocno_t a,
+			      int spill_cost)
+{
+  HARD_REG_SET conflicts = ira_total_conflict_hard_regs (a);
+  if (ira_caller_save_loop_spill_p (parent_a, a, spill_cost))
+    conflicts |= ira_need_caller_save_regs (a);
+  conflicts &= ~ira_total_conflict_hard_regs (parent_a);
+
+  auto costs = ALLOCNO_HARD_REG_COSTS (a);
+  if (!hard_reg_set_empty_p (conflicts))
+    ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (a) = true;
+  else if (!costs)
+    return;
+
+  auto aclass = ALLOCNO_CLASS (a);
+  ira_allocate_and_set_costs (&ALLOCNO_HARD_REG_COSTS (parent_a),
+			      aclass, ALLOCNO_CLASS_COST (parent_a));
+  auto parent_costs = ALLOCNO_HARD_REG_COSTS (parent_a);
+  for (int i = 0; i < ira_class_hard_regs_num[aclass]; ++i)
+    if (TEST_HARD_REG_BIT (conflicts, ira_class_hard_regs[aclass][i]))
+      parent_costs[i] += spill_cost;
+    else if (costs)
+      /* The cost to A of allocating this register to PARENT_A can't
+	 be more than the cost of spilling the register throughout A.  */
+      parent_costs[i] += MIN (costs[i], spill_cost);
+}
+
 /* Propagate new info about allocno A (see comments about accumulated
    info in allocno definition) to the corresponding allocno on upper
    loop tree level.  So allocnos on upper levels accumulate
@@ -2018,11 +2050,37 @@ propagate_allocno_info (void)
 	    && bitmap_bit_p (ALLOCNO_LOOP_TREE_NODE (a)->border_allocnos,
 			     ALLOCNO_NUM (a)))
 	  {
+	    int spill_cost = INT_MAX;
+	    if (flag_avoid_propagating_conflicts)
+	      {
+		/* Calculate the cost of storing to memory on entry to A's
+		   loop, referencing as memory within A's loop, and
+		   restoring from memory on exit from A's loop.  */
+		ira_loop_border_costs border_costs (a);
+		if (ira_subloop_allocnos_can_differ_p (parent_a))
+		  spill_cost = (border_costs.spill_inside_loop_cost ()
+				+ ALLOCNO_MEMORY_COST (a));
+	      }
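+	    /* Example with illustrative numbers only: if the loop border
+	       gives a store-on-entry/load-on-exit cost of 40 and the
+	       references inside the loop add a memory cost of 24, then
+	       SPILL_COST is 64.  The propagation below never charges
+	       PARENT_A more than this for a register that conflicts
+	       inside A's loop.  */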
 	    if (! ALLOCNO_BAD_SPILL_P (a))
 	      ALLOCNO_BAD_SPILL_P (parent_a) = false;
 	    ALLOCNO_NREFS (parent_a) += ALLOCNO_NREFS (a);
 	    ALLOCNO_FREQ (parent_a) += ALLOCNO_FREQ (a);
-	    ALLOCNO_CALL_FREQ (parent_a) += ALLOCNO_CALL_FREQ (a);
+
+	    /* If A's allocation can differ from PARENT_A's, we can if
+	       necessary spill PARENT_A on entry to A's loop and restore
+	       it afterwards.  Doing that has cost SPILL_COST.  */
+	    if (flag_avoid_propagating_conflicts
+		&& !ira_subloop_allocnos_can_differ_p (parent_a))
+	      merge_hard_reg_conflicts (a, parent_a, true);
+
+	    if (!flag_avoid_propagating_conflicts)
+	      {
+		ALLOCNO_CALL_FREQ (parent_a) += ALLOCNO_CALL_FREQ (a);
 	    merge_hard_reg_conflicts (a, parent_a, true);
 	    ALLOCNO_CALLS_CROSSED_NUM (parent_a)
 	      += ALLOCNO_CALLS_CROSSED_NUM (a);
@@ -2046,7 +2104,39 @@ propagate_allocno_info (void)
 	    ALLOCNO_CLASS_COST (parent_a)
 	      += ALLOCNO_CLASS_COST (a);
 	    ALLOCNO_MEMORY_COST (parent_a) += ALLOCNO_MEMORY_COST (a);
+	      }
+	    else
+	      {
+		if (!ira_caller_save_loop_spill_p (parent_a, a, spill_cost))
+		  {
+		    ALLOCNO_CALL_FREQ (parent_a) += ALLOCNO_CALL_FREQ (a);
+		    ALLOCNO_CALLS_CROSSED_NUM (parent_a)
+		      += ALLOCNO_CALLS_CROSSED_NUM (a);
+		    ALLOCNO_CHEAP_CALLS_CROSSED_NUM (parent_a)
+		      += ALLOCNO_CHEAP_CALLS_CROSSED_NUM (a);
+		    ALLOCNO_CROSSED_CALLS_ABIS (parent_a)
+		      |= ALLOCNO_CROSSED_CALLS_ABIS (a);
+		    ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (parent_a)
+		      |= ALLOCNO_CROSSED_CALLS_CLOBBERED_REGS (a);
+		  }
+		ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (parent_a)
+		  += ALLOCNO_EXCESS_PRESSURE_POINTS_NUM (a);
+		aclass = ALLOCNO_CLASS (a);
+		ira_assert (aclass == ALLOCNO_CLASS (parent_a));
+		ira_propagate_hard_reg_costs (parent_a, a, spill_cost);
+		ira_allocate_and_accumulate_costs
+		  (&ALLOCNO_CONFLICT_HARD_REG_COSTS (parent_a),
+		   aclass,
+		   ALLOCNO_CONFLICT_HARD_REG_COSTS (a));
+		/* The cost to A of allocating a register to PARENT_A can't
+		   be more than the cost of spilling the register
+		   throughout A.  */
+		ALLOCNO_CLASS_COST (parent_a)
+		  += MIN (ALLOCNO_CLASS_COST (a), spill_cost);
+		ALLOCNO_MEMORY_COST (parent_a) += ALLOCNO_MEMORY_COST (a);
+	      }
 	  }
 }
 
 /* Create allocnos corresponding to pseudo-registers in the current
diff --git a/gcc/ira-color.c b/gcc/ira-color.c
index b0fc159a84933f1b8afdd969fab4539a0e35664c..263de6fae8fcc69b814b66c6d980834d3f6e5c06 100644
--- a/gcc/ira-color.c
+++ b/gcc/ira-color.c
@@ -36,6 +36,11 @@ along with GCC; see the file COPYING3.  If not see
 #include "reload.h"
 #include "cfgloop.h"
 
+/* To prevent soft conflict detection becoming quadratic in the
+   loop depth.  Only for very pathological cases, so it hardly
+   seems worth a --param.  */
+const int max_soft_conflict_loop_depth = 64;
+
 typedef struct allocno_hard_regs *allocno_hard_regs_t;
 
 /* The structure contains information about hard registers can be
@@ -1698,6 +1703,167 @@ calculate_saved_nregs (int hard_regno, machine_mode mode)
   return nregs;
 }
 
+/* Allocnos A1 and A2 are known to conflict.  Check whether, in some loop L
+   that is either the current loop or a nested subloop, the conflict is of
+   the following form:
+
+   - One allocno (X) is a cap allocno for some non-cap allocno X2.
+
+   - X2 belongs to some loop L2.
+
+   - The other allocno (Y) is a non-cap allocno.
+
+   - Y is an ancestor of some allocno Y2 in L2.  (Note that such a Y2
+     must exist, given that X and Y conflict.)
+
+   - Y2 is not referenced in L2 (that is, ALLOCNO_NREFS (Y2) == 0).
+
+   - Y can use a different allocation from Y2.
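+
+   (For instance, given:
+
+	y = ...;
+	for (...)	// loop L2
+	  { x2 = ...; use (x2); }
+	... = y;
+
+    the allocno Y2 for "y" in L2 has no references, so "x2" can reuse
+    "y"'s register if Y2 is spilled around L2.  The names follow the
+    description above; the snippet is only an illustration.)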
The conflict is therefore + only "soft", in that it can easily be avoided by spilling Y2 inside L2 + without affecting any insn references. + + If the conflict does have this form, return the Y2 that would need to be + spilled in order to allow X and Y (and thus A1 and A2) to use the same + register. Return null otherwise. Returning null is conservatively correct; + any nonnnull return value is an optimization. */ +ira_allocno_t +ira_soft_conflict (ira_allocno_t a1, ira_allocno_t a2) +{ + /* Search for the loop L and its associated allocnos X and Y. */ + int search_depth = 0; + while (ALLOCNO_CAP_MEMBER (a1) && ALLOCNO_CAP_MEMBER (a2)) + { + a1 = ALLOCNO_CAP_MEMBER (a1); + a2 = ALLOCNO_CAP_MEMBER (a2); + if (search_depth++ > max_soft_conflict_loop_depth) + return nullptr; + } + /* This must be true if A1 and A2 conflict. */ + ira_assert (ALLOCNO_LOOP_TREE_NODE (a1) == ALLOCNO_LOOP_TREE_NODE (a2)); + + /* Make A1 the cap allocno (X in the comment above) and A2 the + non-cap allocno (Y in the comment above). */ + if (ALLOCNO_CAP_MEMBER (a2)) + std::swap (a1, a2); + if (!ALLOCNO_CAP_MEMBER (a1)) + return nullptr; + + /* Search for the real allocno that A1 caps (X2 in the comment above). */ + do + { + a1 = ALLOCNO_CAP_MEMBER (a1); + if (search_depth++ > max_soft_conflict_loop_depth) + return nullptr; + } + while (ALLOCNO_CAP_MEMBER (a1)); + + /* Find the associated allocno for A2 (Y2 in the comment above). */ + auto node = ALLOCNO_LOOP_TREE_NODE (a1); + auto local_a2 = node->regno_allocno_map[ALLOCNO_REGNO (a2)]; + + /* Find the parent of LOCAL_A2/Y2. LOCAL_A2 must be a descendant of A2 + for the conflict query to make sense, so this parent lookup must succeed. + + If the parent allocno has no references, it is usually cheaper to + spill at that loop level instead. Keep searching until we find + a parent allocno that does have references (but don't look past + the starting allocno). */ + ira_allocno_t local_parent_a2; + for (;;) + { + local_parent_a2 = ira_parent_allocno (local_a2); + if (local_parent_a2 == a2 || ALLOCNO_NREFS (local_parent_a2) != 0) + break; + local_a2 = local_parent_a2; + } + if (CHECKING_P) + { + /* Sanity check to make sure that the conflict we've been given + makes sense. */ + auto test_a2 = local_parent_a2; + while (test_a2 != a2) + { + test_a2 = ira_parent_allocno (test_a2); + ira_assert (test_a2); + } + } + if (local_a2 + && ALLOCNO_NREFS (local_a2) == 0 + && ira_subloop_allocnos_can_differ_p (local_parent_a2)) + return local_a2; + return nullptr; +} + +/* The caller has decided to allocate HREGNO to A and has proved that + this is safe. However, the allocation might require the kind of + spilling described in the comment above ira_soft_conflict. + The caller has recorded that: + + - The allocnos in ALLOCNOS_TO_SPILL are the ones that would need + to be spilled to satisfy soft conflicts for at least one allocation + (not necessarily HREGNO). + + - The soft conflicts apply only to A allocations that overlap + SOFT_CONFLICT_REGS. + + If allocating HREGNO is subject to any soft conflicts, record the + subloop allocnos that need to be spilled. */ +static void +spill_soft_conflicts (ira_allocno_t a, bitmap allocnos_to_spill, + HARD_REG_SET soft_conflict_regs, int hregno) +{ + auto nregs = hard_regno_nregs (hregno, ALLOCNO_MODE (a)); + bitmap_iterator bi; + unsigned int i; + EXECUTE_IF_SET_IN_BITMAP (allocnos_to_spill, 0, i, bi) + { + /* SPILL_A needs to be spilled for at least one allocation + (not necessarily this one). 
+
+      /* Find the corresponding allocno for this loop.  */
+      auto conflict_a = spill_a;
+      do
+	{
+	  conflict_a = ira_parent_or_cap_allocno (conflict_a);
+	  ira_assert (conflict_a);
+	}
+      while (ALLOCNO_LOOP_TREE_NODE (conflict_a)->level
+	     > ALLOCNO_LOOP_TREE_NODE (a)->level);
+
+      ira_assert (ALLOCNO_LOOP_TREE_NODE (conflict_a)
+		  == ALLOCNO_LOOP_TREE_NODE (a));
+
+      if (conflict_a == a)
+	{
+	  /* SPILL_A is a descendant of A.  We don't know (and don't need
+	     to know) which cap allocnos have a soft conflict with A.
+	     All we need to do is test whether the soft conflict applies
+	     to the chosen allocation.  */
+	  if (ira_hard_reg_set_intersection_p (hregno, ALLOCNO_MODE (a),
+					       soft_conflict_regs))
+	    ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (spill_a) = true;
+	}
+      else
+	{
+	  /* SPILL_A is a descendant of CONFLICT_A, which has a soft conflict
+	     with A.  Test whether the soft conflict applies to the current
+	     allocation.  */
+	  ira_assert (ira_soft_conflict (a, conflict_a) == spill_a);
+	  auto conflict_hregno = ALLOCNO_HARD_REGNO (conflict_a);
+	  ira_assert (conflict_hregno >= 0);
+	  auto conflict_nregs = hard_regno_nregs (conflict_hregno,
+						  ALLOCNO_MODE (conflict_a));
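+	  /* The two allocations overlap iff each one starts before the
+	     other ends; e.g. HREGNO 0 with NREGS 2 overlaps
+	     CONFLICT_HREGNO 1 with CONFLICT_NREGS 1, since 0 + 2 > 1
+	     and 1 + 1 > 0.  */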
+	  if (hregno + nregs > conflict_hregno
+	      && conflict_hregno + conflict_nregs > hregno)
+	    ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (spill_a) = true;
+	}
+    }
+}
+
 /* Choose a hard register for allocno A.  If RETRY_P is TRUE, it means
    that the function called from function
    `ira_reassign_conflict_allocnos' and `allocno_reload_assign'.  In
@@ -1737,6 +1903,8 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
 #ifdef STACK_REGS
   bool no_stack_reg_p;
 #endif
+  auto_bitmap allocnos_to_spill;
+  HARD_REG_SET soft_conflict_regs = {};
 
   ira_assert (! ALLOCNO_ASSIGNED_P (a));
   get_conflict_and_start_profitable_regs (a, retry_p,
@@ -1824,23 +1992,56 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
 		  mode = ALLOCNO_MODE (conflict_a);
 		  conflict_nregs = hard_regno_nregs (hard_regno, mode);
-		  if (conflict_nregs == n_objects && conflict_nregs > 1)
+		  auto spill_a = (retry_p
+				  ? nullptr
+				  : ira_soft_conflict (a, conflict_a));
+		  if (spill_a)
 		    {
-		      int num = OBJECT_SUBWORD (conflict_obj);
-
-		      if (REG_WORDS_BIG_ENDIAN)
-			SET_HARD_REG_BIT (conflicting_regs[word],
-					  hard_regno + n_objects - num - 1);
-		      else
-			SET_HARD_REG_BIT (conflicting_regs[word],
-					  hard_regno + num);
+		      if (bitmap_set_bit (allocnos_to_spill,
+					  ALLOCNO_NUM (spill_a)))
+			{
+			  ira_loop_border_costs border_costs (spill_a);
+			  auto cost = border_costs.spill_inside_loop_cost ();
+			  auto note_conflict = [&](int r)
+			    {
+			      SET_HARD_REG_BIT (soft_conflict_regs, r);
+			      auto hri = ira_class_hard_reg_index[aclass][r];
+			      if (hri >= 0)
+				{
+				  costs[hri] += cost;
+				  full_costs[hri] += cost;
+				}
+			    };
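+			  /* Charge the spill cost to every start register
+			     whose allocation to A would overlap the
+			     conflicting object: scan downwards from
+			     HARD_REGNO for registers whose MODE-sized
+			     allocation reaches it, then upwards over the
+			     remaining registers it covers.  */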
+			  for (int r = hard_regno;
+			       r >= 0 && (int) end_hard_regno (mode, r) > hard_regno;
+			       r--)
+			    note_conflict (r);
+			  for (int r = hard_regno + 1;
+			       r < hard_regno + conflict_nregs;
+			       r++)
+			    note_conflict (r);
+			}
 		    }
 		  else
-		    conflicting_regs[word]
-		      |= ira_reg_mode_hard_regset[hard_regno][mode];
-		  if (hard_reg_set_subset_p (profitable_hard_regs,
-					     conflicting_regs[word]))
-		    goto fail;
+		    {
+		      if (conflict_nregs == n_objects && conflict_nregs > 1)
+			{
+			  int num = OBJECT_SUBWORD (conflict_obj);
+
+			  if (REG_WORDS_BIG_ENDIAN)
+			    SET_HARD_REG_BIT (conflicting_regs[word],
+					      hard_regno + n_objects - num - 1);
+			  else
+			    SET_HARD_REG_BIT (conflicting_regs[word],
+					      hard_regno + num);
+			}
+		      else
+			conflicting_regs[word]
+			  |= ira_reg_mode_hard_regset[hard_regno][mode];
+		      if (hard_reg_set_subset_p (profitable_hard_regs,
+						 conflicting_regs[word]))
+			goto fail;
+		    }
 		}
 	    }
 	  else if (! retry_p
@@ -1951,6 +2152,8 @@ assign_hard_reg (ira_allocno_t a, bool retry_p)
     {
       for (i = hard_regno_nregs (best_hard_regno, mode) - 1; i >= 0; i--)
 	allocated_hardreg_p[best_hard_regno + i] = true;
+      spill_soft_conflicts (a, allocnos_to_spill, soft_conflict_regs,
+			    best_hard_regno);
     }
   if (! retry_p)
     restore_costs_from_copies (a);
@@ -2549,13 +2752,23 @@ ira_loop_edge_freq (ira_loop_tree_node_t loop_node, int regno, bool exit_p)
   return REG_FREQ_FROM_EDGE_FREQ (freq);
 }
 
+/* Construct an object that describes the boundary between A and its
+   parent allocno.  */
+ira_loop_border_costs::ira_loop_border_costs (ira_allocno_t a)
+  : m_mode (ALLOCNO_MODE (a)),
+    m_class (ALLOCNO_CLASS (a)),
+    m_entry_freq (ira_loop_edge_freq (ALLOCNO_LOOP_TREE_NODE (a),
+				      ALLOCNO_REGNO (a), false)),
+    m_exit_freq (ira_loop_edge_freq (ALLOCNO_LOOP_TREE_NODE (a),
+				     ALLOCNO_REGNO (a), true))
+{
+}
+
 /* Calculate and return the cost of putting allocno A into memory.  */
 static int
 calculate_allocno_spill_cost (ira_allocno_t a)
 {
   int regno, cost;
-  machine_mode mode;
-  enum reg_class rclass;
   ira_allocno_t parent_allocno;
   ira_loop_tree_node_t parent_node, loop_node;
 
@@ -2568,24 +2781,12 @@ calculate_allocno_spill_cost (ira_allocno_t a)
     return cost;
   if ((parent_allocno = parent_node->regno_allocno_map[regno]) == NULL)
     return cost;
-  mode = ALLOCNO_MODE (a);
-  rclass = ALLOCNO_CLASS (a);
+  ira_loop_border_costs border_costs (a);
   if (ALLOCNO_HARD_REGNO (parent_allocno) < 0)
-    cost -= (ira_memory_move_cost[mode][rclass][0]
-	     * ira_loop_edge_freq (loop_node, regno, true)
-	     + ira_memory_move_cost[mode][rclass][1]
-	     * ira_loop_edge_freq (loop_node, regno, false));
+    cost -= border_costs.spill_outside_loop_cost ();
   else
-    {
-      ira_init_register_move_cost_if_necessary (mode);
-      cost += ((ira_memory_move_cost[mode][rclass][1]
-		* ira_loop_edge_freq (loop_node, regno, true)
-		+ ira_memory_move_cost[mode][rclass][0]
-		* ira_loop_edge_freq (loop_node, regno, false))
-	       - (ira_register_move_cost[mode][rclass][rclass]
-		  * (ira_loop_edge_freq (loop_node, regno, false)
-		     + ira_loop_edge_freq (loop_node, regno, true))));
-    }
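+    /* The parent allocno has a hard register: spilling A adds a store
+       on entry to the loop and a load on exit, but saves the register
+       moves that a (possibly different) register allocation for A
+       would need at the border.  */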
+    cost += (border_costs.spill_inside_loop_cost ()
+	     - border_costs.move_between_loops_cost ());
   return cost;
 }
 
@@ -2948,6 +3149,8 @@ improve_allocation (void)
 	   assigning hard register to allocno A even without spilling
 	   conflicting allocnos.  */
 	continue;
+      auto_bitmap allocnos_to_spill;
+      HARD_REG_SET soft_conflict_regs = {};
       mode = ALLOCNO_MODE (a);
       nwords = ALLOCNO_NUM_OBJECTS (a);
       /* Process each allocno conflicting with A and update the cost
@@ -2973,31 +3176,49 @@ improve_allocation (void)
 	    ALLOCNO_COLOR_DATA (conflict_a)->temp = check;
 	    if ((conflict_hregno = ALLOCNO_HARD_REGNO (conflict_a)) < 0)
 	      continue;
-	    spill_cost = ALLOCNO_UPDATED_MEMORY_COST (conflict_a);
-	    k = (ira_class_hard_reg_index
-		 [ALLOCNO_CLASS (conflict_a)][conflict_hregno]);
-	    ira_assert (k >= 0);
-	    if ((allocno_costs = ALLOCNO_HARD_REG_COSTS (conflict_a))
-		!= NULL)
-	      spill_cost -= allocno_costs[k];
+	    auto spill_a = ira_soft_conflict (a, conflict_a);
+	    if (spill_a)
+	      {
+		if (!bitmap_set_bit (allocnos_to_spill,
+				     ALLOCNO_NUM (spill_a)))
+		  continue;
+		ira_loop_border_costs border_costs (spill_a);
+		spill_cost = border_costs.spill_inside_loop_cost ();
+	      }
 	    else
-	      spill_cost -= ALLOCNO_UPDATED_CLASS_COST (conflict_a);
-	    spill_cost
-	      += allocno_copy_cost_saving (conflict_a, conflict_hregno);
+	      {
+		spill_cost = ALLOCNO_UPDATED_MEMORY_COST (conflict_a);
+		k = (ira_class_hard_reg_index
+		     [ALLOCNO_CLASS (conflict_a)][conflict_hregno]);
+		ira_assert (k >= 0);
+		if ((allocno_costs = ALLOCNO_HARD_REG_COSTS (conflict_a))
+		    != NULL)
+		  spill_cost -= allocno_costs[k];
+		else
+		  spill_cost -= ALLOCNO_UPDATED_CLASS_COST (conflict_a);
+		spill_cost
+		  += allocno_copy_cost_saving (conflict_a, conflict_hregno);
+	      }
 	    conflict_nregs = hard_regno_nregs (conflict_hregno,
 					       ALLOCNO_MODE (conflict_a));
+	    auto note_conflict = [&](int r)
+	      {
+		if (check_hard_reg_p (a, r,
+				      conflicting_regs, profitable_hard_regs))
+		  {
+		    if (spill_a)
+		      SET_HARD_REG_BIT (soft_conflict_regs, r);
+		    costs[r] += spill_cost;
+		  }
+	      };
 	    for (r = conflict_hregno;
 		 r >= 0 && (int) end_hard_regno (mode, r) > conflict_hregno;
 		 r--)
-	      if (check_hard_reg_p (a, r,
-				    conflicting_regs, profitable_hard_regs))
-		costs[r] += spill_cost;
+	      note_conflict (r);
 	    for (r = conflict_hregno + 1;
 		 r < conflict_hregno + conflict_nregs;
 		 r++)
-	      if (check_hard_reg_p (a, r,
-				    conflicting_regs, profitable_hard_regs))
-		costs[r] += spill_cost;
+	      note_conflict (r);
 	  }
       }
       min_cost = INT_MAX;
@@ -3020,6 +3241,7 @@ improve_allocation (void)
 	   by spilling some conflicting allocnos does not improve
 	   the allocation cost.  */
 	continue;
+      spill_soft_conflicts (a, allocnos_to_spill, soft_conflict_regs, best);
       nregs = hard_regno_nregs (best, mode);
       /* Now spill conflicting allocnos which contain a hard register
 	 of A when we assign the best chosen hard register to it.  */
@@ -3306,7 +3528,7 @@ static void
 color_pass (ira_loop_tree_node_t loop_tree_node)
 {
   int regno, hard_regno, index = -1, n;
-  int cost, exit_freq, enter_freq;
+  int cost;
   unsigned int j;
   bitmap_iterator bi;
   machine_mode mode;
@@ -3355,10 +3577,12 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
 	/* Remove from processing in the next loop.  */
 	bitmap_clear_bit (consideration_allocno_bitmap, j);
 	rclass = ALLOCNO_CLASS (a);
+
+	pclass = ira_pressure_class_translate[rclass];
 	if (flag_ira_region == IRA_REGION_MIXED
 	    && (loop_tree_node->reg_pressure[pclass]
-		<= ira_class_hard_regs_num[pclass]))
+		<= ira_class_hard_regs_num[pclass])
+	    && !flag_avoid_propagating_conflicts)
 	  {
 	    mode = ALLOCNO_MODE (a);
 	    hard_regno = ALLOCNO_HARD_REGNO (a);
@@ -3377,7 +3601,28 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
 	      update_costs_from_copies (subloop_allocno, true, true);
 	    /* We don't need updated costs anymore.  */
 	    ira_free_allocno_updated_costs (subloop_allocno);
+	  }
+	else
+	  {
+	    subloop_allocno = ALLOCNO_CAP_MEMBER (a);
+	    subloop_node = ALLOCNO_LOOP_TREE_NODE (subloop_allocno);
+	    if (ira_single_region_allocno_p (a, subloop_allocno)
+		&& flag_avoid_propagating_conflicts)
+	      {
+		mode = ALLOCNO_MODE (a);
+		hard_regno = ALLOCNO_HARD_REGNO (a);
+		if (hard_regno >= 0)
+		  {
+		    index = ira_class_hard_reg_index[rclass][hard_regno];
+		    ira_assert (index >= 0);
+		  }
+		regno = ALLOCNO_REGNO (a);
+		ira_assert (!ALLOCNO_ASSIGNED_P (subloop_allocno));
+		ALLOCNO_HARD_REGNO (subloop_allocno) = hard_regno;
+		ALLOCNO_ASSIGNED_P (subloop_allocno) = true;
+		if (hard_regno >= 0)
+		  update_costs_from_copies (subloop_allocno, true, true);
+		/* We don't need updated costs anymore.  */
+		ira_free_allocno_updated_costs (subloop_allocno);
 	  }
+	  }
       }
   /* Update costs of the corresponding allocnos (not caps) in the
      subloops.  */
@@ -3392,7 +3637,9 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
 	  ira_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
 	  mode = ALLOCNO_MODE (a);
 	  rclass = ALLOCNO_CLASS (a);
-	  pclass = ira_pressure_class_translate[rclass];
+
+	  pclass = ira_pressure_class_translate[rclass];
+
 	  hard_regno = ALLOCNO_HARD_REGNO (a);
 	  /* Use hard register class here.  ??? */
 	  if (hard_regno >= 0)
@@ -3409,32 +3656,16 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
 	      ira_assert (ALLOCNO_CLASS (subloop_allocno) == rclass);
 	      ira_assert (bitmap_bit_p (subloop_node->all_allocnos,
 					ALLOCNO_NUM (subloop_allocno)));
-	      if ((flag_ira_region == IRA_REGION_MIXED
+	      if (((ira_single_region_allocno_p (a, subloop_allocno)
+		    || !ira_subloop_allocnos_can_differ_p (a, hard_regno >= 0))
+		   && flag_avoid_propagating_conflicts)
+		  || (!flag_avoid_propagating_conflicts
+		      && (flag_ira_region == IRA_REGION_MIXED
 		   && (loop_tree_node->reg_pressure[pclass]
 		       <= ira_class_hard_regs_num[pclass]))
-		  || (pic_offset_table_rtx != NULL
-		      && regno == (int) REGNO (pic_offset_table_rtx))
-		  /* Avoid overlapped multi-registers. Moves between them
-		     might result in wrong code generation.  */
-		  || (hard_regno >= 0
-		      && ira_reg_class_max_nregs[pclass][mode] > 1))
-		{
-		  if (! ALLOCNO_ASSIGNED_P (subloop_allocno))
-		    {
-		      ALLOCNO_HARD_REGNO (subloop_allocno) = hard_regno;
-		      ALLOCNO_ASSIGNED_P (subloop_allocno) = true;
-		      if (hard_regno >= 0)
-			update_costs_from_copies (subloop_allocno, true, true);
-		      /* We don't need updated costs anymore.  */
-		      ira_free_allocno_updated_costs (subloop_allocno);
-		    }
-		  continue;
-		}
-	      exit_freq = ira_loop_edge_freq (subloop_node, regno, true);
-	      enter_freq = ira_loop_edge_freq (subloop_node, regno, false);
-	      ira_assert (regno < ira_reg_equiv_len);
-	      if (ira_equiv_no_lvalue_p (regno))
+		      || !ira_subloop_allocnos_can_differ_p (a,
+							     hard_regno >= 0)))
 		{
+		  if (flag_avoid_propagating_conflicts)
+		    gcc_assert (!ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P
+				(subloop_allocno));
 		  if (! ALLOCNO_ASSIGNED_P (subloop_allocno))
 		    {
 		      ALLOCNO_HARD_REGNO (subloop_allocno) = hard_regno;
@@ -3447,16 +3678,23 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
 		}
 	      else if (hard_regno < 0)
 		{
+		  /* If we allocate a register to SUBLOOP_ALLOCNO, we'll need
+		     to load the register on entry to the subloop and store
+		     the register back on exit from the subloop.  This incurs
+		     a fixed cost for all registers.  Since UPDATED_MEMORY_COST
+		     is (and should only be) used relative to the register costs
+		     for the same allocno, we can subtract this shared register
+		     cost from the memory cost.  */
+		  ira_loop_border_costs border_costs (subloop_allocno);
 		  ALLOCNO_UPDATED_MEMORY_COST (subloop_allocno)
-		    -= ((ira_memory_move_cost[mode][rclass][1] * enter_freq)
-			+ (ira_memory_move_cost[mode][rclass][0] * exit_freq));
+		    -= border_costs.spill_outside_loop_cost ();
 		}
 	      else
 		{
+		  ira_loop_border_costs border_costs (subloop_allocno);
 		  aclass = ALLOCNO_CLASS (subloop_allocno);
 		  ira_init_register_move_cost_if_necessary (mode);
-		  cost = (ira_register_move_cost[mode][rclass][rclass]
-			  * (exit_freq + enter_freq));
+		  cost = border_costs.move_between_loops_cost ();
 		  ira_allocate_and_set_or_copy_costs
 		    (&ALLOCNO_UPDATED_HARD_REG_COSTS (subloop_allocno),
 		     aclass, ALLOCNO_UPDATED_CLASS_COST (subloop_allocno),
@@ -3471,9 +3709,11 @@ color_pass (ira_loop_tree_node_t loop_tree_node)
 		      > ALLOCNO_UPDATED_HARD_REG_COSTS (subloop_allocno)[index])
 		    ALLOCNO_UPDATED_CLASS_COST (subloop_allocno)
 		      = ALLOCNO_UPDATED_HARD_REG_COSTS (subloop_allocno)[index];
+		  /* If we spill SUBLOOP_ALLOCNO, we'll need to store HARD_REGNO
+		     on entry to the subloop and restore HARD_REGNO on exit from
+		     the subloop.  */
 		  ALLOCNO_UPDATED_MEMORY_COST (subloop_allocno)
-		    += (ira_memory_move_cost[mode][rclass][0] * enter_freq
-			+ ira_memory_move_cost[mode][rclass][1] * exit_freq);
+		    += border_costs.spill_inside_loop_cost ();
 		}
 	    }
 	}
@@ -3514,7 +3754,6 @@ move_spill_restore (void)
 {
   int cost, regno, hard_regno, hard_regno2, index;
   bool changed_p;
-  int enter_freq, exit_freq;
   machine_mode mode;
   enum reg_class rclass;
   ira_allocno_t a, parent_allocno, subloop_allocno;
@@ -3562,45 +3801,88 @@ move_spill_restore (void)
 	      if (subloop_allocno == NULL)
 		continue;
 	      ira_assert (rclass == ALLOCNO_CLASS (subloop_allocno));
+	      ira_loop_border_costs border_costs (subloop_allocno);
+
+	      if (!flag_avoid_propagating_conflicts)
+		{
-	      /* We have accumulated cost.  To get the real cost of
-		 allocno usage in the loop we should subtract costs of
-		 the subloop allocnos.  */
-	      cost -= (ALLOCNO_MEMORY_COST (subloop_allocno)
+		  cost -= (ALLOCNO_MEMORY_COST (subloop_allocno)
 			   - (ALLOCNO_HARD_REG_COSTS (subloop_allocno) == NULL
 			      ? ALLOCNO_CLASS_COST (subloop_allocno)
 			      : ALLOCNO_HARD_REG_COSTS (subloop_allocno)[index]));
-	      exit_freq = ira_loop_edge_freq (subloop_node, regno, true);
-	      enter_freq = ira_loop_edge_freq (subloop_node, regno, false);
+		}
+	      else
+		{
+		  /* We have accumulated cost.  To get the real cost of
+		     allocno usage in the loop we should subtract the costs
+		     added by propagate_allocno_info for the subloop
+		     allocnos.  */
+		  int reg_cost
+		    = (ALLOCNO_HARD_REG_COSTS (subloop_allocno) == NULL
+		       ? ALLOCNO_CLASS_COST (subloop_allocno)
+		       : ALLOCNO_HARD_REG_COSTS (subloop_allocno)[index]);
+
+		  int spill_cost
+		    = (border_costs.spill_inside_loop_cost ()
+		       + ALLOCNO_MEMORY_COST (subloop_allocno));
+
+		  /* If HARD_REGNO conflicts with SUBLOOP_ALLOCNO then
+		     propagate_allocno_info will have propagated
+		     the cost of spilling HARD_REGNO in SUBLOOP_NODE.
+		     (ira_subloop_allocnos_can_differ_p must be true
+		     in that case.)  If HARD_REGNO is a caller-saved
+		     register, we might have modelled it in the same way.
+
+		     Otherwise, SPILL_COST acted as a cap on the propagated
+		     register cost, in cases where the allocations can
+		     differ.  */
+		  auto conflicts = ira_total_conflict_hard_regs (subloop_allocno);
+		  if (TEST_HARD_REG_BIT (conflicts, hard_regno)
+		      || (ira_need_caller_save_p (subloop_allocno, hard_regno)
+			  && ira_caller_save_loop_spill_p (a, subloop_allocno,
+							   spill_cost)))
+		    reg_cost = spill_cost;
+		  else if (ira_subloop_allocnos_can_differ_p (a))
+		    reg_cost = MIN (reg_cost, spill_cost);
+
+		  cost -= ALLOCNO_MEMORY_COST (subloop_allocno) - reg_cost;
+		}
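+	      /* Example with illustrative numbers only: if the subloop's
+		 own cost for HARD_REGNO is 45 and spilling throughout the
+		 subloop would cost 60, propagate_allocno_info charged the
+		 parent MIN (45, 60) = 45, so REG_COST recomputes the same
+		 45 here and the subtraction stays consistent with what
+		 was propagated.  */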
 	      if ((hard_regno2 = ALLOCNO_HARD_REGNO (subloop_allocno)) < 0)
-		cost -= (ira_memory_move_cost[mode][rclass][0] * exit_freq
-			 + ira_memory_move_cost[mode][rclass][1] * enter_freq);
+		/* The register was spilled in the subloop.  If we spill
+		   it in the outer loop too then we'll no longer need to
+		   save the register on entry to the subloop and restore
+		   the register on exit from the subloop.  */
+		cost -= border_costs.spill_inside_loop_cost ();
 	      else
 		{
-		  cost
-		    += (ira_memory_move_cost[mode][rclass][0] * exit_freq
-			+ ira_memory_move_cost[mode][rclass][1] * enter_freq);
+		  /* The register was also allocated in the subloop.  If we
+		     spill it in the outer loop then we'll need to load the
+		     register on entry to the subloop and store the register
+		     back on exit from the subloop.  */
+		  cost += border_costs.spill_outside_loop_cost ();
 		  if (hard_regno2 != hard_regno)
-		    cost -= (ira_register_move_cost[mode][rclass][rclass]
-			     * (exit_freq + enter_freq));
+		    cost -= border_costs.move_between_loops_cost ();
 		}
 	    }
 	  if ((parent = loop_node->parent) != NULL
 	      && (parent_allocno = parent->regno_allocno_map[regno]) != NULL)
 	    {
 	      ira_assert (rclass == ALLOCNO_CLASS (parent_allocno));
-	      exit_freq = ira_loop_edge_freq (loop_node, regno, true);
-	      enter_freq = ira_loop_edge_freq (loop_node, regno, false);
+	      ira_loop_border_costs border_costs (a);
 	      if ((hard_regno2 = ALLOCNO_HARD_REGNO (parent_allocno)) < 0)
-		cost -= (ira_memory_move_cost[mode][rclass][0] * exit_freq
-			 + ira_memory_move_cost[mode][rclass][1] * enter_freq);
+		/* The register was spilled in the parent loop.  If we spill
+		   it in this loop too then we'll no longer need to load the
+		   register on entry to this loop and save the register back
+		   on exit from this loop.  */
+		cost -= border_costs.spill_outside_loop_cost ();
 	      else
 		{
-		  cost
-		    += (ira_memory_move_cost[mode][rclass][1] * exit_freq
-			+ ira_memory_move_cost[mode][rclass][0] * enter_freq);
+		  /* The register was also allocated in the parent loop.
+		     If we spill it in this loop then we'll need to save
+		     the register on entry to this loop and restore the
+		     register on exit from this loop.  */
+		  cost += border_costs.spill_inside_loop_cost ();
 		  if (hard_regno2 != hard_regno)
-		    cost -= (ira_register_move_cost[mode][rclass][rclass]
-			     * (exit_freq + enter_freq));
+		    cost -= border_costs.move_between_loops_cost ();
 		}
 	    }
 	  if (cost < 0)
diff --git a/gcc/ira-costs.c b/gcc/ira-costs.c
index 6891156b5aaa6cdc7d43e536a74e077e216eef0c..4ac2b7888921491eb23ee2f24518cfe32f6d2247 100644
--- a/gcc/ira-costs.c
+++ b/gcc/ira-costs.c
@@ -781,7 +781,8 @@ record_reg_classes (int n_alts, int n_ops, rtx *ops,
 
 	    case CT_SPECIAL_MEMORY:
 	      insn_allows_mem[i] = allows_mem[i] = 1;
-	      if (MEM_P (op) && constraint_satisfied_p (op, cn))
+	      if (MEM_P (extract_mem_from_operand (op))
+		  && constraint_satisfied_p (op, cn))
 		win = 1;
 	      break;
 
@@ -1397,15 +1398,16 @@ record_operand_costs (rtx_insn *insn, enum reg_class *pref)
      commutative.  */
   for (i = 0; i < recog_data.n_operands; i++)
     {
+      rtx op_mem = extract_mem_from_operand (recog_data.operand[i]);
       memcpy (op_costs[i], init_cost, struct_costs_size);
 
       if (GET_CODE (recog_data.operand[i]) == SUBREG)
 	recog_data.operand[i] = SUBREG_REG (recog_data.operand[i]);
 
-      if (MEM_P (recog_data.operand[i]))
-	record_address_regs (GET_MODE (recog_data.operand[i]),
-			     MEM_ADDR_SPACE (recog_data.operand[i]),
-			     XEXP (recog_data.operand[i], 0),
+      if (MEM_P (op_mem))
+	record_address_regs (GET_MODE (op_mem),
+			     MEM_ADDR_SPACE (op_mem),
+			     XEXP (op_mem, 0),
 			     0, MEM, SCRATCH, frequency * 2);
       else if (constraints[i][0] == 'p'
 	       || (insn_extra_address_constraint
@@ -2326,7 +2328,7 @@ ira_tune_allocno_costs (void)
 {
   int j, n, regno;
   int cost, min_cost, *reg_costs;
-  enum reg_class aclass, rclass;
+  enum reg_class aclass;
   machine_mode mode;
   ira_allocno_t a;
   ira_allocno_iterator ai;
@@ -2365,12 +2367,9 @@ ira_tune_allocno_costs (void)
 	    }
 	  if (skip_p)
 	    continue;
-	  rclass = REGNO_REG_CLASS (regno);
 	  cost = 0;
 	  if (ira_need_caller_save_p (a, regno))
-	    cost += (ALLOCNO_CALL_FREQ (a)
-		     * (ira_memory_move_cost[mode][rclass][0]
-			+ ira_memory_move_cost[mode][rclass][1]));
+	    cost += ira_caller_save_cost (a);
 #ifdef IRA_HARD_REGNO_ADD_COST_MULTIPLIER
 	  cost += ((ira_memory_move_cost[mode][rclass][0]
 		    + ira_memory_move_cost[mode][rclass][1])
diff --git a/gcc/ira-int.h b/gcc/ira-int.h
index 4bee4eec66eee26951e888e193d5adeb1e559c63..121147cbb1e915f2ed35d3b0886316de74a9e3f2 100644
--- a/gcc/ira-int.h
+++ b/gcc/ira-int.h
@@ -314,6 +314,13 @@ struct ira_allocno
      vector where a bit with given index represents allocno with the
      same number.  */
   unsigned int conflict_vec_p : 1;
+  /* True if the parent loop has an allocno for the same register and
+     if the parent allocno's assignment might not be valid in this loop.
+     This means that we cannot merge this allocno and the parent allocno
+     together.
+
+     This is only ever true for non-cap allocnos.  */
+  unsigned int might_conflict_with_parent_p : 1;
   /* Hard register assigned to given allocno.  Negative value means
     that memory was allocated to the allocno.  During the reload,
     spilled allocno has value equal to the corresponding stack slot
@@ -423,6 +430,8 @@ struct ira_allocno
 #define ALLOCNO_CAP_MEMBER(A) ((A)->cap_member)
 #define ALLOCNO_NREFS(A) ((A)->nrefs)
 #define ALLOCNO_FREQ(A) ((A)->freq)
+#define ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P(A) \
+  ((A)->might_conflict_with_parent_p)
 #define ALLOCNO_HARD_REGNO(A) ((A)->hard_regno)
 #define ALLOCNO_CALL_FREQ(A) ((A)->call_freq)
 #define ALLOCNO_CALLS_CROSSED_NUM(A) ((A)->calls_crossed_num)
@@ -1058,6 +1067,7 @@ extern void ira_debug_conflicts (bool);
 extern void ira_build_conflicts (void);
 
 /* ira-color.c */
+extern ira_allocno_t ira_soft_conflict (ira_allocno_t, ira_allocno_t);
 extern void ira_debug_hard_regs_forest (void);
 extern int ira_loop_edge_freq (ira_loop_tree_node_t, int, bool);
 extern void ira_reassign_conflict_allocnos (int);
@@ -1539,4 +1549,155 @@ ira_need_caller_save_p (ira_allocno_t a, unsigned int regno)
 				    ALLOCNO_MODE (a), regno);
 }
 
+/* Represents the boundary between an allocno in one loop and its parent
+   allocno in the enclosing loop.  It is usually possible to change a
+   register's allocation on this boundary; the class provides routines
+   for calculating the cost of such changes.  */
+class ira_loop_border_costs
+{
+public:
+  ira_loop_border_costs (ira_allocno_t);
+
+  int move_between_loops_cost () const;
+  int spill_outside_loop_cost () const;
+  int spill_inside_loop_cost () const;
+
+private:
+  /* The mode and class of the child allocno.  */
+  machine_mode m_mode;
+  reg_class m_class;
+
+  /* Sums the frequencies of the entry edges and the exit edges.  */
+  int m_entry_freq, m_exit_freq;
+};
+
+/* Return the cost of storing the register on entry to the loop and
+   loading it back on exit from the loop.  This is the cost to use if
+   the register is spilled within the loop but is successfully allocated
+   in the parent loop.  */
+inline int
+ira_loop_border_costs::spill_inside_loop_cost () const
+{
+  return (m_entry_freq * ira_memory_move_cost[m_mode][m_class][0]
+	  + m_exit_freq * ira_memory_move_cost[m_mode][m_class][1]);
+}
+
+/* Return the cost of loading the register on entry to the loop and
+   storing it back on exit from the loop.  This is the cost to use if
+   the register is successfully allocated within the loop but is spilled
+   in the parent loop.  */
+inline int
+ira_loop_border_costs::spill_outside_loop_cost () const
+{
+  return (m_entry_freq * ira_memory_move_cost[m_mode][m_class][1]
+	  + m_exit_freq * ira_memory_move_cost[m_mode][m_class][0]);
+}
+
+/* Return the cost of moving the pseudo register between different hard
+   registers on entry and exit from the loop.  This is the cost to use
+   if the register is successfully allocated within both this loop and
+   the parent loop, but the allocations for the loops differ.  */
+inline int
+ira_loop_border_costs::move_between_loops_cost () const
+{
+  ira_init_register_move_cost_if_necessary (m_mode);
+  auto move_cost = ira_register_move_cost[m_mode][m_class][m_class];
+  return move_cost * (m_entry_freq + m_exit_freq);
+}
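+
+/* Example with illustrative numbers only: with entry frequency 3, exit
+   frequency 5, a store cost of 4 and a load cost of 6 for the mode and
+   class, spill_inside_loop_cost is 3*4 + 5*6 = 42 and
+   spill_outside_loop_cost is 3*6 + 5*4 = 38; with a register move cost
+   of 2, move_between_loops_cost is 2 * (3 + 5) = 16.  */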
+
+/* Return true if subloops that contain allocnos for A's register can
+   use a different assignment from A.  ALLOCATED_P is true for the case
+   in which allocation succeeded for A.  */
+inline bool
+ira_subloop_allocnos_can_differ_p (ira_allocno_t a, bool allocated_p = true)
+{
+  auto regno = ALLOCNO_REGNO (a);
+
+  if (pic_offset_table_rtx != NULL
+      && regno == (int) REGNO (pic_offset_table_rtx))
+    return false;
+
+  ira_assert (regno < ira_reg_equiv_len);
+  if (ira_equiv_no_lvalue_p (regno))
+    return false;
+
+  /* Avoid overlapping multi-registers.  Moves between them might result
+     in wrong code generation.  */
+  if (allocated_p)
+    {
+      auto pclass = ira_pressure_class_translate[ALLOCNO_CLASS (a)];
+      if (ira_reg_class_max_nregs[pclass][ALLOCNO_MODE (a)] > 1)
+	return false;
+    }
+
+  return true;
+}
+
+/* Return true if we should treat A and SUBLOOP_A as belonging to a
+   single region.  */
+inline bool
+ira_single_region_allocno_p (ira_allocno_t a, ira_allocno_t subloop_a)
+{
+  if (flag_ira_region != IRA_REGION_MIXED)
+    return false;
+
+  if (ALLOCNO_MIGHT_CONFLICT_WITH_PARENT_P (subloop_a))
+    return false;
+
+  auto rclass = ALLOCNO_CLASS (a);
+  auto pclass = ira_pressure_class_translate[rclass];
+  auto loop_used_regs = ALLOCNO_LOOP_TREE_NODE (a)->reg_pressure[pclass];
+  return loop_used_regs <= ira_class_hard_regs_num[pclass];
+}
+
+/* Return the set of all hard registers that conflict with A.  */
+inline HARD_REG_SET
+ira_total_conflict_hard_regs (ira_allocno_t a)
+{
+  auto obj_0 = ALLOCNO_OBJECT (a, 0);
+  HARD_REG_SET conflicts = OBJECT_TOTAL_CONFLICT_HARD_REGS (obj_0);
+  for (int i = 1; i < ALLOCNO_NUM_OBJECTS (a); i++)
+    conflicts |= OBJECT_TOTAL_CONFLICT_HARD_REGS (ALLOCNO_OBJECT (a, i));
+  return conflicts;
+}
+
+/* Return the cost of saving a caller-saved register before each call
+   in A's live range and restoring the same register after each call.  */
+inline int
+ira_caller_save_cost (ira_allocno_t a)
+{
+  auto mode = ALLOCNO_MODE (a);
+  auto rclass = ALLOCNO_CLASS (a);
+  return (ALLOCNO_CALL_FREQ (a)
+	  * (ira_memory_move_cost[mode][rclass][0]
+	     + ira_memory_move_cost[mode][rclass][1]));
+}
+
+/* A and SUBLOOP_A are allocnos for the same pseudo register, with A's
+   loop immediately enclosing SUBLOOP_A's loop.  If we allocate to A a
+   hard register R that is clobbered by a call in SUBLOOP_A, decide
+   which of the following approaches should be used for handling the
+   conflict:
+
+   (1) Spill R on entry to SUBLOOP_A's loop, assign memory to SUBLOOP_A,
+       and restore R on exit from SUBLOOP_A's loop.
+
+   (2) Spill R before each necessary call in SUBLOOP_A's live range and
+       restore R after each such call.
+
+   Return true if (1) is better than (2).  SPILL_COST is the cost of
+   doing (1).  */
+inline bool
+ira_caller_save_loop_spill_p (ira_allocno_t a, ira_allocno_t subloop_a,
+			      int spill_cost)
+{
+  if (!ira_subloop_allocnos_can_differ_p (a))
+    return false;
+
+  /* Calculate the cost of saving a call-clobbered register
+     before each call and restoring it afterwards.  */
+  int call_cost = ira_caller_save_cost (subloop_a);
+  return call_cost && call_cost >= spill_cost;
+}
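+
+/* Example with illustrative numbers only: if SUBLOOP_A crosses calls
+   with combined frequency 8 and one save/restore pair costs 10,
+   approach (2) costs 80; with SPILL_COST = 50 for storing R around the
+   subloop, approach (1) is cheaper and the function returns true.  */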
+
 #endif /* GCC_IRA_INT_H */
diff --git a/gcc/ira.c b/gcc/ira.c
index 681ec2f46f9d9c57b47ed740f6fbe78fb617216f..b7551c1c4e94d3dad3ec604f8ab820e90167a46d 100644
--- a/gcc/ira.c
+++ b/gcc/ira.c
@@ -1845,6 +1845,7 @@ ira_setup_alts (rtx_insn *insn)
 	      default:
 		{
 		  enum constraint_num cn = lookup_constraint (p);
+		  rtx mem = NULL;
 		  switch (get_constraint_type (cn))
 		    {
 		    case CT_REGISTER:
@@ -1867,8 +1868,12 @@ ira_setup_alts (rtx_insn *insn)
 		      goto op_success;
 
 		    case CT_MEMORY:
+		      mem = op;
+		      /* Fall through.  */
 		    case CT_SPECIAL_MEMORY:
-		      if (MEM_P (op))
+		      if (!mem)
+			mem = extract_mem_from_operand (op);
+		      if (MEM_P (mem))
 			goto op_success;
 		      win_p = true;
 		      break;
@@ -2526,6 +2531,8 @@ check_allocation (void)
 	      int conflict_hard_regno = ALLOCNO_HARD_REGNO (conflict_a);
 	      if (conflict_hard_regno < 0)
 		continue;
+	      if (ira_soft_conflict (a, conflict_a))
+		continue;
 
 	      conflict_nregs = hard_regno_nregs (conflict_hard_regno,
 						 ALLOCNO_MODE (conflict_a));
@@ -5130,8 +5137,201 @@ move_unallocated_pseudos (void)
 	      INSN_UID (newinsn), i);
 	  SET_REG_N_REFS (i, 0);
 	}
+
+  first_moveable_pseudo = last_moveable_pseudo = 0;
 }
 
+
+/* Code dealing with scratches (changing them onto
+   pseudos and restoring them from the pseudos).
+
+   We change scratches into pseudos at the beginning of IRA to
+   simplify dealing with them (conflicts, hard register assignments).
+
+   If the pseudo denoting scratch was spilled it means that we do not
+   need a hard register for it.  Such pseudos are transformed back to
+   scratches at the end of LRA.  */
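+
+/* For example, an insn with (clobber (scratch:SI)) is rewritten to
+   clobber a fresh pseudo, say (clobber (reg:SI 97)); if that pseudo
+   ends up without a hard register, the SCRATCH is put back at the end
+   of LRA.  (The register number is illustrative.)  */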
+
+/* Description of location of a former scratch operand.  */
+struct sloc
+{
+  rtx_insn *insn;	/* Insn where the scratch was.  */
+  int nop;		/* Number of the operand which was a scratch.  */
+  unsigned regno;	/* regno generated instead of scratch.  */
+  int icode;		/* Original icode from which scratch was removed.  */
+};
+
+typedef struct sloc *sloc_t;
+
+/* Locations of the former scratches.  */
+static vec<sloc_t> scratches;
+
+/* Bitmap of scratch regnos.  */
+static bitmap_head scratch_bitmap;
+
+/* Bitmap of scratch operands.  */
+static bitmap_head scratch_operand_bitmap;
+
+/* Return true if pseudo REGNO is made of SCRATCH.  */
+bool
+ira_former_scratch_p (int regno)
+{
+  return bitmap_bit_p (&scratch_bitmap, regno);
+}
+
+/* Return true if the operand NOP of INSN is a former scratch.  */
+bool
+ira_former_scratch_operand_p (rtx_insn *insn, int nop)
+{
+  return bitmap_bit_p (&scratch_operand_bitmap,
+		       INSN_UID (insn) * MAX_RECOG_OPERANDS + nop) != 0;
+}
+
+/* Register operand NOP in INSN as a former scratch.  It will be
+   changed to scratch back, if it is necessary, at the LRA end.  */
+void
+ira_register_new_scratch_op (rtx_insn *insn, int nop, int icode)
+{
+  rtx op = *recog_data.operand_loc[nop];
+  sloc_t loc = XNEW (struct sloc);
+  ira_assert (REG_P (op));
+  loc->insn = insn;
+  loc->nop = nop;
+  loc->regno = REGNO (op);
+  loc->icode = icode;
+  scratches.safe_push (loc);
+  bitmap_set_bit (&scratch_bitmap, REGNO (op));
+  bitmap_set_bit (&scratch_operand_bitmap,
+		  INSN_UID (insn) * MAX_RECOG_OPERANDS + nop);
+  add_reg_note (insn, REG_UNUSED, op);
+}
+
+/* Return true if string STR contains constraint 'X'.  */
+static bool
+contains_X_constraint_p (const char *str)
+{
+  int c;
+
+  while ((c = *str))
+    {
+      str += CONSTRAINT_LEN (c, str);
+      if (c == 'X')
+	return true;
+    }
+  return false;
+}
+
+/* Change INSN's scratches into pseudos and save their location.
+   Return true if we changed any scratch.  */
+bool
+ira_remove_insn_scratches (rtx_insn *insn, bool all_p, FILE *dump_file,
+			   rtx (*get_reg) (rtx original))
+{
+  int i;
+  bool insn_changed_p;
+  rtx reg, *loc;
+
+  extract_insn (insn);
+  insn_changed_p = false;
+  for (i = 0; i < recog_data.n_operands; i++)
+    {
+      loc = recog_data.operand_loc[i];
+      if (GET_CODE (*loc) == SCRATCH && GET_MODE (*loc) != VOIDmode)
+	{
+	  if (! all_p && contains_X_constraint_p (recog_data.constraints[i]))
+	    continue;
+	  insn_changed_p = true;
+	  *loc = reg = get_reg (*loc);
+	  ira_register_new_scratch_op (insn, i, INSN_CODE (insn));
+	  if (dump_file != NULL)
+	    fprintf (dump_file,
+		     "Removing SCRATCH to p%u in insn #%u (nop %d)\n",
+		     REGNO (reg), INSN_UID (insn), i);
+	}
+    }
+  return insn_changed_p;
+}
+
+/* Return new register of the same mode as ORIGINAL.  Used in
+   remove_scratches.  */
+static rtx
+get_scratch_reg (rtx original)
+{
+  return gen_reg_rtx (GET_MODE (original));
+}
+
+/* Change scratches into pseudos and save their location.  Return true
+   if we changed any scratch.  */
+static bool
+remove_scratches (void)
+{
+  bool change_p = false;
+  basic_block bb;
+  rtx_insn *insn;
+
+  scratches.create (get_max_uid ());
+  bitmap_initialize (&scratch_bitmap, &reg_obstack);
+  bitmap_initialize (&scratch_operand_bitmap, &reg_obstack);
+  FOR_EACH_BB_FN (bb, cfun)
+    FOR_BB_INSNS (bb, insn)
+      if (INSN_P (insn)
+	  && ira_remove_insn_scratches (insn, false, ira_dump_file,
+					get_scratch_reg))
+	{
+	  /* Because we might use DF, we need to keep DF info up to
+	     date.  */
+	  df_insn_rescan (insn);
+	  change_p = true;
+	}
+  return change_p;
+}
+
+/* Changes pseudos created by function remove_scratches onto scratches.  */
+void
+ira_restore_scratches (FILE *dump_file)
+{
+  int regno, n;
+  unsigned i;
+  rtx *op_loc;
+  sloc_t loc;
+
+  for (i = 0; scratches.iterate (i, &loc); i++)
+    {
+      /* Ignore already deleted insns.  */
+      if (NOTE_P (loc->insn)
+	  && NOTE_KIND (loc->insn) == NOTE_INSN_DELETED)
+	continue;
+      extract_insn (loc->insn);
+      if (loc->icode != INSN_CODE (loc->insn))
+	{
+	  /* The icode doesn't match, which means the insn has been
+	     modified (e.g. register elimination).  The scratch cannot
+	     be restored.  */
+	  continue;
+	}
+      op_loc = recog_data.operand_loc[loc->nop];
+      if (REG_P (*op_loc)
+	  && ((regno = REGNO (*op_loc)) >= FIRST_PSEUDO_REGISTER)
+	  && reg_renumber[regno] < 0)
+	{
+	  /* It should be only case when scratch register with chosen
+	     constraint 'X' did not get memory or hard register.  */
+	  ira_assert (ira_former_scratch_p (regno));
+	  *op_loc = gen_rtx_SCRATCH (GET_MODE (*op_loc));
+	  for (n = 0; n < recog_data.n_dups; n++)
+	    *recog_data.dup_loc[n]
+	      = *recog_data.operand_loc[(int) recog_data.dup_num[n]];
+	  if (dump_file != NULL)
+	    fprintf (dump_file, "Restoring SCRATCH in insn #%u(nop %d)\n",
+		     INSN_UID (loc->insn), loc->nop);
+	}
+    }
+  for (i = 0; scratches.iterate (i, &loc); i++)
+    free (loc);
+  scratches.release ();
+  bitmap_clear (&scratch_bitmap);
+  bitmap_clear (&scratch_operand_bitmap);
+}
+
+
 /* If the backend knows where to allocate pseudos for hard register
    initial values, register these allocations now.  */
 static void
@@ -5180,8 +5380,10 @@ allocate_initial_values (void)
 					  &hreg, &preg));
 	}
     }
+
+
 }
 
 /* True when we use LRA instead of reload pass for the current
    function.  */
 bool ira_use_lra_p;
@@ -5202,6 +5404,17 @@ ira (FILE *f)
   bool saved_flag_caller_saves = flag_caller_saves;
   enum ira_region saved_flag_ira_region = flag_ira_region;
 
+  if (flag_ira_verbose < 10)
+    {
+      internal_flag_ira_verbose = flag_ira_verbose;
+      ira_dump_file = f;
+    }
+  else
+    {
+      internal_flag_ira_verbose = flag_ira_verbose - 10;
+      ira_dump_file = stderr;
+    }
+
   clear_bb_flags ();
 
   /* Determine if the current function is a leaf before running IRA
@@ -5248,17 +5461,6 @@ ira (FILE *f)
   if (flag_caller_saves && !ira_use_lra_p)
     init_caller_save ();
 
-  if (flag_ira_verbose < 10)
-    {
-      internal_flag_ira_verbose = flag_ira_verbose;
-      ira_dump_file = f;
-    }
-  else
-    {
-      internal_flag_ira_verbose = flag_ira_verbose - 10;
-      ira_dump_file = stderr;
-    }
-
   setup_prohibited_mode_move_regs ();
   decrease_live_ranges_number ();
   df_note_add_problem ();
@@ -5303,9 +5505,6 @@ ira (FILE *f)
   if (warn_clobbered)
     generate_setjmp_warnings ();
 
-  if (resize_reg_info () && flag_ira_loop_pressure)
-    ira_set_pseudo_classes (true, ira_dump_file);
-
   init_alias_analysis ();
   loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
   reg_equiv = XCNEWVEC (struct equivalence, max_reg_num ());
@@ -5329,6 +5528,30 @@ ira (FILE *f)
   end_alias_analysis ();
   free (reg_equiv);
 
+  /* Once max_regno changes, we need to free and re-init/re-compute
+     some data structures like regstat_n_sets_and_refs and reg_info_p.  */
+  auto regstat_recompute_for_max_regno = [] ()
+    {
+      regstat_free_n_sets_and_refs ();
+      regstat_free_ri ();
+      regstat_init_n_sets_and_refs ();
+      regstat_compute_ri ();
+    };
+
+  int max_regno_before_rm = max_reg_num ();
+  if (ira_use_lra_p && remove_scratches ())
+    {
+      ira_expand_reg_equiv ();
+      /* For now remove_scratches is supposed to create pseudos when it
+	 succeeds; assert this happens all the time.  Once it doesn't
+	 hold, we should guard the regstat recompute for the case
+	 max_regno changes.  */
+      gcc_assert (max_regno_before_rm != max_reg_num ());
+      regstat_recompute_for_max_regno ();
+    }
+
+  if (resize_reg_info () && flag_ira_loop_pressure)
+    ira_set_pseudo_classes (true, ira_dump_file);
+
   setup_reg_equiv ();
   grow_reg_equivs ();
   setup_reg_equiv_init ();
@@ -5451,12 +5674,7 @@ ira (FILE *f)
 #endif
 
   if (max_regno != max_regno_before_ira)
-    {
-      regstat_free_n_sets_and_refs ();
-      regstat_free_ri ();
-      regstat_init_n_sets_and_refs ();
-      regstat_compute_ri ();
-    }
+    regstat_recompute_for_max_regno ();
 
   overall_cost_before = ira_overall_cost;
   if (! ira_conflicts_p)
diff --git a/gcc/ira.h b/gcc/ira.h
index 09f40ef6a78a185e3af396f2881b55d3fa51da97..c30f36aeccaee86f7a7bf0a6e724ab93c2d26a92 100644
--- a/gcc/ira.h
+++ b/gcc/ira.h
@@ -207,6 +207,13 @@ extern bool ira_bad_reload_regno (int, rtx, rtx);
 
 extern void ira_adjust_equiv_reg_cost (unsigned, int);
 
+extern bool ira_former_scratch_p (int regno);
+extern bool ira_former_scratch_operand_p (rtx_insn *insn, int nop);
+extern void ira_register_new_scratch_op (rtx_insn *insn, int nop, int icode);
+extern bool ira_remove_insn_scratches (rtx_insn *insn, bool all_p,
+				       FILE *dump_file,
+				       rtx (*get_reg) (rtx original));
+extern void ira_restore_scratches (FILE *dump_file);
+
 /* ira-costs.c */
 extern void ira_costs_c_finalize (void);
 
diff --git a/gcc/loop-invariant.c b/gcc/loop-invariant.c
index 24b9bcb11dce5b873a84ce1cd00c88d7c3ffe8b0..262683f0189f9091f30e4988e9d5e438909c4a20 100644
--- a/gcc/loop-invariant.c
+++ b/gcc/loop-invariant.c
@@ -1192,7 +1192,7 @@ find_invariants_bb (class loop *loop, basic_block bb, bool always_reached,
 
   /* Don't move insn of cold BB out of loop to preheader
      to reduce calculations and register live range in hot loop
     with cold BB.  */
-  if (!always_executed && preheader->count > bb->count)
+  if (!always_executed && preheader->count > bb->count && flag_lim_count_check)
     {
       if (dump_file)
 	fprintf (dump_file,
 		 "Don't move invariant from bb: %d out of loop %d\n",
diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index 7cc479b30422e8c849447b02e9478e0eb42c5c13..ce9294d71e1a68cc129fefba6bf5f855f845df2e 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -409,14 +409,34 @@ valid_address_p (rtx op, struct address_info *ad,
   return valid_address_p (ad->mode, *ad->outer, ad->as);
 }
 
+/* For a special_memory_operand, MEM_P (op) can be false even though a
+   memory reference is present, e.g. for bcst_mem_operand in the i386
+   back end.  Extract and return the real memory operand inside OP,
+   or OP itself if there is none.  */
+rtx
+extract_mem_from_operand (rtx op)
+{
+  for (rtx x = op;; x = XEXP (x, 0))
+    {
+      if (MEM_P (x))
+	return x;
+      if (GET_RTX_LENGTH (GET_CODE (x)) != 1
+	  || GET_RTX_FORMAT (GET_CODE (x))[0] != 'e')
+	break;
+    }
+  return op;
+}
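+
+/* For instance, i386's bcst_mem_operand accepts a broadcast wrapper
+   such as (vec_duplicate:V4SF (mem:SF ...)); peeling the single-operand
+   'e'-format wrappers above yields the inner (mem:SF ...).  */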
+
 /* Return true if the eliminated form of memory reference OP satisfies
    extra (special) memory constraint CONSTRAINT.  */
 static bool
 satisfies_memory_constraint_p (rtx op, enum constraint_num constraint)
 {
   struct address_info ad;
+  rtx mem = extract_mem_from_operand (op);
+  if (!MEM_P (mem))
+    return false;
 
-  decompose_mem_address (&ad, op);
+  decompose_mem_address (&ad, mem);
   address_eliminator eliminator (&ad);
   return constraint_satisfied_p (op, constraint);
 }
@@ -2386,8 +2406,7 @@ process_alt_operands (int only_alternative)
 		  break;
 
 		case CT_SPECIAL_MEMORY:
-		  if (MEM_P (op)
-		      && satisfies_memory_constraint_p (op, cn))
+		  if (satisfies_memory_constraint_p (op, cn))
 		    win = true;
 		  else if (spilled_pseudo_p (op))
 		    win = true;
@@ -2425,7 +2444,7 @@ process_alt_operands (int only_alternative)
 	  while ((p += len), c);
 
 	  scratch_p = (operand_reg[nop] != NULL_RTX
-		       && lra_former_scratch_p (REGNO (operand_reg[nop])));
+		       && ira_former_scratch_p (REGNO (operand_reg[nop])));
 	  /* Record which operands fit this alternative.  */
 	  if (win)
 	    {
@@ -4279,8 +4298,8 @@ curr_insn_transform (bool check_only_p)
 	     assigment pass and the scratch pseudo will be spilled.
 	     Spilled scratch pseudos are transformed back to scratches
 	     at the LRA end.  */
-	  && lra_former_scratch_operand_p (curr_insn, i)
-	  && lra_former_scratch_p (REGNO (op)))
+	  && ira_former_scratch_operand_p (curr_insn, i)
+	  && ira_former_scratch_p (REGNO (op)))
 	{
 	  int regno = REGNO (op);
 	  lra_change_class (regno, NO_REGS, "      Change to", true);
@@ -4301,7 +4320,7 @@ curr_insn_transform (bool check_only_p)
 	  && goal_alt[i] != NO_REGS && REG_P (op)
 	  && (regno = REGNO (op)) >= FIRST_PSEUDO_REGISTER
 	  && regno < new_regno_start
-	  && ! lra_former_scratch_p (regno)
+	  && ! ira_former_scratch_p (regno)
 	  && reg_renumber[regno] < 0
 	  /* Check that the optional reload pseudo will be able to
 	     hold given mode value.  */
diff --git a/gcc/lra-int.h b/gcc/lra-int.h
index 01fcbfa2664a24eea21bb31d46c4ed18d179614c..f9e99a28baac0e3d6b2f1348670c9b33ecbc4ed1 100644
--- a/gcc/lra-int.h
+++ b/gcc/lra-int.h
@@ -319,9 +319,6 @@ extern struct lra_insn_reg *lra_get_insn_regs (int);
 extern void lra_free_copies (void);
 extern void lra_create_copy (int, int, int);
 extern lra_copy_t lra_get_copy (int);
-extern bool lra_former_scratch_p (int);
-extern bool lra_former_scratch_operand_p (rtx_insn *, int);
-extern void lra_register_new_scratch_op (rtx_insn *, int, int);
 
 extern int lra_new_regno_start;
 extern int lra_constraint_new_regno_start;
diff --git a/gcc/lra-remat.c b/gcc/lra-remat.c
index 09c3975bc6c1e8489838ed983cb898735ca1fae8..4b6308bc0dc69449a7d822c9cef4e78b06f24076 100644
--- a/gcc/lra-remat.c
+++ b/gcc/lra-remat.c
@@ -1036,12 +1036,12 @@ update_scratch_ops (rtx_insn *remat_insn)
       if (! REG_P (*loc))
 	continue;
       int regno = REGNO (*loc);
-      if (! lra_former_scratch_p (regno))
+      if (! ira_former_scratch_p (regno))
 	continue;
       *loc = lra_create_new_reg (GET_MODE (*loc), *loc,
 				 lra_get_allocno_class (regno),
 				 "scratch pseudo copy");
-      lra_register_new_scratch_op (remat_insn, i, id->icode);
+      ira_register_new_scratch_op (remat_insn, i, id->icode);
     }
 }
 
diff --git a/gcc/lra-spills.c b/gcc/lra-spills.c
index 0caa4acd3b5c2672c4269d6c9754c2a20221be6e..8082a5b489f7abd575ac6cbcba632717207631fd 100644
--- a/gcc/lra-spills.c
+++ b/gcc/lra-spills.c
@@ -446,7 +446,7 @@ remove_pseudos (rtx *loc, rtx_insn *insn)
 	   it might result in an address reload for some targets.  In
 	   any case we transform such pseudos not getting hard registers
 	   into scratches back.  */
-	&& ! lra_former_scratch_p (i))
+	&& ! ira_former_scratch_p (i))
       {
 	if (lra_reg_info[i].nrefs == 0
 	    && pseudo_slots[i].mem == NULL && spill_hard_reg[i] == NULL)
@@ -494,7 +494,7 @@ spill_pseudos (void)
   for (i = FIRST_PSEUDO_REGISTER; i < regs_num; i++)
     {
       if (lra_reg_info[i].nrefs != 0 && lra_get_regno_hard_regno (i) < 0
-	  && ! lra_former_scratch_p (i))
+	  && ! ira_former_scratch_p (i))
 	{
 	  bitmap_set_bit (spilled_pseudos, i);
 	  bitmap_ior_into (changed_insns, &lra_reg_info[i].insn_bitmap);
@@ -578,7 +578,7 @@ lra_need_for_scratch_reg_p (void)
   for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
     if (lra_reg_info[i].nrefs != 0 && lra_get_regno_hard_regno (i) < 0
-	&& lra_former_scratch_p (i))
+	&& ira_former_scratch_p (i))
       return true;
   return false;
 }
@@ -591,7 +591,7 @@ lra_need_for_spills_p (void)
   for (i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
     if (lra_reg_info[i].nrefs != 0 && lra_get_regno_hard_regno (i) < 0
-	&& ! lra_former_scratch_p (i))
+	&& ! ira_former_scratch_p (i))
       return true;
   return false;
 }
@@ -612,7 +612,7 @@ lra_spill (void)
   for (n = 0, i = FIRST_PSEUDO_REGISTER; i < regs_num; i++)
     if (lra_reg_info[i].nrefs != 0 && lra_get_regno_hard_regno (i) < 0
 	/* We do not want to assign memory for former scratches.  */
-	&& ! lra_former_scratch_p (i))
+	&& ! ira_former_scratch_p (i))
       pseudo_regnos[n++] = i;
   lra_assert (n > 0);
   pseudo_slots = XNEWVEC (struct pseudo_slot, regs_num);
diff --git a/gcc/lra.c b/gcc/lra.c
index 3543ce3993c2fd78c336c0b21c0c6dfff12a82a8..f97bb8e077b0609715becd14070af959359eeafa 100644
--- a/gcc/lra.c
+++ b/gcc/lra.c
@@ -160,8 +160,6 @@ static void invalidate_insn_recog_data (int);
 static int get_insn_freq (rtx_insn *);
 static void invalidate_insn_data_regno_info (lra_insn_recog_data_t,
 					     rtx_insn *, int);
-static void remove_scratches_1 (rtx_insn *);
-
 /* Expand all regno related info needed for LRA.  */
 static void
 expand_reg_data (int old)
@@ -482,6 +480,8 @@ lra_emit_add (rtx x, rtx y, rtx z)
 /* The number of emitted reload insns so far.  */
 int lra_curr_reload_num;
 
+static void remove_insn_scratches (rtx_insn *insn);
+
 /* Emit x := y, processing special case when y = u + v or y = u + v
    * scale + w through emit_add (Y can be an address which is base +
    index reg * scale + displacement in general case).  X may be used
@@ -503,7 +503,7 @@ lra_emit_move (rtx x, rtx y)
       /* The move pattern may require scratch registers, so convert
 	 them into real registers now.  */
       if (insn != NULL_RTX)
-	remove_scratches_1 (insn);
+	remove_insn_scratches (insn);
       if (REG_P (x))
 	lra_reg_info[ORIGINAL_REGNO (x)].last_reload = ++lra_curr_reload_num;
       /* Function emit_move can create pseudos -- so expand the pseudo
@@ -1988,170 +1988,35 @@ lra_substitute_pseudo_within_insn (rtx_insn *insn, int old_regno,
 
 
-/* This page contains code dealing with scratches (changing them onto
-   pseudos and restoring them from the pseudos).
-
-   We change scratches into pseudos at the beginning of LRA to
-   simplify dealing with them (conflicts, hard register assignments).
-
-   If the pseudo denoting scratch was spilled it means that we do need
-   a hard register for it.  Such pseudos are transformed back to
-   scratches at the end of LRA.  */
-
-/* Description of location of a former scratch operand.  */
-struct sloc
+/* Return new register of the same mode as ORIGINAL of class ALL_REGS.
+   Used in remove_insn_scratches.  */
+static rtx
+get_scratch_reg (rtx original)
 {
-  rtx_insn *insn; /* Insn where the scratch was.  */
-  int nop;  /* Number of the operand which was a scratch.  */
-  int icode;  /* Original icode from which scratch was removed.  */
-};
-
-typedef struct sloc *sloc_t;
-
-/* Locations of the former scratches.  */
-static vec<sloc_t> scratches;
-
-/* Bitmap of scratch regnos.  */
-static bitmap_head scratch_bitmap;
-
-/* Bitmap of scratch operands.  */
-static bitmap_head scratch_operand_bitmap;
-
-/* Return true if pseudo REGNO is made of SCRATCH.  */
-bool
-lra_former_scratch_p (int regno)
-{
-  return bitmap_bit_p (&scratch_bitmap, regno);
+  return lra_create_new_reg (GET_MODE (original), original, ALL_REGS, NULL);
 }
 
-/* Return true if the operand NOP of INSN is a former scratch.  */
-bool
-lra_former_scratch_operand_p (rtx_insn *insn, int nop)
-{
-  return bitmap_bit_p (&scratch_operand_bitmap,
-		       INSN_UID (insn) * MAX_RECOG_OPERANDS + nop) != 0;
-}
-
-/* Register operand NOP in INSN as a former scratch.  It will be
-   changed to scratch back, if it is necessary, at the LRA end.  */
*/ -void -lra_register_new_scratch_op (rtx_insn *insn, int nop, int icode) -{ - lra_insn_recog_data_t id = lra_get_insn_recog_data (insn); - rtx op = *id->operand_loc[nop]; - sloc_t loc = XNEW (struct sloc); - lra_assert (REG_P (op)); - loc->insn = insn; - loc->nop = nop; - loc->icode = icode; - scratches.safe_push (loc); - bitmap_set_bit (&scratch_bitmap, REGNO (op)); - bitmap_set_bit (&scratch_operand_bitmap, - INSN_UID (insn) * MAX_RECOG_OPERANDS + nop); - add_reg_note (insn, REG_UNUSED, op); -} - -/* Change INSN's scratches into pseudos and save their location. */ +/* Remove all insn scratches in INSN. */ static void -remove_scratches_1 (rtx_insn *insn) +remove_insn_scratches (rtx_insn *insn) { - int i; - bool insn_changed_p; - rtx reg; - lra_insn_recog_data_t id; - struct lra_static_insn_data *static_id; - - id = lra_get_insn_recog_data (insn); - static_id = id->insn_static_data; - insn_changed_p = false; - for (i = 0; i < static_id->n_operands; i++) - if (GET_CODE (*id->operand_loc[i]) == SCRATCH - && GET_MODE (*id->operand_loc[i]) != VOIDmode) - { - insn_changed_p = true; - *id->operand_loc[i] = reg - = lra_create_new_reg (static_id->operand[i].mode, - *id->operand_loc[i], ALL_REGS, NULL); - lra_register_new_scratch_op (insn, i, id->icode); - if (lra_dump_file != NULL) - fprintf (lra_dump_file, - "Removing SCRATCH in insn #%u (nop %d)\n", - INSN_UID (insn), i); - } - if (insn_changed_p) - /* Because we might use DF right after caller-saves sub-pass - we need to keep DF info up to date. */ + if (ira_remove_insn_scratches (insn, true, lra_dump_file, get_scratch_reg)) df_insn_rescan (insn); } -/* Change scratches into pseudos and save their location. */ +/* Remove all insn scratches in the current function. */ static void remove_scratches (void) { basic_block bb; rtx_insn *insn; - scratches.create (get_max_uid ()); - bitmap_initialize (&scratch_bitmap, &reg_obstack); - bitmap_initialize (&scratch_operand_bitmap, &reg_obstack); FOR_EACH_BB_FN (bb, cfun) FOR_BB_INSNS (bb, insn) - if (INSN_P (insn)) - remove_scratches_1 (insn); -} - -/* Changes pseudos created by function remove_scratches onto scratches. */ -static void -restore_scratches (void) -{ - int regno; - unsigned i; - sloc_t loc; - rtx_insn *last = NULL; - lra_insn_recog_data_t id = NULL; - - for (i = 0; scratches.iterate (i, &loc); i++) - { - /* Ignore already deleted insns. */ - if (NOTE_P (loc->insn) - && NOTE_KIND (loc->insn) == NOTE_INSN_DELETED) - continue; - if (last != loc->insn) - { - last = loc->insn; - id = lra_get_insn_recog_data (last); - } - if (loc->icode != id->icode) - { - /* The icode doesn't match, which means the insn has been modified - (e.g. register elimination). The scratch cannot be restored. */ - continue; - } - if (REG_P (*id->operand_loc[loc->nop]) - && ((regno = REGNO (*id->operand_loc[loc->nop])) - >= FIRST_PSEUDO_REGISTER) - && lra_get_regno_hard_regno (regno) < 0) - { - /* It should be only case when scratch register with chosen - constraint 'X' did not get memory or hard register. 
*/ - lra_assert (lra_former_scratch_p (regno)); - *id->operand_loc[loc->nop] - = gen_rtx_SCRATCH (GET_MODE (*id->operand_loc[loc->nop])); - lra_update_dup (id, loc->nop); - if (lra_dump_file != NULL) - fprintf (lra_dump_file, "Restoring SCRATCH in insn #%u(nop %d)\n", - INSN_UID (loc->insn), loc->nop); - } - } - for (i = 0; scratches.iterate (i, &loc); i++) - free (loc); - scratches.release (); - bitmap_clear (&scratch_bitmap); - bitmap_clear (&scratch_operand_bitmap); + if (INSN_P (insn)) + remove_insn_scratches (insn); } - - /* Function checks RTL for correctness. If FINAL_P is true, it is done at the end of LRA and the check is more rigorous. */ static void @@ -2571,7 +2436,7 @@ lra (FILE *f) lra_bad_spill_regno_start = lra_constraint_new_regno_start; lra_assignment_iter_after_spill = 0; } - restore_scratches (); + ira_restore_scratches (lra_dump_file); lra_eliminate (true, false); lra_final_code_change (); lra_in_progress = 0; diff --git a/gcc/recog.c b/gcc/recog.c index 2720aaaac8595a723d2cf0efc4106e8860e3d405..6af343999584981874e5f70759631d64b84d1173 100644 --- a/gcc/recog.c +++ b/gcc/recog.c @@ -1778,6 +1778,7 @@ asm_operand_ok (rtx op, const char *constraint, const char **constraints) /* FALLTHRU */ default: cn = lookup_constraint (constraint); + rtx mem = NULL; switch (get_constraint_type (cn)) { case CT_REGISTER: @@ -1796,9 +1797,13 @@ asm_operand_ok (rtx op, const char *constraint, const char **constraints) break; case CT_MEMORY: + mem = op; + /* Fall through. */ case CT_SPECIAL_MEMORY: /* Every memory operand can be reloaded to fit. */ - result = result || memory_operand (op, VOIDmode); + if (!mem) + mem = extract_mem_from_operand (op); + result = result || memory_operand (mem, VOIDmode); break; case CT_ADDRESS: @@ -2584,7 +2589,9 @@ constrain_operands (int strict, alternative_mask alternatives) /* A unary operator may be accepted by the predicate, but it is irrelevant for matching constraints. */ - if (UNARY_P (op)) + /* For special_memory_operand, there could be a memory operand inside, + and it would cause a mismatch for constraint_satisfied_p. */ + if (UNARY_P (op) && op == extract_mem_from_operand (op)) op = XEXP (op, 0); if (GET_CODE (op) == SUBREG) diff --git a/gcc/rtl.h b/gcc/rtl.h index b29afca8d6b67db9870f00158a2b56e51e352a4e..35fb6ba73a02cf2a26e7b23e50ba6455983019a2 100644 --- a/gcc/rtl.h +++ b/gcc/rtl.h @@ -4323,6 +4323,7 @@ extern rtx gen_hard_reg_clobber (machine_mode, unsigned int); extern rtx get_reg_known_value (unsigned int); extern bool get_reg_known_equiv_p (unsigned int); extern rtx get_reg_base_value (unsigned int); +extern rtx extract_mem_from_operand (rtx); #ifdef STACK_REGS extern int stack_regs_mentioned (const_rtx insn); diff --git a/gcc/testsuite/gcc.target/aarch64/nospill.c b/gcc/testsuite/gcc.target/aarch64/nospill.c new file mode 100644 index 0000000000000000000000000000000000000000..968a4267e0d46192e4c59c098f787f5f4738a5e3 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/nospill.c @@ -0,0 +1,35 @@ +/* { dg-do compile } */ +/* { dg-options "-O3" } */ + +/* The pseudo for P is marked as moveable in the IRA pass. */ +float +func_0 (float a, float b, float c) +{ + float p = c / a; + + if (b > 1) + { + b /= p; + if (c > 2) + a /= 3; + } + + return b / c * a; +} + +/* If first_moveable_pseudo and last_moveable_pseudo are not reset correctly, + they will carry over and spill the pseudo for Q. 
*/ +float +func_1 (float a, float b, float c) +{ + float q = a + b; + + c *= a / (b + b); + if (a > 0) + c *= q; + + return a * b * c; +} + +/* We have plenty of spare registers, so check nothing has been spilled. */ +/* { dg-final { scan-assembler-not "\tstr\t" } } */ diff --git a/gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c b/gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c new file mode 100644 index 0000000000000000000000000000000000000000..7991ba7982f767037224d9fc1998f1bb62363e38 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/reg-alloc-2.c @@ -0,0 +1,47 @@ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -favoid-propagating-conflicts" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#define PROB 0.1 + +struct L +{ + int data; + volatile struct L *next; + volatile struct L *inner; +}; + +/* The thing we're testing here is that the !head->inner path of the outer loop + body has no stack accesses. It's possible that we'll need to update this + pattern for unrelated code changes, but the test should be XFAILed rather + than changed if any new stack accesses occur on the !head->inner path. */ +/* +** foo: +** ... +** ldr (w[0-9]+), \[(x[0-9]+)\] +** add (w[0-9]+), (?:\3, \1|\1, \3) +** ldr (x[0-9]+), \[\2, #?16\] +** str \3, \[\2\] +** ldr \2, \[\2, #?8\] +** cbn?z \4, .* +** ... +** ret +*/ +void +foo (volatile struct L *head, int inc) +{ + while (head) + { + inc = head->data + inc; + volatile struct L *inner = head->inner; + head->data = inc; + head = head->next; + if (__builtin_expect_with_probability (inner != 0, 0, PROB)) + for (int i = 0; i < 1000; ++i) + /* Leave x30 for i. */ + asm volatile ("// foo" ::: + "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7", + "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15", + "x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28"); + } +} diff --git a/gcc/testsuite/gcc.target/aarch64/reg-alloc-3.c b/gcc/testsuite/gcc.target/aarch64/reg-alloc-3.c new file mode 100644 index 0000000000000000000000000000000000000000..ae5910ae35aa1dc48877d96ea52516b794b71ecf --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/reg-alloc-3.c @@ -0,0 +1,65 @@ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2 -favoid-propagating-conflicts" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#define PROB 0.1 + +struct L +{ + int data; + volatile struct L *next; + volatile struct L *inner; +}; + +void ext(); + +/* The thing we're testing here is that the !head->inner path of the outer loop + body has no stack accesses. It's possible that we'll need to update this + pattern for unrelated code changes, but the test should be XFAILed rather + than changed if any new stack accesses creep into the !head->inner path. */ +/* +** foo: +** ... +** ldr (w[0-9]+), \[(x[0-9]+)\] +** add (w[0-9]+), (?:\3, \1|\1, \3) +** ldr (x[0-9]+), \[\2, #?16\] +** str \3, \[\2\] +** ldr \2, \[\2, #?8\] +** cbn?z \4, .* +** ... +** ret +*/ +void +foo (volatile struct L *head, int inc, double *ptr) +{ + double d = *ptr; + while (head) + { + /* Clobber all call-preserved GPRs, so that the loop has to use + call-clobbered GPRs if it is to avoid spilling. 
*/ + asm volatile ("" ::: + "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28"); + inc = head->data + inc; + volatile struct L *inner = head->inner; + head->data = inc; + head = head->next; + if (__builtin_expect_with_probability (inner != 0, 0, PROB)) + for (int i = 0; i < 1000; ++i) + { + ext (); + /* Hack to create high register pressure, so that IRA doesn't + collapse this loop into the parent loop. */ + d += 1; + asm volatile ("// foo" ::: + "d0", "d1", "d2", "d3", + "d4", "d5", "d6", "d7", + "d8", "d9", "d10", "d11", + "d12", "d13", "d14", "d15", + "d16", "d17", "d18", "d19", + "d20", "d21", "d22", "d23", + "d24", "d25", "d26", "d27", + "d28", "d29", "d30", "d31"); + } + } + *ptr = d; +} diff --git a/gcc/testsuite/gcc.target/aarch64/reg-alloc-4.c b/gcc/testsuite/gcc.target/aarch64/reg-alloc-4.c new file mode 100644 index 0000000000000000000000000000000000000000..ceb6f50de2dc38c4e57f0d9d15526cd721591e74 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/reg-alloc-4.c @@ -0,0 +1,69 @@ +/* { dg-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } */ +/* { dg-final { check-function-bodies "**" "" "" { target lp64 } } } */ + +#define PROB 0.1 + +struct L +{ + int data; + volatile struct L *next; + volatile struct L *inner; +}; + +/* The thing we're testing here is that the !head->inner path of the outer loop + body has no stack accesses. It's possible that we'll need to update this + pattern for unrelated code changes, but the test should be XFAILed rather + than changed if any new stack accesses occur on the !head->inner path. */ +/* +** foo: +** ... +** ldr (w[0-9]+), \[(x[0-9]+)\] +** add (w[0-9]+), (?:\3, \1|\1, \3) +** ldr (x[0-9]+), \[\2, #?16\] +** str \3, \[\2\] +** ldr \2, \[\2, #?8\] +** cbn?z \4, .* +** ... +** ret +*/ +void +foo (volatile struct L *head, int inc) +{ + while (head) + { + /* Clobber all call-preserved GPRs, so that the loop has to use + call-clobbered GPRs if it is to avoid spilling. */ + asm volatile ("" ::: + "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28"); + inc = head->data + inc; + volatile struct L *inner = head->inner; + head->data = inc; + head = head->next; + if (__builtin_expect_with_probability (inner != 0, 0, PROB)) + for (int i = 0; i < 1000; ++i) + asm volatile ("" :: /* example allocation: */ + "r" (i), /* x0 */ + "r" (inner), /* x1 */ + "r" (inner->next), /* x2 */ + "r" (inner->next), /* x3 */ + "r" (inner->next), /* x4 */ + "r" (inner->next), /* x5 */ + "r" (inner->next), /* x6 */ + "r" (inner->next), /* x7 */ + "r" (inner->next), /* x8 */ + "r" (inner->next), /* x9 */ + "r" (inner->next), /* x10 */ + "r" (inner->next), /* x11 */ + "r" (inner->next), /* x12 */ + "r" (inner->next), /* x13 */ + "r" (inner->next), /* x14 */ + "r" (inner->next), /* x15 */ + "r" (inner->next), /* x16 */ + "r" (inner->next), /* x17 */ + "r" (inner->next), /* x18 */ + "r" (inner->next) : /* x30 */ + "x19", "x20", "x21", "x22", "x23", + "x24", "x25", "x26", "x27", "x28"); + } +} diff --git a/gcc/testsuite/gcc.target/i386/pr97540.c b/gcc/testsuite/gcc.target/i386/pr97540.c new file mode 100644 index 0000000000000000000000000000000000000000..20f8717372cb4be2f0eae3b94e9580f5ac830cde --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr97540.c @@ -0,0 +1,6 @@ +/* { dg-do compile } */ +/* { dg-options "-O2" } */ + +int mt7615_add_interface_dev_0; +int ffs(int x) { asm("" : : "rm"(x)); } +int mt7615_add_interface() { ffs(~mt7615_add_interface_dev_0); }
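As an illustrative sketch (not part of the patch itself): the -favoid-propagating-conflicts changes above target pseudos that are live across a cold inner loop but never referenced inside it, the same shape as the reg-alloc-*.c tests. The names sum and ext below are hypothetical, and whether IRA actually keeps total in a call-clobbered register on the hot path depends on the target and on this patch being applied; compiling with something like gcc -O2 -favoid-propagating-conflicts -S lets the resulting allocation be inspected.

/* Hypothetical example, not from the patch: TOTAL is live across the
   cold inner loop but not referenced there, so with the new behaviour
   the inner loop's call clobbers need not be propagated as conflicts
   to TOTAL's hot-path allocno.  */
extern int ext (int);

int
sum (int *list, int n, int rare)
{
  int total = 0;
  for (int i = 0; i < n; ++i)
    {
      total += list[i];
      if (__builtin_expect (rare != 0, 0))
	/* Cold inner loop: spilling TOTAL around it is cheaper than
	   keeping it out of call-clobbered registers everywhere.  */
	for (int j = 0; j < 1000; ++j)
	  rare = ext (rare);
    }
  return total;
}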