diff --git a/Makefile.in b/Makefile.in index cfdca3d18e1deed8f4c830403349295270556414..23b6fe4ea68899ef0c94b712b42fee3e7fb252d4 100644 --- a/Makefile.in +++ b/Makefile.in @@ -640,6 +640,7 @@ all: @target_makefile_frag@ @alphaieee_frag@ @ospace_frag@ +@sw_64ieee_frag@ @host_makefile_frag@ ### diff --git a/Makefile.tpl b/Makefile.tpl index efed1511750412f7dff03a363779b78c6ecccb41..ebe66c5467b7e02d853c10c84951e4329eea7a49 100644 --- a/Makefile.tpl +++ b/Makefile.tpl @@ -563,6 +563,7 @@ all: @target_makefile_frag@ @alphaieee_frag@ @ospace_frag@ +@sw_64ieee_frag@ @host_makefile_frag@ ### diff --git a/config.guess b/config.guess index 97ad0733304d51c825cb2abbc5db47d31d32c0ef..52cad983c53ef7a4fa77bbdcf3c9af78fc29cd4e 100644 --- a/config.guess +++ b/config.guess @@ -1083,6 +1083,18 @@ EOF sparc:Linux:*:* | sparc64:Linux:*:*) echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; + sw_64:Linux:*:*) + case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in + SW6) UNAME_MACHINE=sw_64sw6 ;; + SW6A) UNAME_MACHINE=sw_64sw6a ;; + SW6B) UNAME_MACHINE=sw_64sw6b ;; + SW8A) UNAME_MACHINE=sw_64sw8a ;; + SW) UNAME_MACHINE=sw_64 ;; + esac + objdump --private-headers /bin/sh | grep -q ld.so.1 + if test "$?" = 0 ; then LIBC=gnulibc1 ; fi + echo ${UNAME_MACHINE}-unknown-linux-${LIBC} + exit ;; tile*:Linux:*:*) echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" exit ;; diff --git a/config.sub b/config.sub index a318a46868500fbeea993e693e32701041ffad1b..aa418e7b930376dac6f3a70791e867991c380e15 100644 --- a/config.sub +++ b/config.sub @@ -1237,6 +1237,7 @@ case $cpu-$vendor in | sparclite \ | sparcv8 | sparcv9 | sparcv9b | sparcv9v | sv1 | sx* \ | spu \ + | sw_64 | sw_64sw6a | sw_64sw6b | sw_64sw8a \ | tahoe \ | tic30 | tic4x | tic54x | tic55x | tic6x | tic80 \ | tron \ diff --git a/config/intdiv0.m4 b/config/intdiv0.m4 index 55dddcf1c24c26a63f303c48ac6ff33329404f99..53dc632bce844c980c7813a43a73fc01f3396e84 100644 --- a/config/intdiv0.m4 +++ b/config/intdiv0.m4 @@ -56,7 +56,7 @@ int main () [ # Guess based on the CPU. 
case "$host_cpu" in - alpha* | i[34567]86 | m68k | s390*) + alpha* | i[34567]86 | m68k | s390* | sw_64* ) gt_cv_int_divbyzero_sigfpe="guessing yes";; *) gt_cv_int_divbyzero_sigfpe="guessing no";; diff --git a/config/tcl.m4 b/config/tcl.m4 index 4542a4b23d7239e1d37d44d0961382442f474bd2..c58bf534363ad2ada6120b36d9ddc73eabb455c9 100644 --- a/config/tcl.m4 +++ b/config/tcl.m4 @@ -1368,6 +1368,9 @@ dnl AC_CHECK_TOOL(AR, ar) if test "`uname -m`" = "alpha" ; then CFLAGS="$CFLAGS -mieee" fi + if test "`uname -m`" = "sw_64" ; then + CFLAGS="$CFLAGS -mieee" + fi if test $do64bit = yes; then AC_CACHE_CHECK([if compiler accepts -m64 flag], tcl_cv_cc_m64, [ hold_cflags=$CFLAGS @@ -1418,6 +1421,9 @@ dnl AC_CHECK_TOOL(AR, ar) if test "`uname -m`" = "alpha" ; then CFLAGS="$CFLAGS -mieee" fi + if test "`uname -m`" = "sw_64" ; then + CFLAGS="$CFLAGS -mieee" + fi ;; Lynx*) SHLIB_CFLAGS="-fPIC" diff --git a/configure b/configure index 97d5ca4fc0498f8e6074908a2a43cdd19e2e797d..1bee61dc764437da86d754ab6f4f304408c06605 100755 --- a/configure +++ b/configure @@ -777,6 +777,7 @@ ac_subst_files='serialization_dependencies host_makefile_frag target_makefile_frag alphaieee_frag +sw_64ieee_frag ospace_frag' ac_user_opts=' enable_option_checking @@ -4010,6 +4011,10 @@ case "${target}" in use_gnu_ld=no fi ;; + sw_64*-*-*) + # newlib is not 64 bit ready + noconfigdirs="$noconfigdirs target-newlib target-libgloss" + ;; tic6x-*-*) noconfigdirs="$noconfigdirs sim" ;; @@ -7161,6 +7166,15 @@ case $target in ;; esac +sw_64ieee_frag=/dev/null +case $target in + sw_64*-*-*) + # This just makes sure to use the -mieee option to build target libs. + # This should probably be set individually by each library. + sw_64ieee_frag="config/mt-sw_64ieee" + ;; +esac + # If --enable-target-optspace always use -Os instead of -O2 to build # the target libraries, similarly if it is not specified, use -Os # on selected platforms. @@ -7856,7 +7870,7 @@ case "${target}" in esac # Makefile fragments. -for frag in host_makefile_frag target_makefile_frag alphaieee_frag ospace_frag; +for frag in host_makefile_frag target_makefile_frag alphaieee_frag sw_64ieee_frag ospace_frag; do eval fragval=\$$frag if test $fragval != /dev/null; then diff --git a/configure.ac b/configure.ac index 90ccd5ef8a85e8f36658cd5c81924ca346a01eab..1e3cd04d53f3f2a6afb5ca388d1f11754fc3e044 100644 --- a/configure.ac +++ b/configure.ac @@ -1283,6 +1283,10 @@ case "${target}" in use_gnu_ld=no fi ;; + sw_64*-*-*) + # newlib is not 64 bit ready + noconfigdirs="$noconfigdirs target-newlib target-libgloss" + ;; tic6x-*-*) noconfigdirs="$noconfigdirs sim" ;; @@ -1342,6 +1346,9 @@ case "${host}" in rs6000-*-aix*) host_makefile_frag="config/mh-ppc-aix" ;; + sw_64*-linux*) + host_makefile_frag="config/mh-sw_64-linux" + ;; esac fi @@ -2666,6 +2673,15 @@ case $target in ;; esac +sw_64ieee_frag=/dev/null +case $target in + sw_64*-*-*) + # This just makes sure to use the -mieee option to build target libs. + # This should probably be set individually by each library. + sw_64ieee_frag="config/mt-sw_64ieee" + ;; +esac + # If --enable-target-optspace always use -Os instead of -O2 to build # the target libraries, similarly if it is not specified, use -Os # on selected platforms. @@ -3356,7 +3372,7 @@ case "${target}" in esac # Makefile fragments. 
-for frag in host_makefile_frag target_makefile_frag alphaieee_frag ospace_frag; +for frag in host_makefile_frag target_makefile_frag alphaieee_frag sw_64ieee_frag ospace_frag; do eval fragval=\$$frag if test $fragval != /dev/null; then @@ -3366,6 +3382,7 @@ done AC_SUBST_FILE(host_makefile_frag) AC_SUBST_FILE(target_makefile_frag) AC_SUBST_FILE(alphaieee_frag) +AC_SUBST_FILE(sw_64ieee_frag) AC_SUBST_FILE(ospace_frag) # Miscellanea: directories, flags, etc. diff --git a/contrib/compare-all-tests b/contrib/compare-all-tests index 502cc64f52270c19b4086b3d660fedaf928e5a31..02519a1f3e859d26a56011a1ef627dec1b56c906 100644 --- a/contrib/compare-all-tests +++ b/contrib/compare-all-tests @@ -33,8 +33,9 @@ ppc_opts='-m32 -m64' s390_opts='-m31 -m31/-mzarch -m64' sh_opts='-m3 -m3e -m4 -m4a -m4al -m4/-mieee -m1 -m1/-mno-cbranchdi -m2a -m2a/-mieee -m2e -m2e/-mieee' sparc_opts='-mcpu=v8/-m32 -mcpu=v9/-m32 -m64' +sw_64_opts='-mlong-double-64/-mieee -mlong-double-64 -mlong-double-128/-mieee -mlong-double-128' -all_targets='alpha arm avr bfin cris fr30 frv h8300 ia64 iq2000 m32c m32r m68k mcore mips mmix mn10300 pa pdp11 ppc sh sparc v850 vax xstormy16 xtensa' # e500 +all_targets='alpha arm avr bfin cris fr30 frv h8300 ia64 iq2000 m32c m32r m68k mcore mips mmix mn10300 pa pdp11 ppc sh sparc sw_64 v850 vax xstormy16 xtensa' # e500 test_one_file () { diff --git a/contrib/config-list.mk b/contrib/config-list.mk index d154286a497cb0c8492892b8ee52cd489efac3e8..0a8fbf0e7e0f928aa975dd6f5e4482707eba8cf0 100644 --- a/contrib/config-list.mk +++ b/contrib/config-list.mk @@ -92,6 +92,7 @@ LIST = aarch64-elf aarch64-linux-gnu aarch64-rtems \ sparc64-sun-solaris2.11OPT-with-gnu-ldOPT-with-gnu-asOPT-enable-threads=posix \ sparc-wrs-vxworks sparc64-elf sparc64-rtems sparc64-linux sparc64-freebsd6 \ sparc64-netbsd sparc64-openbsd \ + sw_64-linux-gnu sw_64-netbsd sw_64-openbsd \ tilegx-linux-gnu tilegxbe-linux-gnu tilepro-linux-gnu \ v850e-elf v850-elf v850-rtems vax-linux-gnu \ vax-netbsdelf vax-openbsd visium-elf x86_64-apple-darwin \ diff --git a/gcc/auto-inc-dec.c b/gcc/auto-inc-dec.c index 7d0d91403f3856ceb5acd6768a14d929f4ec4b33..167e8c9af064bac3488fddb8cb42121c04c932cc 100644 --- a/gcc/auto-inc-dec.c +++ b/gcc/auto-inc-dec.c @@ -892,6 +892,10 @@ parse_add_or_inc (rtx_insn *insn, bool before_mem) inc_insn.reg1_val = -INTVAL (XEXP (SET_SRC (pat), 1)); inc_insn.reg1 = GEN_INT (inc_insn.reg1_val); } +#ifdef FLAG_SW64_INC_DEC + if (inc_insn.reg1_val > 2047 || inc_insn.reg1_val < -2048) + return false; +#endif return true; } else if ((HAVE_PRE_MODIFY_REG || HAVE_POST_MODIFY_REG) @@ -1369,6 +1373,10 @@ find_mem (rtx *address_of_x) mem_insn.reg1_is_const = true; /* Match with *(reg0 + c) where c is a const. 
*/ mem_insn.reg1_val = INTVAL (reg1); +#ifdef FLAG_SW64_INC_DEC + if (mem_insn.reg1_val > 2047 || mem_insn.reg1_val < -2048) + return false; +#endif if (find_inc (true)) return true; } @@ -1697,7 +1705,11 @@ public: if (!AUTO_INC_DEC) return false; +#ifdef FLAG_SW64_INC_DEC + return (optimize > 0 && flag_auto_inc_dec && flag_sw_auto_inc_dec); +#else return (optimize > 0 && flag_auto_inc_dec); +#endif } diff --git a/gcc/builtins.c b/gcc/builtins.c index ffbb2cae9eeefc1ef119c6985f472ff6e0f2cf46..8f319ceab91bed9400231165ab269d5c0e7e2c44 100644 --- a/gcc/builtins.c +++ b/gcc/builtins.c @@ -7460,6 +7460,17 @@ expand_builtin_sync_synchronize (void) expand_mem_thread_fence (MEMMODEL_SYNC_SEQ_CST); } +#ifdef FLAG_SW64_WMEMB +static void +expand_builtin_sync_synchronize_write (void) +{ + if (TARGET_SW8A && targetm.have_memory_barrier ()) + emit_insn (targetm.gen_write_memory_barrier ()); + else + error ("Current arch don't support write memory barrier !!!"); +} +#endif + static rtx expand_builtin_thread_pointer (tree exp, rtx target) { @@ -8678,6 +8689,11 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, expand_builtin_sync_synchronize (); return const0_rtx; +#ifdef FLAG_SW64_WMEMB + case BUILT_IN_SYNC_SYNCHRONIZE_WRITE: + expand_builtin_sync_synchronize_write (); + return const0_rtx; +#endif case BUILT_IN_ATOMIC_EXCHANGE_1: case BUILT_IN_ATOMIC_EXCHANGE_2: case BUILT_IN_ATOMIC_EXCHANGE_4: diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c index dc1a898487194c91371b9e326cd53bfb23b39d50..bdab4928cb1cca0d9b6547a8a1eb29b30c2ce14e 100644 --- a/gcc/c-family/c-opts.c +++ b/gcc/c-family/c-opts.c @@ -751,7 +751,14 @@ default_handle_c_option (size_t code ATTRIBUTE_UNUSED, const char *arg ATTRIBUTE_UNUSED, int value ATTRIBUTE_UNUSED) { +#if defined FLAG_SW64_SIMD || defined FLAG_SW64_M32 + if (code == OPT_msimd || code == OPT_m32 || code == OPT_msw_use_32align) + return true; + else + return false; +#else return false; +#endif } /* Post-switch processing. */ diff --git a/gcc/common/config/sw_64/sw_64-common.c b/gcc/common/config/sw_64/sw_64-common.c new file mode 100644 index 0000000000000000000000000000000000000000..eaf1f0d32d89e292fa928a5bebbc6507806aa762 --- /dev/null +++ b/gcc/common/config/sw_64/sw_64-common.c @@ -0,0 +1,114 @@ +/* Common hooks for Sw_64. + Copyright (C) 1992-2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "diagnostic-core.h" +#include "tm.h" +#include "common/common-target.h" +#include "common/common-target-def.h" +#include "opts.h" +#include "flags.h" + +int flag_fpcr_set; + +/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. */ +static const struct default_options sw_64_option_optimization_table[] = { + /* Enable redundant extension instructions removal at -O2 and higher. */ + {OPT_LEVELS_2_PLUS, OPT_free, NULL, 1}, + {OPT_LEVELS_NONE, 0, NULL, 0}}; + +/* Implement TARGET_OPTION_INIT_STRUCT. 
*/ + +static void +sw_64_option_init_struct (struct gcc_options *opts ATTRIBUTE_UNUSED) +{ + opts->x_target_flags |= MASK_IEEE; + global_options.x_flag_prefetch_loop_arrays = 1; +} + +/* Implement TARGET_HANDLE_OPTION. */ + +static bool +sw_64_handle_option (struct gcc_options *opts, + struct gcc_options *opts_set ATTRIBUTE_UNUSED, + const struct cl_decoded_option *decoded, location_t loc) +{ + size_t code = decoded->opt_index; + const char *arg = decoded->arg; + int value = decoded->value; + + switch (code) + { + case OPT_mfp_regs: + if (value == 0) + opts->x_target_flags |= MASK_SOFT_FP; + break; + + case OPT_mieee: + case OPT_mieee_with_inexact: + /* add mieee for sw_64. */ + case OPT_mieee_main: + if (code == OPT_mieee) + flag_fpcr_set = 1; + else if (code == OPT_mieee_with_inexact) + flag_fpcr_set = 3; + else if (code == OPT_mieee_main) + flag_fpcr_set = 4; + opts->x_target_flags |= MASK_IEEE_CONFORMANT; + break; + + case OPT_mtls_size_: + if (value != 16 && value != 32 && value != 64) + error_at (loc, "bad value %qs for %<-mtls-size%> switch", arg); + break; + + case OPT_mtls_tlsgd_: + if (value != 16 && value != 32) + error_at (loc, "bad value %qs for -mtls-tlsgd switch", arg); + break; + + case OPT_mtls_tlsldm_: + if (value != 16 && value != 32) + error_at (loc, "bad value %qs for -mtls-tlsldm switch", arg); + break; + + case OPT_mgprel_size_: + if (value != 16 && value != 32) + error_at (loc, "bad value %qs for -mgprel-size switch", arg); + break; + } + + return true; +} + +#undef TARGET_DEFAULT_TARGET_FLAGS +#define TARGET_DEFAULT_TARGET_FLAGS \ + (TARGET_DEFAULT | TARGET_CPU_DEFAULT | TARGET_DEFAULT_EXPLICIT_RELOCS) +#undef TARGET_HANDLE_OPTION +#define TARGET_HANDLE_OPTION sw_64_handle_option + +#undef TARGET_OPTION_INIT_STRUCT +#define TARGET_OPTION_INIT_STRUCT sw_64_option_init_struct + +#undef TARGET_OPTION_OPTIMIZATION_TABLE +#define TARGET_OPTION_OPTIMIZATION_TABLE sw_64_option_optimization_table + +struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER; diff --git a/gcc/config.gcc b/gcc/config.gcc index 6fcdd771d4c32604685ebc5da3e20260fe6da2ad..789a406b62f00914e7976a147d10cd3645e3d827 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -552,6 +552,10 @@ sh[123456789lbe]*-*-* | sh-*-*) extra_options="${extra_options} fused-madd.opt" extra_objs="${extra_objs} sh_treg_combine.o sh-mem.o sh_optimize_sett_clrt.o" ;; +sw_64*-*-*) + cpu_type=sw_64 + extra_options="${extra_options} g.opt" + ;; v850*-*-*) cpu_type=v850 ;; @@ -3407,6 +3411,11 @@ sparc64-*-openbsd*) with_cpu=ultrasparc tmake_file="${tmake_file} sparc/t-sparc" ;; +sw_64*-*-linux*) + tm_file="elfos.h ${tm_file} sw_64/gnu-user.h sw_64/elf.h sw_64/linux.h sw_64/linux-elf.h glibc-stdint.h" + tmake_file="${tmake_file} sw_64/t-linux sw_64/t-sw_64" + extra_options="${extra_options} sw_64/elf.opt" + ;; tic6x-*-elf) tm_file="elfos.h ${tm_file} c6x/elf-common.h c6x/elf.h" tm_file="${tm_file} dbxelf.h tm-dwarf2.h newlib-stdint.h" @@ -3937,6 +3946,15 @@ if test x$with_cpu = x ; then ;; esac ;; + sw_64sw6a*-*-*) + with_cpu=sw6a + ;; + sw_64sw6b*-*-*) + with_cpu=sw6b + ;; + sw_64sw8a*-*-*) + with_cpu=sw8a + ;; visium-*-*) with_cpu=gr5 ;; @@ -5147,6 +5165,23 @@ case "${target}" in esac ;; + sw_64*-*-*) + supported_defaults="cpu tune" + for which in cpu tune; do + eval "val=\$with_$which" + case "$val" in + "" \ + | sw6 | sw6a | sw6b \ + | sw8a) + ;; + *) + echo "Unknown CPU used in --with-$which=$val" 1>&2 + exit 1 + ;; + esac + done + ;; + tic6x-*-*) supported_defaults="arch" diff --git a/gcc/config.host 
b/gcc/config.host index 230ab61ac05b93b1890fce376024f9184a7b4ecf..793cc7b50c7fccda6e9c18607226164f45f9a63b 100644 --- a/gcc/config.host +++ b/gcc/config.host @@ -201,6 +201,14 @@ case ${host} in ;; esac ;; + sw_64*-*-linux*) + case ${target} in + sw_64*-*-linux*) + host_extra_gcc_objs="driver-sw_64.o" + host_xmake_file="${host_xmake_file} sw_64/x-sw_64" + ;; + esac + ;; esac # Machine-specific settings. diff --git a/gcc/config.in b/gcc/config.in index 80b421d99a34dc8dff989f2184bb8d0cded7a90f..20a10eef6593bac0c0bcd854121854d358a369db 100644 --- a/gcc/config.in +++ b/gcc/config.in @@ -199,6 +199,10 @@ #undef ENABLE_LD_BUILDID #endif +/* Define if gcc should always pass --no-relax to linker for sw_64. */ +#ifndef USED_FOR_TARGET +#undef ENABLE_LD_NORELAX +#endif /* Define to 1 to enable libquadmath support */ #ifndef USED_FOR_TARGET @@ -394,6 +398,10 @@ #undef HAVE_AS_EXPLICIT_RELOCS #endif +/* Define if your assembler supports explicit relocations. */ +#ifndef USED_FOR_TARGET +#undef SW_64_ENABLE_ASAN +#endif /* Define if your assembler supports FMAF, HPC, and VIS 3.0 instructions. */ #ifndef USED_FOR_TARGET @@ -2508,3 +2516,15 @@ #undef vfork #endif +/* Define only sw64 target. */ +#undef FLAG_SW64_ATOMIC +#undef FLAG_SW64_90139 +#undef FLAG_SW64_PREFETCH +#undef FLAG_SW64_PROTECT +#undef FLAG_SW64_SIMD +#undef FLAG_SW64_AUTOSIMD +#undef FLAG_SW64_M32 +#undef FLAG_SW64_INC_DEC +#undef FLAG_SW64_DELNOP +#undef FLAG_SW64_FM +#undef FLAG_SW64_WMEMB diff --git a/gcc/config/host-linux.c b/gcc/config/host-linux.c index 26872544130dddaf335068b1c3ae6fac3dc2e90a..20522756b45eaabb477750e0acae398cbb4c9770 100644 --- a/gcc/config/host-linux.c +++ b/gcc/config/host-linux.c @@ -84,6 +84,8 @@ # define TRY_EMPTY_VM_SPACE 0x8000000000 #elif defined(__sparc__) # define TRY_EMPTY_VM_SPACE 0x60000000 +#elif defined(__sw_64) +# define TRY_EMPTY_VM_SPACE 0x10000000000 #elif defined(__mc68000__) # define TRY_EMPTY_VM_SPACE 0x40000000 #elif defined(__aarch64__) && defined(__ILP32__) diff --git a/gcc/config/sw_64/constraints.md b/gcc/config/sw_64/constraints.md new file mode 100644 index 0000000000000000000000000000000000000000..e5d5c7c7697b7027f132f34325bf88db21bcd6ab --- /dev/null +++ b/gcc/config/sw_64/constraints.md @@ -0,0 +1,123 @@ +;; Constraint definitions for Sw_64. +;; Copyright (C) 2007-2020 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;; Unused letters: +;;; ABCDEF H V YZ +;;; de ghijkl pq tu wxyz + +;; Integer register constraints. + +(define_register_constraint "a" "R24_REG" + "General register 24, input to division routine") + +(define_register_constraint "b" "R25_REG" + "General register 24, input to division routine") + +(define_register_constraint "c" "R27_REG" + "General register 27, function call address") + +(define_register_constraint "f" "TARGET_FPREGS ? 
FLOAT_REGS : NO_REGS" + "Any floating-point register") + +(define_register_constraint "v" "R0_REG" + "General register 0, function value return address") + +(define_memory_constraint "w" + "A memory whose address is only a register" + (match_operand 0 "mem_noofs_operand")) + +;; Integer constant constraints. +(define_constraint "I" + "An unsigned 8 bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 255)"))) + +(define_constraint "J" + "The constant zero" + (and (match_code "const_int") + (match_test "ival == 0"))) + +(define_constraint "K" + "Signed 16-bit integer constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, -32768, 32767)"))) + +(define_constraint "L" + "A shifted signed 16-bit constant appropriate for LDAH" + (and (match_code "const_int") + (match_test "(ival & 0xffff) == 0 + && (ival >> 31 == -1 || ival >> 31 == 0)"))) + +(define_constraint "M" + "A valid operand of a ZAP insn" + (and (match_code "const_int") + (match_test "zap_mask (ival) != 0"))) + +(define_constraint "N" + "A complemented unsigned 8-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (~ival, 0, 255)"))) + +(define_constraint "O" + "A negated unsigned 8-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (-ival, 0, 255)"))) + +(define_constraint "P" + "The constant 1, 2 or 3" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 1, 3)"))) + +;; Floating-point constant constraints. +(define_constraint "G" + "The floating point zero constant" + (and (match_code "const_double") + (match_test "op == CONST0_RTX (mode)"))) + +;; "Extra" constraints. + +;; A memory location that is not a reference +;; (using an AND) to an unaligned location. +(define_memory_constraint "Q" + "@internal A normal_memory_operand" + (and (match_code "mem") + (not (match_code "and" "0")))) + +(define_constraint "R" + "@internal A direct_call_operand" + (match_operand:DI 0 "direct_call_operand")) + +(define_constraint "S" + "An unsigned 6-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 63)"))) + +(define_constraint "T" + "@internal A high-part symbol" + (match_code "high")) + +(define_constraint "W" + "A vector zero constant" + (and (match_code "const_vector") + (match_test "op == CONST0_RTX (mode)"))) + +(define_constraint "Y" + "An unsigned 5-bit constant" + (and (match_code "const_int") + (match_test "IN_RANGE (ival, 0, 31)"))) diff --git a/gcc/config/sw_64/driver-sw_64.c b/gcc/config/sw_64/driver-sw_64.c new file mode 100644 index 0000000000000000000000000000000000000000..84a3692c81f7d558bdbb6c0adde5e264870509a9 --- /dev/null +++ b/gcc/config/sw_64/driver-sw_64.c @@ -0,0 +1,101 @@ +/* Subroutines for the gcc driver. + Copyright (C) 2009-2020 Free Software Foundation, Inc. + Contributed by Arthur Loiret + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "tm.h" + +/* Chip family type IDs, returned by implver instruction. */ +#define IMPLVER_SW6_FAMILY 2 /* SW6 */ +#define IMPLVER_SW8_FAMILY 4 /* SW8 */ + +/* Bit defines for amask instruction. */ +#define AMASK_BWX 0x1 /* byte/word extension. */ +#define AMASK_FIX \ + 0x2 /* sqrt and f <-> i conversions \ + extension. */ +#define AMASK_CIX 0x4 /* count extension. */ +#define AMASK_MVI 0x100 /* multimedia extension. */ +#define AMASK_PRECISE 0x200 /* Precise arithmetic traps. */ +#define AMASK_LOCKPFTCHOK \ + 0x1000 /* Safe to prefetch lock cache \ + block. */ +#define AMASK_SW6A (1U << 16) +#define AMASK_SW6B (1U << 17) +#define AMASK_SW8A (1U << 18) +/* This will be called by the spec parser in gcc.c when it sees + a %:local_cpu_detect(args) construct. Currently it will be called + with either "cpu" or "tune" as argument depending on if -mcpu=native + or -mtune=native is to be substituted. + + It returns a string containing new command line parameters to be + put at the place of the above two options, depending on what CPU + this is executed. E.g. "-mcpu=sw6" on an Sw_64 for + -mcpu=native. If the routine can't detect a known processor, + the -mcpu or -mtune option is discarded. + + ARGC and ARGV are set depending on the actual arguments given + in the spec. */ +const char * +host_detect_local_cpu (int argc, const char **argv) +{ + static const struct cpu_types + { + long implver; + long amask; + const char *const cpu; + } cpu_types[] = {{IMPLVER_SW6_FAMILY, + AMASK_BWX | AMASK_FIX | AMASK_CIX | AMASK_SW6A, "sw6a"}, + {IMPLVER_SW6_FAMILY, + AMASK_BWX | AMASK_FIX | AMASK_CIX | AMASK_SW6B, "sw6b"}, + {IMPLVER_SW8_FAMILY, + AMASK_BWX | AMASK_FIX | AMASK_CIX | AMASK_SW8A, "sw8a"}, + {0, 0, NULL}}; + long implver; + long amask; + const char *cpu; + int i; + + if (argc < 1) + return NULL; + + if (strcmp (argv[0], "cpu") && strcmp (argv[0], "tune")) + return NULL; + + implver = __builtin_sw_64_implver (); + amask = __builtin_sw_64_amask (~0L); + cpu = NULL; + + for (i = 0; cpu_types[i].cpu != NULL; i++) + if (implver == cpu_types[i].implver + && (~amask & cpu_types[i].amask) == cpu_types[i].amask) + { + cpu = cpu_types[i].cpu; + break; + } + + if (cpu == NULL) + return NULL; + + return concat ("-m", argv[0], "=", cpu, NULL); +} diff --git a/gcc/config/sw_64/elf.h b/gcc/config/sw_64/elf.h new file mode 100644 index 0000000000000000000000000000000000000000..559a8172ac069ea198a2545d667a008ba42169df --- /dev/null +++ b/gcc/config/sw_64/elf.h @@ -0,0 +1,194 @@ +/* Definitions of target machine for GNU compiler, for Sw_64 w/ELF. + Copyright (C) 1996-2020 Free Software Foundation, Inc. + Contributed by Richard Henderson (rth@tamu.edu). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#undef CC1_SPEC +#define CC1_SPEC "%{G*}" + +#undef ASM_SPEC +#define ASM_SPEC \ + "%{G*} %{relax:-relax} %{!gstabs*:-no-mdebug}%{gstabs*:-mdebug} " \ + "%{mcpu=*:-m%*}" + +/* Do not output a .file directive at the beginning of the input file. */ + +#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE +#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true + +/* This is how to output an assembler line + that says to advance the location counter + to a multiple of 2**LOG bytes. */ + +#define ASM_OUTPUT_ALIGN(FILE, LOG) \ + if ((LOG) != 0) \ + fprintf (FILE, "\t.align %d\n", LOG); + +/* This says how to output assembler code to declare an + uninitialized internal linkage data object. Under SVR4, + the linker seems to want the alignment of data objects + to depend on their types. We do exactly that here. */ + +#undef ASM_OUTPUT_ALIGNED_LOCAL +#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \ + do \ + { \ + if ((SIZE) <= (unsigned HOST_WIDE_INT) g_switch_value) \ + switch_to_section (sbss_section); \ + else \ + switch_to_section (bss_section); \ + ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ + if (!flag_inhibit_size_directive) \ + ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \ + ASM_OUTPUT_ALIGN ((FILE), exact_log2 ((ALIGN) / BITS_PER_UNIT)); \ + ASM_OUTPUT_LABEL (FILE, NAME); \ + ASM_OUTPUT_SKIP ((FILE), (SIZE) ? (SIZE) : 1); \ + } \ + while (0) + +/* This says how to output assembler code to declare an + uninitialized external linkage data object. */ + +#undef ASM_OUTPUT_ALIGNED_BSS +#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ + do \ + { \ + ASM_OUTPUT_ALIGNED_LOCAL (FILE, NAME, SIZE, ALIGN); \ + } \ + while (0) + +#undef BSS_SECTION_ASM_OP +#define BSS_SECTION_ASM_OP "\t.section\t.bss" +#undef SBSS_SECTION_ASM_OP +#define SBSS_SECTION_ASM_OP "\t.section\t.sbss,\"aw\"" +#undef SDATA_SECTION_ASM_OP +#define SDATA_SECTION_ASM_OP "\t.section\t.sdata,\"aw\"" + +/* This is how we tell the assembler that two symbols have the same value. */ + +#undef ASM_OUTPUT_DEF +#define ASM_OUTPUT_DEF(FILE, ALIAS, NAME) \ + do \ + { \ + assemble_name (FILE, ALIAS); \ + fputs (" = ", FILE); \ + assemble_name (FILE, NAME); \ + fputc ('\n', FILE); \ + } \ + while (0) + +#undef ASM_OUTPUT_DEF_FROM_DECLS +#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL, TARGET) \ + do \ + { \ + const char *alias = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \ + const char *name = IDENTIFIER_POINTER (TARGET); \ + if (TREE_CODE (DECL) == FUNCTION_DECL) \ + { \ + fputc ('$', FILE); \ + assemble_name (FILE, alias); \ + fputs ("..ng = $", FILE); \ + assemble_name (FILE, name); \ + fputs ("..ng\n", FILE); \ + } \ + ASM_OUTPUT_DEF (FILE, alias, name); \ + } \ + while (0) + +/* Provide a STARTFILE_SPEC appropriate for ELF. Here we add the + (even more) magical crtbegin.o file which provides part of the + support for getting C++ file-scope static object constructed + before entering `main'. */ + +#undef STARTFILE_SPEC +#ifdef HAVE_LD_PIE +#define STARTFILE_SPEC \ + "%{!shared: %{pg|p:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}}\ + crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}" +#else +#define STARTFILE_SPEC \ + "%{!shared: %{pg|p:gcrt1.o%s;:crt1.o%s}}\ + crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}" +#endif + +/* Provide a ENDFILE_SPEC appropriate for ELF. Here we tack on the + magical crtend.o file which provides part of the support for + getting C++ file-scope static object constructed before entering + `main', followed by a normal ELF "finalizer" file, `crtn.o'. 
*/ + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ + %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s" + +/* This variable should be set to 'true' if the target ABI requires + unwinding tables even when exceptions are not used. */ +#define TARGET_UNWIND_TABLES_DEFAULT true + +/* Select a format to encode pointers in exception handling data. CODE + is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is + true if the symbol may be affected by dynamic relocations. + + Since application size is already constrained to <2GB by the form of + the ldgp relocation, we can use a 32-bit pc-relative relocation to + static data. Dynamic data is accessed indirectly to allow for read + only EH sections. */ +#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ + (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4) + +/* If defined, a C statement to be executed just prior to the output of + assembler code for INSN. */ +#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ + (sw_64_this_literal_sequence_number = 0, \ + sw_64_this_gpdisp_sequence_number = 0) +extern int sw_64_this_literal_sequence_number; +extern int sw_64_this_gpdisp_sequence_number; + +/* Since the bits of the _init and _fini function is spread across + many object files, each potentially with its own GP, we must assume + we need to load our GP. Further, the .init/.fini section can + easily be more than 4MB away from the function to call so we can't + use bsr. */ +// jsr->call +#ifdef __sw_64_sw8a__ +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n" \ + " addpi 0, $29\n" \ + " ldgp $29,0($29)\n" \ + " unop\n" \ + " call $26," USER_LABEL_PREFIX #FUNC "\n" \ + " .align 3\n" \ + " .previous"); +#else +#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ + asm (SECTION_OP "\n" \ + " br $29,1f\n" \ + "1: ldgp $29,0($29)\n" \ + " unop\n" \ + " call $26," USER_LABEL_PREFIX #FUNC "\n" \ + " .align 3\n" \ + " .previous"); +#endif + +/* If we have the capability create headers for efficient EH lookup. + As of Jan 2002, only glibc 2.2.4 can actually make use of this, but + I imagine that other systems will catch up. In the meantime, it + doesn't harm to make sure that the data exists to be used later. */ +#if defined HAVE_LD_EH_FRAME_HDR +#define LINK_EH_SPEC "%{!static|static-pie:--eh-frame-hdr} " +#endif diff --git a/gcc/config/sw_64/elf.opt b/gcc/config/sw_64/elf.opt new file mode 100644 index 0000000000000000000000000000000000000000..9059fee8c6c9e9982ac17570c2ad58fa3371584c --- /dev/null +++ b/gcc/config/sw_64/elf.opt @@ -0,0 +1,29 @@ +; Sw_64 ELF options. + +; Copyright (C) 2011-2020 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT ANY +; WARRANTY; without even the implied warranty of MERCHANTABILITY or +; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +; for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . + +; See the GCC internals manual (options.texi) for a description of +; this file's format. + +; Please try to keep this file in ASCII collating order. 
+ +relax +Driver + +; This comment is to ensure we retain the blank line above. diff --git a/gcc/config/sw_64/freebsd.h b/gcc/config/sw_64/freebsd.h new file mode 100644 index 0000000000000000000000000000000000000000..f0b599b7991f5c8cd93545e621832ee1d2685e67 --- /dev/null +++ b/gcc/config/sw_64/freebsd.h @@ -0,0 +1,69 @@ +/* Definitions for Sw_64 running FreeBSD using the ELF format + Copyright (C) 2000-2020 Free Software Foundation, Inc. + Contributed by David E. O'Brien and BSDi. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#undef EXTRA_SPECS +#define EXTRA_SPECS \ + { \ + "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER \ + } + +/* Provide a CPP_SPEC appropriate for FreeBSD/sw_64 -- dealing with + the GCC option `-posix'. */ + +#undef CPP_SPEC +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}" + +#define LINK_SPEC \ + "%{G*} %{relax:-relax} \ + %{p:%nconsider using '-pg' instead of '-p' with gprof (1)} \ + %{assert*} %{R*} %{rpath*} %{defsym*} \ + %{shared:-Bshareable %{h*} %{soname*}} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker %(fbsd_dynamic_linker) } \ + %{static:-Bstatic}} \ + %{symbolic:-Bsymbolic}" + +/************************[ Target stuff ]***********************************/ + +/* Define the actual types of some ANSI-mandated types. + Needs to agree with . GCC defaults come from c-decl.c, + c-common.c, and config//.h. */ + +/* sw_64.h gets this wrong for FreeBSD. We use the GCC defaults instead. */ +#undef WCHAR_TYPE + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#define TARGET_ELF 1 + +#undef HAS_INIT_SECTION + +/* Show that we need a GP when profiling. */ +#undef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 1 + +/* Don't default to pcc-struct-return, we want to retain compatibility with + older FreeBSD releases AND pcc-struct-return may not be reentrant. */ + +#undef DEFAULT_PCC_STRUCT_RETURN +#define DEFAULT_PCC_STRUCT_RETURN 0 diff --git a/gcc/config/sw_64/gnu-user.h b/gcc/config/sw_64/gnu-user.h new file mode 100644 index 0000000000000000000000000000000000000000..2c40cb84b7ba0070712cf743ae053f47ec92e73a --- /dev/null +++ b/gcc/config/sw_64/gnu-user.h @@ -0,0 +1,177 @@ +/* Definitions for systems using, at least optionally, a GNU + (glibc-based) userspace or other userspace with libc derived from + glibc (e.g. uClibc) or for which similar specs are appropriate. + Copyright (C) 1995-2020 Free Software Foundation, Inc. + Contributed by Eric Youngdale. + Modified for stabs-in-ELF by H.J. Lu (hjl@lucon.org). + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +/* Don't assume anything about the header files. */ +//#define SYSTEM_IMPLICIT_EXTERN_C +/* +#undef ASM_APP_ON +#define ASM_APP_ON "#APP\n" + +#undef ASM_APP_OFF +#define ASM_APP_OFF "#NO_APP\n" +*/ +#if ENABLE_OFFLOADING == 1 +#define CRTOFFLOADBEGIN "%{fopenacc|fopenmp:crtoffloadbegin%O%s}" +#define CRTOFFLOADEND "%{fopenacc|fopenmp:crtoffloadend%O%s}" +#else +#define CRTOFFLOADBEGIN "" +#define CRTOFFLOADEND "" +#endif + +/* Provide a STARTFILE_SPEC appropriate for GNU userspace. Here we add + the GNU userspace magical crtbegin.o file (see crtstuff.c) which + provides part of the support for getting C++ file-scope static + object constructed before entering `main'. */ + +#if defined HAVE_LD_PIE +#define GNU_USER_TARGET_STARTFILE_SPEC \ + "%{shared:; \ + pg|p|profile:%{static-pie:grcrt1.o%s;:gcrt1.o%s}; \ + static:crt1.o%s; \ + static-pie:rcrt1.o%s; \ + " PIE_SPEC ":Scrt1.o%s; \ + :crt1.o%s} \ + crti.o%s \ + %{static:crtbeginT.o%s; \ + shared|static-pie|" PIE_SPEC ":crtbeginS.o%s; \ + :crtbegin.o%s} \ + %{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_start_preinit.o%s; \ + fvtable-verify=std:vtv_start.o%s} \ + " CRTOFFLOADBEGIN +#else +#define GNU_USER_TARGET_STARTFILE_SPEC \ + "%{shared:; \ + pg|p|profile:gcrt1.o%s; \ + :crt1.o%s} \ + crti.o%s \ + %{static:crtbeginT.o%s; \ + shared|pie|static-pie:crtbeginS.o%s; \ + :crtbegin.o%s} \ + %{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_start_preinit.o%s; \ + fvtable-verify=std:vtv_start.o%s} \ + " CRTOFFLOADBEGIN +#endif +#undef STARTFILE_SPEC +#define STARTFILE_SPEC GNU_USER_TARGET_STARTFILE_SPEC + +/* Provide a ENDFILE_SPEC appropriate for GNU userspace. Here we tack on + the GNU userspace magical crtend.o file (see crtstuff.c) which + provides part of the support for getting C++ file-scope static + object constructed before entering `main', followed by a normal + GNU userspace "finalizer" file, `crtn.o'. */ + +#if defined HAVE_LD_PIE +#define GNU_USER_TARGET_ENDFILE_SPEC \ + "%{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_end_preinit.o%s; \ + fvtable-verify=std:vtv_end.o%s} \ + %{static:crtend.o%s; \ + shared|static-pie|" PIE_SPEC ":crtendS.o%s; \ + :crtend.o%s} \ + crtn.o%s \ + " CRTOFFLOADEND +#else +#define GNU_USER_TARGET_ENDFILE_SPEC \ + "%{fvtable-verify=none:%s; \ + fvtable-verify=preinit:vtv_end_preinit.o%s; \ + fvtable-verify=std:vtv_end.o%s} \ + %{static:crtend.o%s; \ + shared|pie|static-pie:crtendS.o%s; \ + :crtend.o%s} \ + crtn.o%s \ + " CRTOFFLOADEND +#endif +#undef ENDFILE_SPEC +#define ENDFILE_SPEC GNU_USER_TARGET_ENDFILE_SPEC + +/* This is for -profile to use -lc_p instead of -lc. */ +#define GNU_USER_TARGET_CC1_SPEC "%{profile:-p}" +#ifndef CC1_SPEC +#define CC1_SPEC GNU_USER_TARGET_CC1_SPEC +#endif + +/* The GNU C++ standard library requires that these macros be defined. 
*/ +#undef CPLUSPLUS_CPP_SPEC +#define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)" + +#define GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC \ + "%{shared:-lc} \ + %{!shared:%{profile:-lc_p}%{!profile:-lc}}" + +#define GNU_USER_TARGET_LIB_SPEC \ + "%{pthread:-lpthread} " GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC + +#undef LIB_SPEC +#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC + +#if defined HAVE_LD_EH_FRAME_HDR +#define LINK_EH_SPEC "%{!static|static-pie:--eh-frame-hdr} " +#endif + +#undef LINK_GCC_C_SEQUENCE_SPEC +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static|static-pie:--start-group} %G %L \ + %{static|static-pie:--end-group}%{!static:%{!static-pie:%G}}" + +/* Use --as-needed -lgcc_s for eh support. */ +#ifdef HAVE_LD_AS_NEEDED +#define USE_LD_AS_NEEDED 1 +#endif + +#define TARGET_POSIX_IO + +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION gnu_libc_has_function + +/* Link -lasan early on the command line. For -static-libasan, don't link + it for -shared link, the executable should be compiled with -static-libasan + in that case, and for executable link with --{,no-}whole-archive around + it to force everything into the executable. And similarly for -ltsan + and -llsan. */ +#if defined HAVE_LD_STATIC_DYNAMIC +#undef LIBASAN_EARLY_SPEC +#define LIBASAN_EARLY_SPEC \ + "%{!shared:libasan_preinit%O%s} " \ + "%{static-libasan:%{!shared:" LD_STATIC_OPTION \ + " --whole-archive -lasan --no-whole-archive " LD_DYNAMIC_OPTION \ + "}}%{!static-libasan:-lasan}" +#undef LIBTSAN_EARLY_SPEC +#define LIBTSAN_EARLY_SPEC \ + "%{!shared:libtsan_preinit%O%s} " \ + "%{static-libtsan:%{!shared:" LD_STATIC_OPTION \ + " --whole-archive -ltsan --no-whole-archive " LD_DYNAMIC_OPTION \ + "}}%{!static-libtsan:-ltsan}" +#undef LIBLSAN_EARLY_SPEC +#define LIBLSAN_EARLY_SPEC \ + "%{!shared:liblsan_preinit%O%s} " \ + "%{static-liblsan:%{!shared:" LD_STATIC_OPTION \ + " --whole-archive -llsan --no-whole-archive " LD_DYNAMIC_OPTION \ + "}}%{!static-liblsan:-llsan}" +#endif diff --git a/gcc/config/sw_64/linux-elf.h b/gcc/config/sw_64/linux-elf.h new file mode 100644 index 0000000000000000000000000000000000000000..f3039c2ff917a98a6423556554945fbb1a13b458 --- /dev/null +++ b/gcc/config/sw_64/linux-elf.h @@ -0,0 +1,54 @@ +/* Definitions of target machine for GNU compiler + for Sw_64 Linux-based GNU systems using ELF. + Copyright (C) 1996-2020 Free Software Foundation, Inc. + Contributed by Richard Henderson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + +#undef EXTRA_SPECS +#define EXTRA_SPECS {"elf_dynamic_linker", ELF_DYNAMIC_LINKER}, + +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" +#define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0" +#if DEFAULT_LIBC == LIBC_UCLIBC +#define CHOOSE_DYNAMIC_LINKER(G, U) "%{mglibc:" G ";:" U "}" +#elif DEFAULT_LIBC == LIBC_GLIBC +#define CHOOSE_DYNAMIC_LINKER(G, U) "%{muclibc:" U ";:" G "}" +#else +#error "Unsupported DEFAULT_LIBC" +#endif +#define GNU_USER_DYNAMIC_LINKER \ + CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER) + +#define ELF_DYNAMIC_LINKER GNU_USER_DYNAMIC_LINKER + +#define LINK_SPEC \ + "-m elf64sw_64 %{G*} %{relax:-relax} \ + %{O*:-O3} %{!O*:-O1} \ + %{shared:-shared} \ + %{!shared: \ + %{!static: \ + %{rdynamic:-export-dynamic} \ + -dynamic-linker %(elf_dynamic_linker)} \ + %{static:-static}}" + +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} " \ + "%{shared:-lc}%{!shared:%{profile:-lc_p}%{!profile:-lc}} " + +#define TARGET_ASM_FILE_END file_end_indicate_exec_stack diff --git a/gcc/config/sw_64/linux.h b/gcc/config/sw_64/linux.h new file mode 100644 index 0000000000000000000000000000000000000000..023fd9fdeeebd90af34bd1dc28c1beab191485a5 --- /dev/null +++ b/gcc/config/sw_64/linux.h @@ -0,0 +1,105 @@ +/* Definitions of target machine for GNU compiler, + for Sw_64 Linux-based GNU systems. + Copyright (C) 1996-2020 Free Software Foundation, Inc. + Contributed by Richard Henderson. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__gnu_linux__"); \ + builtin_define ("_LONGLONG"); \ + builtin_define_std ("linux"); \ + builtin_define_std ("unix"); \ + builtin_assert ("system=linux"); \ + builtin_assert ("system=unix"); \ + builtin_assert ("system=posix"); \ + /* The GNU C++ standard library requires this. */ \ + if (c_dialect_cxx ()) \ + builtin_define ("_GNU_SOURCE"); \ + } \ + while (0) + +#undef LIB_SPEC +#define LIB_SPEC \ + "%{pthread:-lpthread} \ + %{shared:-lc} \ + %{!shared: %{profile:-lc_p}%{!profile:-lc}}" + +#undef CPP_SPEC +#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" + +/* Show that we need a GP when profiling. */ +#undef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 1 + +/* Don't care about faults in the prologue. 
*/ +#undef TARGET_CAN_FAULT_IN_PROLOGUE +#define TARGET_CAN_FAULT_IN_PROLOGUE 1 + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#ifdef SINGLE_LIBC +#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) +#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) +#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) +#undef OPTION_MUSL +#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) +#else +#define OPTION_GLIBC (linux_libc == LIBC_GLIBC) +#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) +#define OPTION_BIONIC (linux_libc == LIBC_BIONIC) +#undef OPTION_MUSL +#define OPTION_MUSL (linux_libc == LIBC_MUSL) +#endif + +/* Determine what functions are present at the runtime; + this includes full c99 runtime and sincos. */ +#undef TARGET_LIBC_HAS_FUNCTION +#define TARGET_LIBC_HAS_FUNCTION linux_libc_has_function + +#define TARGET_POSIX_IO + +#define LINK_GCC_C_SEQUENCE_SPEC \ + "%{static|static-pie:--start-group} %G %L \ + %{static|static-pie:--end-group}%{!static:%{!static-pie:%G}}" + +/* Use --as-needed -lgcc_s for eh support. */ +#ifdef HAVE_LD_AS_NEEDED +#define USE_LD_AS_NEEDED 1 +#endif + +/* Define if long doubles should be mangled as 'g'. */ +#define TARGET_ALTERNATE_LONG_DOUBLE_MANGLING + +/* -mcpu=native handling only makes sense with compiler running on + an Sw_64 chip. */ +#if defined __sw_64__ || defined __sw_64 +extern const char * +host_detect_local_cpu (int argc, const char **argv); +#define EXTRA_SPEC_FUNCTIONS {"local_cpu_detect", host_detect_local_cpu}, + +#define MCPU_MTUNE_NATIVE_SPECS \ + " %{mcpu=native:%. + +(define_insn "*addsi_er_high_l" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (match_operand:SI 1 "register_operand" "r") + (high:SI (match_operand:SI 2 "local_symbolic_operand"))))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS && reload_completed" + "ldih %0,%2(%1)\t\t!gprelhigh" + [(set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:SI 0 "register_operand") + (high:SI (match_operand:SI 1 "local_symbolic_operand")))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (plus:SI (match_dup 2) (high:SI (match_dup 1))))] + "operands[2] = pic_offset_table_rtx;") + +(define_insn "movsi_er_high_g" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand:SI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_LITERAL))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" +{ + if (INTVAL (operands[3]) == 0) + return "ldw %0,%2(%1)\t\t!literal"; + else + return "ldw %0,%2(%1)\t\t!literal!%3"; +} + [(set_attr "type" "ldsym")]) + +(define_insn "movsi_er_high_g32" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:SI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_LITERAL))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" +{ + if (INTVAL (operands[3]) == 0) + return "ldw %0,%2(%1)\t\t!literal"; + else + return "ldw %0,%2(%1)\t\t!literal!%3"; +} + [(set_attr "type" "ldsym")]) + +(define_split + [(set (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "global_symbolic_operand"))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (unspec:SI [(match_dup 2) + (match_dup 1) + (const_int 0)] UNSPEC_LITERAL))] + "operands[2] = pic_offset_table_rtx;") + +(define_insn "*movsi_er_low_l" + [(set (match_operand:SI 0 "register_operand" "=r") + (lo_sum:SI (match_operand:SI 1 "register_operand" "r") + 
(match_operand:SI 2 "local_symbolic_operand")))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" +{ + if (true_regnum (operands[1]) == 29) + return "ldi %0,%2(%1)\t\t!gprel"; + else + return "ldi %0,%2(%1)\t\t!gprellow"; +} + [(set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "small_symbolic_operand"))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (lo_sum:SI (match_dup 2) (match_dup 1)))] + "operands[2] = pic_offset_table_rtx;") + +(define_split + [(set (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "local_symbolic_operand"))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (plus:SI (match_dup 2) (high:SI (match_dup 1)))) + (set (match_dup 0) + (lo_sum:SI (match_dup 0) (match_dup 1)))] + "operands[2] = pic_offset_table_rtx;") + +(define_expand "prologue_ldgp_32" + [(set (match_dup 0) + (unspec_volatile:SI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1)) + (set (match_dup 0) + (unspec_volatile:SI [(match_dup 0) (match_dup 2)] UNSPECV_PLDGP2))] + "TARGET_SW_M32" +{ + operands[0] = pic_offset_table_rtx; + operands[1] = gen_rtx_REG (Pmode, 27); + operands[2] = (TARGET_EXPLICIT_RELOCS + ? GEN_INT (sw_64_next_sequence_number++) + : const0_rtx); +}) + +(define_insn "*ldgp_er_1_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_LDGP1))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" + "ldih %0,0(%1)\t\t!gpdisp!%2" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*ldgp_er_2_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPEC_LDGP2))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" + "ldi %0,0(%1)\t\t!gpdisp!%2" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_er_2_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_PLDGP2))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" +{ + if (stfp3_flag == 1) + return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:\;setfpec1"; + else if (stfp3_flag == 2) + return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:\;setfpec3"; + else if (flag_fpcr_set == 1) + return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:\;setfpec1"; + else if (flag_fpcr_set == 3) + return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:\;setfpec0"; + else + return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:"; +} + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_1_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_LDGP1))] + "TARGET_SW_M32" +{ + if (stfp3_flag == 1) + return "ldgp %0,0(%1)\n$%~..ng:\;setfpec1"; + else if (stfp3_flag == 2) + return "ldgp %0,0(%1)\n$%~..ng:\;setfpec3"; + else if (flag_fpcr_set == 1) + return "ldgp %0,0(%1)\n$%~..ng:\;setfpec1"; + else if (flag_fpcr_set == 3) + return "ldgp %0,0(%1)\n$%~..ng:\;setfpec0"; + else + return "ldgp %0,0(%1)\n$%~..ng:"; +} + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_2_32" + [(set (match_operand:SI 0 "register_operand" "=r") + (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_PLDGP2))] + "TARGET_SW_M32" +) + +(define_insn 
"*call_value_osf_1_er_32" + [(set (match_operand 0) + (call (mem:DI (match_operand:SI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" + "@ + call $26,(%1),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* + bsr $26,%1\t\t!samegp + ldw $27,%1($29)\t\t!literal!%#\;call $26,($27),0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "jsr") + (set_attr "length" "12,*,16")]) + +(define_insn "*call_osf_1_er_noreturn_32" + [(call (mem:DI (match_operand:SI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + call $26,($27),0 + bsr $26,%0\t\t!samegp + ldw $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#" + [(set_attr "type" "jsr") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_osf_1_er_32" + [(call (mem:DI (match_operand:SI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" + "@ + call $26,(%0),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* + bsr $26,%0\t\t!samegp + ldw $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "jsr") + (set_attr "length" "12,*,16")]) diff --git a/gcc/config/sw_64/netbsd.h b/gcc/config/sw_64/netbsd.h new file mode 100644 index 0000000000000000000000000000000000000000..c605c8df2aa2e67fea3a0b8bf9d45f9950008869 --- /dev/null +++ b/gcc/config/sw_64/netbsd.h @@ -0,0 +1,69 @@ +/* Definitions of target machine for GNU compiler, + for Sw_64 NetBSD systems. + Copyright (C) 1998-2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define TARGET_OS_CPP_BUILTINS() \ + do \ + { \ + NETBSD_OS_CPP_BUILTINS_ELF (); \ + } \ + while (0) + +/* NetBSD doesn't use the LANGUAGE* built-ins. */ +#undef SUBTARGET_LANGUAGE_CPP_BUILTINS +#define SUBTARGET_LANGUAGE_CPP_BUILTINS() /* nothing. */ + +/* Show that we need a GP when profiling. */ +#undef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 1 + +/* Provide a CPP_SPEC appropriate for NetBSD/sw_64. We use + this to pull in CPP specs that all NetBSD configurations need. */ + +#undef CPP_SPEC +#define CPP_SPEC NETBSD_CPP_SPEC + +#undef EXTRA_SPECS +#define EXTRA_SPECS \ + {"netbsd_link_spec", NETBSD_LINK_SPEC_ELF}, \ + {"netbsd_entry_point", NETBSD_ENTRY_POINT}, \ + {"netbsd_endfile_spec", NETBSD_ENDFILE_SPEC}, + +/* Provide a LINK_SPEC appropriate for a NetBSD/sw_64 ELF target. */ + +#undef LINK_SPEC +#define LINK_SPEC \ + "%{G*} %{relax:-relax} \ + %{O*:-O3} %{!O*:-O1} \ + %(netbsd_link_spec)" + +#define NETBSD_ENTRY_POINT "__start" + +/* Provide an ENDFILE_SPEC appropriate for NetBSD/sw_64 ELF. 
Here we + add crtend.o, which provides part of the support for getting + C++ file-scope static objects deconstructed after exiting "main". + + We also need to handle the GCC option `-ffast-math'. */ + +#undef ENDFILE_SPEC +#define ENDFILE_SPEC \ + "%{Ofast|ffast-math|funsafe-math-optimizations:crtfm%O%s} \ + %(netbsd_endfile_spec)" + +#define HAVE_ENABLE_EXECUTE_STACK diff --git a/gcc/config/sw_64/openbsd.h b/gcc/config/sw_64/openbsd.h new file mode 100644 index 0000000000000000000000000000000000000000..6b20e8dc65f0e29f714e4ed379adf7f0c893ce43 --- /dev/null +++ b/gcc/config/sw_64/openbsd.h @@ -0,0 +1,74 @@ +/* Configuration file for an sw_64 OpenBSD target. + Copyright (C) 1999-2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Controlling the compilation driver. */ +#undef TARGET_DEFAULT +#define TARGET_DEFAULT (MASK_FPREGS | MASK_IEEE | MASK_IEEE_CONFORMANT) + +#define LINK_SPEC \ + "%{!shared:%{!nostdlib:%{!r*:%{!e*:-e __start}}}} \ + %{shared:-shared} %{R*} \ + %{static:-Bstatic} \ + %{!static:-Bdynamic} \ + %{rdynamic:-export-dynamic} \ + %{assert*} \ + %{!dynamic-linker:-dynamic-linker /usr/libexec/ld.so}" + +/* As an elf system, we need crtbegin/crtend stuff. */ +#undef STARTFILE_SPEC +#define STARTFILE_SPEC \ + "\ + %{!shared: %{pg:gcrt0%O%s} %{!pg:%{p:gcrt0%O%s} \ + %{!p:%{!static:crt0%O%s} %{static:%{nopie:crt0%O%s} \ + %{!nopie:rcrt0%O%s}}}} crtbegin%O%s} %{shared:crtbeginS%O%s}" +#undef ENDFILE_SPEC +#define ENDFILE_SPEC "%{!shared:crtend%O%s} %{shared:crtendS%O%s}" + +/* run-time target specifications. */ +#define TARGET_OS_CPP_BUILTINS() \ + do { \ + OPENBSD_OS_CPP_BUILTINS_ELF(); \ + OPENBSD_OS_CPP_BUILTINS_LP64(); \ + } while (0) + +/* Layout of source language data types. */ + +/* This must agree with */ +#undef SIZE_TYPE +#define SIZE_TYPE "long unsigned int" + +#undef PTRDIFF_TYPE +#define PTRDIFF_TYPE "long int" + +#undef INTMAX_TYPE +#define INTMAX_TYPE "long long int" + +#undef UINTMAX_TYPE +#define UINTMAX_TYPE "long long unsigned int" + +#undef WCHAR_TYPE +#define WCHAR_TYPE "int" + +#undef WCHAR_TYPE_SIZE +#define WCHAR_TYPE_SIZE 32 + +#undef WINT_TYPE +#define WINT_TYPE "int" + +#define LOCAL_LABEL_PREFIX "." diff --git a/gcc/config/sw_64/predicates.md b/gcc/config/sw_64/predicates.md new file mode 100644 index 0000000000000000000000000000000000000000..c82d5c7de27c20facd65eb83d758a8ec834d0230 --- /dev/null +++ b/gcc/config/sw_64/predicates.md @@ -0,0 +1,649 @@ +;; Predicate definitions for Sw_64. +;; Copyright (C) 2004-2020 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. 
+;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;; Return 1 if OP is the zero constant for MODE. +(define_predicate "const0_operand" + (and (match_code "const_int,const_wide_int,const_double,const_vector") + (match_test "op == CONST0_RTX (mode)"))) + +;; Returns true if OP is either the constant zero or a register. +(define_predicate "reg_or_0_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const0_operand"))) + +;; Return 1 if OP is a constant in the range of 0-63 (for a shift) or +;; any register. +(define_predicate "reg_or_6bit_operand" + (if_then_else (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 64") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a constant in the range of 0-31 (for a shift) or +;; any register. +(define_predicate "reg_or_5bit_operand" + (if_then_else (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 32") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is an 8-bit constant. +(define_predicate "cint8_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 256"))) + +;; Return 1 if OP is an 8-bit constant or any register. +(define_predicate "reg_or_8bit_operand" + (if_then_else (match_code "const_int") + (match_test "INTVAL (op) >= 0 && INTVAL (op) < 256") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a constant or any register. +(define_predicate "reg_or_cint_operand" + (ior (match_operand 0 "register_operand") + (match_operand 0 "const_int_operand"))) + +;; Return 1 if the operand is a valid second operand to an add insn. +(define_predicate "add_operand" + (if_then_else (match_code "const_int") + (match_test "satisfies_constraint_K (op) || satisfies_constraint_L (op)") + (match_operand 0 "register_operand"))) + +;; Return 1 if the operand is a valid second operand to a +;; sign-extending add insn. +(define_predicate "sext_add_operand" + (if_then_else (match_code "const_int") + (match_test "satisfies_constraint_I (op) || satisfies_constraint_O (op)") + (match_operand 0 "register_operand"))) + +;; Return 1 if the operand is a non-symbolic constant operand that +;; does not satisfy add_operand. +(define_predicate "non_add_const_operand" + (and (match_code "const_int,const_wide_int,const_double,const_vector") + (not (match_operand 0 "add_operand")))) + +;; Return 1 if the operand is a non-symbolic, nonzero constant operand. +(define_predicate "non_zero_const_operand" + (and (match_code "const_int,const_wide_int,const_double,const_vector") + (not (match_test "op == CONST0_RTX (mode)")))) + +;; Return 1 if OP is the constant 1, 2 or 3. +(define_predicate "const123_operand" + (and (match_code "const_int") + (match_test "IN_RANGE (INTVAL (op), 1, 3)"))) + +;; Return 1 if OP is the constant 2 or 3. +(define_predicate "const23_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 2 || INTVAL (op) == 3"))) + +;; Return 1 if OP is the constant 4 or 8. +(define_predicate "const48_operand" + (and (match_code "const_int") + (match_test "INTVAL (op) == 4 || INTVAL (op) == 8"))) + +;; Return 1 if OP is a valid first operand to an AND insn. 
+(define_predicate "and_operand" + (if_then_else (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 0x100 + || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100 + || zap_mask (INTVAL (op))") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a valid first operand to an IOR or XOR insn. +(define_predicate "or_operand" + (if_then_else (match_code "const_int") + (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 0x100 + || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a constant that is the width, in bits, of an integral +;; mode not larger than DImode. +(define_predicate "mode_width_operand" + (match_code "const_int") +{ + HOST_WIDE_INT i = INTVAL (op); + return i == 8 || i == 16 || i == 32 || i == 64; +}) + +;; Return 1 if OP is a constant that is a mask of ones of width of an +;; integral machine mode not larger than DImode. +(define_predicate "mode_mask_operand" + (match_code "const_int") +{ + HOST_WIDE_INT value = INTVAL (op); + + if (value == 0xff) + return 1; + if (value == 0xffff) + return 1; + if (value == 0xffffffff) + return 1; + if (value == -1) + return 1; + + return 0; +}) + +;; Return 1 if OP is a multiple of 8 less than 64. +(define_predicate "mul8_operand" + (match_code "const_int") +{ + unsigned HOST_WIDE_INT i = INTVAL (op); + return i < 64 && i % 8 == 0; +}) + +;; Return 1 if OP is a hard floating-point register. +(define_predicate "hard_fp_register_operand" + (match_operand 0 "register_operand") +{ + if (SUBREG_P (op)) + op = SUBREG_REG (op); + return REGNO_REG_CLASS (REGNO (op)) == FLOAT_REGS; +}) + +;; Return 1 if OP is a hard general register. +(define_predicate "hard_int_register_operand" + (match_operand 0 "register_operand") +{ + if (SUBREG_P (op)) + op = SUBREG_REG (op); + return REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS; +}) + +;; Return 1 if OP is a valid operand for the source of a move insn. +(define_predicate "input_operand" + (match_operand 0 "general_operand") +{ + switch (GET_CODE (op)) + { + case LABEL_REF: + case SYMBOL_REF: + case CONST: + if (TARGET_EXPLICIT_RELOCS) + { + /* We don't split symbolic operands into something unintelligable + until after reload, but we do not wish non-small, non-global + symbolic operands to be reconstructed from their high/lo_sum + form. */ + return (small_symbolic_operand (op, mode) + || global_symbolic_operand (op, mode) + || gotdtp_symbolic_operand (op, mode) + || gottp_symbolic_operand (op, mode)); + } + /* VMS still has a 32-bit mode. */ + return mode == ptr_mode || mode == Pmode; + + case HIGH: + return (TARGET_EXPLICIT_RELOCS + && local_symbolic_operand (XEXP (op, 0), mode)); + + case REG: + return 1; + + case SUBREG: + if (register_operand (op, mode)) + return 1; + /* fall through. */ + case MEM: + return ((TARGET_BWX || (mode != HImode && mode != QImode)) + && general_operand (op, mode)); + + case CONST_WIDE_INT: + case CONST_DOUBLE: + return op == CONST0_RTX (mode); + + case CONST_VECTOR: + if (reload_in_progress || reload_completed) + return sw_64_legitimate_constant_p (mode, op); + return op == CONST0_RTX (mode); + + case CONST_INT: + if (mode == QImode || mode == HImode) + return true; + if (reload_in_progress || reload_completed) + return sw_64_legitimate_constant_p (mode, op); + return add_operand (op, mode); + + default: + gcc_unreachable (); + } + return 0; +}) + +;; Return 1 if OP is a SYMBOL_REF for a function known to be in this +;; file, and in the same section as the current function. 
+ +(define_predicate "samegp_function_operand" + (match_code "symbol_ref") +{ + /* Easy test for recursion. */ + if (op == XEXP (DECL_RTL (current_function_decl), 0)) + return true; + + /* Functions that are not local can be overridden, and thus may + not share the same gp. */ + if (! SYMBOL_REF_LOCAL_P (op)) + return false; + + /* If -msmall-data is in effect, assume that there is only one GP + for the module, and so any local symbol has this property. We + need explicit relocations to be able to enforce this for symbols + not defined in this unit of translation, however. */ + if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA) + return true; + + /* Functions that are not external are defined in this UoT, + and thus must share the same gp. */ + return ! SYMBOL_REF_EXTERNAL_P (op); +}) + +;; Return 1 if OP is a SYMBOL_REF for which we can make a call via bsr. +(define_predicate "direct_call_operand" + (match_operand 0 "samegp_function_operand") +{ + /* If profiling is implemented via linker tricks, we can't jump + to the nogp alternate entry point. Note that crtl->profile + would not be correct, since that doesn't indicate if the target + function uses profiling. */ + /* ??? TARGET_PROFILING_NEEDS_GP isn't really the right test, + but is approximately correct for the SYSV ABIs. Don't know + what to do for VMS, NT, or UMK. */ + if (!TARGET_PROFILING_NEEDS_GP && profile_flag) + return false; + + /* Must be a function. In some cases folks create thunks in static + data structures and then make calls to them. If we allow the + direct call, we'll get an error from the linker about !samegp reloc + against a symbol without a .prologue directive. */ + if (!SYMBOL_REF_FUNCTION_P (op)) + return false; + + /* Must be "near" so that the branch is assumed to reach. With + -msmall-text, this is assumed true of all local symbols. Since + we've already checked samegp, locality is already assured. */ + if (TARGET_SMALL_TEXT) + return true; + + return false; +}) + +;; Return 1 if OP is a valid operand for the MEM of a CALL insn. +;; +;; For TARGET_ABI_SYSV, we want to restrict to R27 or a pseudo. + +(define_predicate "call_operand" + (ior (match_code "symbol_ref") + (and (match_code "reg") + (ior (not (match_test "TARGET_ABI_OSF")) + (not (match_test "HARD_REGISTER_P (op)")) + (match_test "REGNO (op) == R27_REG"))))) + +;; Return true if OP is a LABEL_REF, or SYMBOL_REF or CONST referencing +;; a (non-tls) variable known to be defined in this file. +(define_predicate "local_symbolic_operand" + (match_code "label_ref,const,symbol_ref") +{ + if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (op, 0), 1))) + op = XEXP (XEXP (op, 0), 0); + + if (GET_CODE (op) == LABEL_REF) + return 1; + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + return (SYMBOL_REF_LOCAL_P (op) + && !SYMBOL_REF_WEAK (op) + && !SYMBOL_REF_TLS_MODEL (op)); +}) + +;; Return true if OP is a SYMBOL_REF or CONST referencing a variable +;; known to be defined in this file in the small data area. +(define_predicate "small_symbolic_operand" + (match_code "const,symbol_ref") +{ + HOST_WIDE_INT ofs = 0, max_ofs = 0; + + if (! TARGET_SMALL_DATA) + return false; + + if (GET_CODE (op) == CONST + && GET_CODE (XEXP (op, 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (op, 0), 1))) + { + ofs = INTVAL (XEXP (XEXP (op, 0), 1)); + op = XEXP (XEXP (op, 0), 0); + } + + if (GET_CODE (op) != SYMBOL_REF) + return false; + + /* ??? 
There's no encode_section_info equivalent for the rtl
+ constant pool, so SYMBOL_FLAG_SMALL never gets set. */
+ if (CONSTANT_POOL_ADDRESS_P (op))
+ {
+ max_ofs = GET_MODE_SIZE (get_pool_mode (op));
+ if (max_ofs > g_switch_value)
+ return false;
+ }
+ else if (SYMBOL_REF_LOCAL_P (op)
+ && SYMBOL_REF_SMALL_P (op)
+ && !SYMBOL_REF_WEAK (op)
+ && !SYMBOL_REF_TLS_MODEL (op))
+ {
+ if (SYMBOL_REF_DECL (op))
+ max_ofs = tree_to_uhwi (DECL_SIZE_UNIT (SYMBOL_REF_DECL (op)));
+ }
+ else
+ return false;
+
+ /* Given that we know that the GP is always 8 byte aligned, we can
+ always adjust by 7 without overflowing. */
+ if (max_ofs < 8)
+ max_ofs = 8;
+
+ /* Since we know this is an object in a small data section, we know the
+ entire section is addressable via GP. We don't know where the section
+ boundaries are, but we know the entire object is within. */
+ /*return IN_RANGE (ofs, 0, max_ofs - 1);*/
+
+ if (sw_64_gprel_size == 16)
+ return IN_RANGE (ofs, 0, max_ofs - 1);
+ if (sw_64_gprel_size == 32)
+ return false;
+
+ /* For any other sw_64_gprel_size, be conservative and do not treat
+ the symbol as small data. */
+ return false;
+})
+
+;; Return true if OP is a SYMBOL_REF or CONST referencing a variable
+;; not known (or known not) to be defined in this file.
+(define_predicate "global_symbolic_operand"
+ (match_code "const,symbol_ref")
+{
+ if (GET_CODE (op) == CONST
+ && GET_CODE (XEXP (op, 0)) == PLUS
+ && CONST_INT_P (XEXP (XEXP (op, 0), 1)))
+ op = XEXP (XEXP (op, 0), 0);
+
+ if (GET_CODE (op) != SYMBOL_REF)
+ return 0;
+
+ return ((!SYMBOL_REF_LOCAL_P (op) || SYMBOL_REF_WEAK (op))
+ && !SYMBOL_REF_TLS_MODEL (op));
+})
+
+;; Returns 1 if OP is a symbolic operand, i.e. a symbol_ref or a label_ref,
+;; possibly with an offset.
+(define_predicate "symbolic_operand"
+ (ior (match_code "symbol_ref,label_ref")
+ (and (match_code "const")
+ (match_code "plus" "0")
+ (match_code "symbol_ref,label_ref" "00")
+ (match_code "const_int" "01"))))
+
+;; Return true if OP is valid for 16-bit DTP relative relocations.
+(define_predicate "dtp16_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 16, UNSPEC_DTPREL)")))
+
+;; Return true if OP is valid for 32-bit DTP relative relocations.
+(define_predicate "dtp32_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 32, UNSPEC_DTPREL)")))
+
+;; Return true if OP is valid for 64-bit DTP relative relocations.
+(define_predicate "gotdtp_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 64, UNSPEC_DTPREL)")))
+
+;; Return true if OP is valid for 16-bit TP relative relocations.
+(define_predicate "tp16_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 16, UNSPEC_TPREL)")))
+
+;; Return true if OP is valid for 32-bit TP relative relocations.
+(define_predicate "tp32_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 32, UNSPEC_TPREL)")))
+
+;; Return true if OP is valid for 64-bit TP relative relocations.
+(define_predicate "gottp_symbolic_operand"
+ (and (match_code "const")
+ (match_test "tls_symbolic_operand_1 (op, 64, UNSPEC_TPREL)")))
+
+;; Return 1 if this memory address is a known aligned register plus
+;; a constant. It must be a valid address. This means that we can do
+;; this as an aligned reference plus some offset.
+;;
+;; Take into account what reload will do. Oh god this is awful.
+;; The horrible comma-operator construct below is to prevent genrecog
+;; from thinking that this predicate accepts REG and SUBREG. 
We don't +;; use recog during reload, so pretending these codes are accepted +;; pessimizes things a tad. + +(define_special_predicate "aligned_memory_operand" + (ior (match_test "op = resolve_reload_operand (op), 0") + (match_code "mem")) +{ + rtx base; + int offset; + + if (MEM_ALIGN (op) >= 32) + return 1; + + op = XEXP (op, 0); + + /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo) + sorts of constructs. Dig for the real base register. */ + if (reload_in_progress + && GET_CODE (op) == PLUS + && GET_CODE (XEXP (op, 0)) == PLUS) + { + base = XEXP (XEXP (op, 0), 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + if (! memory_address_p (mode, op)) + return 0; + if (GET_CODE (op) == PLUS) + { + base = XEXP (op, 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + base = op; + offset = 0; + } + } + + if (offset % GET_MODE_SIZE (mode)) + return 0; + + return (REG_P (base) && REGNO_POINTER_ALIGN (REGNO (base)) >= 32); +}) + +;; Similar, but return 1 if OP is a MEM which is not alignable. + +(define_special_predicate "unaligned_memory_operand" + (ior (match_test "op = resolve_reload_operand (op), 0") + (match_code "mem")) +{ + rtx base; + int offset; + + if (MEM_ALIGN (op) >= 32) + return 0; + + op = XEXP (op, 0); + + /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo) + sorts of constructs. Dig for the real base register. */ + if (reload_in_progress + && GET_CODE (op) == PLUS + && GET_CODE (XEXP (op, 0)) == PLUS) + { + base = XEXP (XEXP (op, 0), 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + if (! memory_address_p (mode, op)) + return 0; + if (GET_CODE (op) == PLUS) + { + base = XEXP (op, 0); + offset = INTVAL (XEXP (op, 1)); + } + else + { + base = op; + offset = 0; + } + } + + if (offset % GET_MODE_SIZE (mode)) + return 1; + + return (REG_P (base) && REGNO_POINTER_ALIGN (REGNO (base)) < 32); +}) + +;; Return 1 if OP is any memory location. During reload a pseudo matches. +(define_special_predicate "any_memory_operand" + (match_code "mem,reg,subreg") +{ + if (SUBREG_P (op)) + op = SUBREG_REG (op); + + if (MEM_P (op)) + return true; + if (reload_in_progress && REG_P (op)) + { + unsigned regno = REGNO (op); + if (HARD_REGISTER_NUM_P (regno)) + return false; + else + return reg_renumber[regno] < 0; + } + + return false; +}) + +;; Returns 1 if OP is not an eliminable register. +;; +;; This exists to cure a pathological failure in the s8addq (et al) patterns, +;; +;; long foo () { long t; bar (); return (long) &t * 26107; } +;; +;; which run afoul of a hack in reload to cure a (presumably) similar +;; problem with lea-type instructions on other targets. But there is +;; one of us and many of them, so work around the problem by selectively +;; preventing combine from making the optimization. + +(define_predicate "reg_not_elim_operand" + (match_operand 0 "register_operand") +{ + if (SUBREG_P (op)) + op = SUBREG_REG (op); + return op != frame_pointer_rtx && op != arg_pointer_rtx; +}) + +;; Accept a register, but not a subreg of any kind. This allows us to +;; avoid pathological cases in reload wrt data movement common in +;; int->fp conversion. */ +(define_predicate "reg_no_subreg_operand" + (and (match_code "reg") + (match_operand 0 "register_operand"))) + +;; Return 1 if OP is a valid Sw_64 comparison operator for "cbranch" +;; instructions. 
+(define_predicate "sw_64_cbranch_operator" + (ior (match_operand 0 "ordered_comparison_operator") + (match_code "ordered,unordered"))) + +;; Return 1 if OP is a valid Sw_64 comparison operator for "cmp" style +;; instructions. +(define_predicate "sw_64_comparison_operator" + (match_code "eq,le,lt,leu,ltu")) + +;; Similarly, but with swapped operands. +(define_predicate "sw_64_swapped_comparison_operator" + (match_code "eq,ge,gt,gtu")) + +;; Return 1 if OP is a valid Sw_64 comparison operator against zero +;; for "bcc" style instructions. +(define_predicate "sw_64_zero_comparison_operator" + (match_code "eq,ne,le,lt,leu,ltu")) + +;; Return 1 if OP is a signed comparison operation. +(define_predicate "signed_comparison_operator" + (match_code "eq,ne,le,lt,ge,gt")) + +;; Return 1 if OP is a valid Sw_64 floating point comparison operator. +(define_predicate "sw_64_fp_comparison_operator" + (match_code "eq,le,lt,unordered")) + +;; Return 1 if this is a divide or modulus operator. +(define_predicate "divmod_operator" + (match_code "div,mod,udiv,umod")) + +;; Return 1 if this is a float->int conversion operator. +(define_predicate "fix_operator" + (match_code "fix,unsigned_fix")) + +;; Recognize an addition operation that includes a constant. Used to +;; convince reload to canonize (plus (plus reg c1) c2) during register +;; elimination. + +(define_predicate "addition_operation" + (and (match_code "plus") + (match_test "register_operand (XEXP (op, 0), mode) + && satisfies_constraint_K (XEXP (op, 1))"))) + +;; For TARGET_EXPLICIT_RELOCS, we don't obfuscate a SYMBOL_REF to a +;; small symbolic operand until after reload. At which point we need +;; to replace (mem (symbol_ref)) with (mem (lo_sum $29 symbol_ref)) +;; so that sched2 has the proper dependency information. */ +(define_predicate "some_small_symbolic_operand" + (match_code "set,parallel,prefetch,unspec,unspec_volatile") +{ + /* Avoid search unless necessary. */ + if (!TARGET_EXPLICIT_RELOCS || !reload_completed) + return false; + return some_small_symbolic_operand_int (op); +}) + +;; Accept a register, or a memory if BWX is enabled. +(define_predicate "reg_or_bwx_memory_operand" + (ior (match_operand 0 "register_operand") + (and (match_test "TARGET_BWX") + (match_operand 0 "memory_operand")))) + +;; Accept a memory whose address is only a register. +(define_predicate "mem_noofs_operand" + (and (match_code "mem") + (match_code "reg" "0"))) + +(define_predicate "sw_64_branch_combination" + (match_code "eq,ne,le,lt,ge,gt,leu,ltu,geu,gtu")) + +(define_predicate "sw_64_swapped_branch_combination" + (match_code "ne,ge,gt,geu,gtu")) + diff --git a/gcc/config/sw_64/sw6.md b/gcc/config/sw_64/sw6.md new file mode 100644 index 0000000000000000000000000000000000000000..615ddae7079debcceeda8a62b64c2b623b42e01e --- /dev/null +++ b/gcc/config/sw_64/sw6.md @@ -0,0 +1,181 @@ +;; Scheduling description for Sw_64 SW6. +;; Copyright (C) 2002-2020 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. 
+;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +; SW6 can issue 4 insns per clock. It's out-of-order, so this isn't +; expected to help over-much, but a precise description can be important +; for software pipelining. +; +; SW6 has two symmetric pairs ("clusters") of two asymmetric integer +; units ("upper" and "lower"), yielding pipe names U0, U1, L0, L1. +; +; ??? The clusters have independent register files that are re-synced +; every cycle. Thus there is one additional cycle of latency between +; insns issued on different clusters. Possibly model that by duplicating +; all EBOX insn_reservations that can issue to either cluster, increasing +; all latencies by one, and adding bypasses within the cluster. +; +; ??? In addition, instruction order affects cluster issue. + +(define_automaton "sw6_0,sw6_1") +(define_cpu_unit "sw6_u0,sw6_u1,sw6_l0,sw6_l1" "sw6_0") +(define_reservation "sw6_u" "sw6_u0|sw6_u1") +(define_reservation "sw6_l" "sw6_l0|sw6_l1") +(define_reservation "sw6_ebox" "sw6_u|sw6_l") + +(define_cpu_unit "sw6_fa" "sw6_1") +(define_cpu_unit "sw6_fm,sw6_fst0,sw6_fst1" "sw6_0") +(define_reservation "sw6_fst" "sw6_fst0|sw6_fst1") + +; Assume type "multi" single issues. +(define_insn_reservation "sw6_multi" 1 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "multi")) + "sw6_u0+sw6_u1+sw6_l0+sw6_l1+sw6_fa+sw6_fm+sw6_fst0+sw6_fst1") + +; Integer loads take at least 3 clocks, and only issue to lower units. +; adjust_cost still factors in user-specified memory latency, so return 1 here. +(define_insn_reservation "sw6_ild" 4 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "ild,ldsym,ld_l")) + "sw6_l") + +(define_insn_reservation "sw6_ist" 4 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "ist,st_c")) + "sw6_l") + +(define_insn_reservation "sw6_mb" 1 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "mb")) + "sw6_l1") + +; FP loads take at least 4 clocks. adjust_cost still factors +; in user-specified memory latency, so return 2 here. +(define_insn_reservation "sw6_fld" 2 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "fld")) + "sw6_l") + +; The FPU communicates with memory and the integer register file +; via two fp store units. We need a slot in the fst immediately, and +; a slot in LOW after the operand data is ready. At which point the +; data may be moved either to the store queue or the integer register +; file and the insn retired. + +(define_insn_reservation "sw6_fst" 3 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "fst")) + "sw6_fst,nothing,sw6_l") + +; Arithmetic goes anywhere. +(define_insn_reservation "sw6_arith" 1 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "iadd,ilog,icmp")) + "sw6_ebox") + +; Motion video insns also issue only to U0, and take three ticks. +(define_insn_reservation "sw6_mvi" 3 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "mvi")) + "sw6_u0") + +; Shifts issue to upper units. +(define_insn_reservation "sw6_shift" 1 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "shift")) + "sw6_u") + +; Multiplies issue only to U1, and all take 7 ticks. +(define_insn_reservation "sw6_imul" 7 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "imul")) + "sw6_u1") + +; Conditional moves decompose into two independent primitives, each taking +; one cycle. Since sw6 is out-of-order, we can't see anything but two cycles. 
+(define_insn_reservation "sw6_icmov" 2 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "icmov")) + "sw6_ebox,sw6_ebox") + +; Integer branches issue to upper units +(define_insn_reservation "sw6_ibr" 1 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "ibr,callpal")) + "sw6_u") + +; Calls only issue to L0. +(define_insn_reservation "sw6_jsr" 1 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "call")) + "sw6_l0") + +; Ftoi/itof only issue to lower pipes. +(define_insn_reservation "sw6_itof" 3 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "itof")) + "sw6_l") + +(define_insn_reservation "sw6_ftoi" 3 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "ftoi")) + "sw6_fst,nothing,sw6_l") + +(define_insn_reservation "sw6_fmul" 4 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "fmul")) + "sw6_fm") + +(define_insn_reservation "sw6_fadd" 4 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "fadd,fcpys,fbr")) + "sw6_fa") + +(define_bypass 6 "sw6_fmul,sw6_fadd" "sw6_fst,sw6_ftoi") + +(define_insn_reservation "sw6_fcmov" 8 + (and (eq_attr "tune" "sw6") + (eq_attr "type" "fcmov")) + "sw6_fa,nothing*3,sw6_fa") + +(define_bypass 10 "sw6_fcmov" "sw6_fst,sw6_ftoi") + +(define_insn_reservation "sw6_fdivsf" 12 + (and (eq_attr "tune" "sw6") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "si"))) + "sw6_fa*9") + +(define_insn_reservation "sw6_fdivdf" 15 + (and (eq_attr "tune" "sw6") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "di"))) + "sw6_fa*12") + +(define_insn_reservation "sw6_sqrtsf" 18 + (and (eq_attr "tune" "sw6") + (and (eq_attr "type" "fsqrt") + (eq_attr "opsize" "si"))) + "sw6_fa*15") + +(define_insn_reservation "sw6_sqrtdf" 33 + (and (eq_attr "tune" "sw6") + (and (eq_attr "type" "fsqrt") + (eq_attr "opsize" "di"))) + "sw6_fa*30") diff --git a/gcc/config/sw_64/sw8.md b/gcc/config/sw_64/sw8.md new file mode 100644 index 0000000000000000000000000000000000000000..414908dbc8fbec5a1951da54666e7c23d64e9eb1 --- /dev/null +++ b/gcc/config/sw_64/sw8.md @@ -0,0 +1,181 @@ +;; Scheduling description for Sw_64 SW8. +;; Copyright (C) 2002-2020 Free Software Foundation, Inc. +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +; SW8 can issue 4 insns per clock. It's out-of-order, so this isn't +; expected to help over-much, but a precise description can be important +; for software pipelining. +; +; SW8 has two symmetric pairs ("clusters") of two asymmetric integer +; units ("upper" and "lower"), yielding pipe names U0, U1, L0, L1. +; +; ??? The clusters have independent register files that are re-synced +; every cycle. Thus there is one additional cycle of latency between +; insns issued on different clusters. Possibly model that by duplicating +; all EBOX insn_reservations that can issue to either cluster, increasing +; all latencies by one, and adding bypasses within the cluster. +; +; ??? In addition, instruction order affects cluster issue. 
+ +(define_automaton "sw8_0,sw8_1") +(define_cpu_unit "sw8_u0,sw8_u1,sw8_l0,sw8_l1" "sw8_0") +(define_reservation "sw8_u" "sw8_u0|sw8_u1") +(define_reservation "sw8_l" "sw8_l0|sw8_l1") +(define_reservation "sw8_ebox" "sw8_u|sw8_l") + +(define_cpu_unit "sw8_fa" "sw8_1") +(define_cpu_unit "sw8_fm,sw8_fst0,sw8_fst1" "sw8_0") +(define_reservation "sw8_fst" "sw8_fst0|sw8_fst1") + +; Assume type "multi" single issues. +(define_insn_reservation "sw8_multi" 1 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "multi")) + "sw8_u0+sw8_u1+sw8_l0+sw8_l1+sw8_fa+sw8_fm+sw8_fst0+sw8_fst1") + +; Integer loads take at least 3 clocks, and only issue to lower units. +; adjust_cost still factors in user-specified memory latency, so return 1 here. +(define_insn_reservation "sw8_ild" 4 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "ild,ldsym,ld_l")) + "sw8_l") + +(define_insn_reservation "sw8_ist" 4 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "ist,st_c")) + "sw8_l") + +(define_insn_reservation "sw8_mb" 1 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "mb")) + "sw8_l1") + +; FP loads take at least 4 clocks. adjust_cost still factors +; in user-specified memory latency, so return 2 here. +(define_insn_reservation "sw8_fld" 2 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "fld")) + "sw8_l") + +; The FPU communicates with memory and the integer register file +; via two fp store units. We need a slot in the fst immediately, and +; a slot in LOW after the operand data is ready. At which point the +; data may be moved either to the store queue or the integer register +; file and the insn retired. + +(define_insn_reservation "sw8_fst" 3 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "fst")) + "sw8_fst,nothing,sw8_l") + +; Arithmetic goes anywhere. +(define_insn_reservation "sw8_arith" 1 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "iadd,ilog,icmp")) + "sw8_ebox") + +; Motion video insns also issue only to U0, and take three ticks. +(define_insn_reservation "sw8_mvi" 3 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "mvi")) + "sw8_u0") + +; Shifts issue to upper units. +(define_insn_reservation "sw8_shift" 1 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "shift")) + "sw8_u") + +; Multiplies issue only to U1, and all take 7 ticks. +(define_insn_reservation "sw8_imul" 7 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "imul")) + "sw8_u1") + +; Conditional moves decompose into two independent primitives, each taking +; one cycle. Since sw8 is out-of-order, we can't see anything but two cycles. +(define_insn_reservation "sw8_icmov" 2 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "icmov")) + "sw8_ebox,sw8_ebox") + +; Integer branches issue to upper units +(define_insn_reservation "sw8_ibr" 1 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "ibr,callpal")) + "sw8_u") + +; Calls only issue to L0. +(define_insn_reservation "sw8_jsr" 1 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "call")) + "sw8_l0") + +; Ftoi/itof only issue to lower pipes. 
+(define_insn_reservation "sw8_itof" 3 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "itof")) + "sw8_l") + +(define_insn_reservation "sw8_ftoi" 3 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "ftoi")) + "sw8_fst,nothing,sw8_l") + +(define_insn_reservation "sw8_fmul" 4 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "fmul")) + "sw8_fm") + +(define_insn_reservation "sw8_fadd" 4 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "fadd,fcpys,fbr")) + "sw8_fa") + +(define_bypass 6 "sw8_fmul,sw8_fadd" "sw8_fst,sw8_ftoi") + +(define_insn_reservation "sw8_fcmov" 8 + (and (eq_attr "tune" "sw8") + (eq_attr "type" "fcmov")) + "sw8_fa,nothing*3,sw8_fa") + +(define_bypass 10 "sw8_fcmov" "sw8_fst,sw8_ftoi") + +(define_insn_reservation "sw8_fdivsf" 12 + (and (eq_attr "tune" "sw8") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "si"))) + "sw8_fa*9") + +(define_insn_reservation "sw8_fdivdf" 15 + (and (eq_attr "tune" "sw8") + (and (eq_attr "type" "fdiv") + (eq_attr "opsize" "di"))) + "sw8_fa*12") + +(define_insn_reservation "sw8_sqrtsf" 18 + (and (eq_attr "tune" "sw8") + (and (eq_attr "type" "fsqrt") + (eq_attr "opsize" "si"))) + "sw8_fa*15") + +(define_insn_reservation "sw8_sqrtdf" 33 + (and (eq_attr "tune" "sw8") + (and (eq_attr "type" "fsqrt") + (eq_attr "opsize" "di"))) + "sw8_fa*30") diff --git a/gcc/config/sw_64/sw_64-modes.def b/gcc/config/sw_64/sw_64-modes.def new file mode 100644 index 0000000000000000000000000000000000000000..537a1b6545fb679d60babd417af2dcfc2916c2f1 --- /dev/null +++ b/gcc/config/sw_64/sw_64-modes.def @@ -0,0 +1,27 @@ +/* Sw_64 extra machine modes. + Copyright (C) 2003-2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* 128-bit floating point. This gets reset in sw_64_option_override + if VAX float format is in use. */ +FLOAT_MODE (TF, 16, ieee_quad_format); + +/* Vector modes. */ +VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. */ +VECTOR_MODE (INT, QI, 4); /* V4QI. */ +VECTOR_MODE (INT, QI, 2); /* V2QI. */ diff --git a/gcc/config/sw_64/sw_64-passes.def b/gcc/config/sw_64/sw_64-passes.def new file mode 100644 index 0000000000000000000000000000000000000000..9d3964cdb3507cf6263c4c7c93131628942ae3ea --- /dev/null +++ b/gcc/config/sw_64/sw_64-passes.def @@ -0,0 +1,21 @@ +/* Description of target passes for Sw_64 + Copyright (C) 2016-2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. 
*/ + + INSERT_PASS_AFTER (pass_convert_to_eh_region_ranges, 1, pass_handle_trap_shadows); + INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_insns); diff --git a/gcc/config/sw_64/sw_64-protos.h b/gcc/config/sw_64/sw_64-protos.h new file mode 100644 index 0000000000000000000000000000000000000000..c20a1cfece2ba34e8b9f0eca200ec1669d3cc673 --- /dev/null +++ b/gcc/config/sw_64/sw_64-protos.h @@ -0,0 +1,146 @@ +/* Prototypes for sw_64.c functions used in the md file & elsewhere. + Copyright (C) 1999-2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +extern int sw_64_next_sequence_number; + +extern void +literal_section (void); +extern int zap_mask (HOST_WIDE_INT); +extern bool +direct_return (void); + +extern HOST_WIDE_INT +sw_64_initial_elimination_offset (unsigned int, unsigned int); +extern void +sw_64_expand_prologue (void); +extern void +sw_64_expand_epilogue (void); +extern void +sw_64_output_filename (FILE *, const char *); + +extern bool sw_64_legitimate_constant_p (machine_mode, rtx); +extern rtx +sw_64_legitimize_reload_address (rtx, machine_mode, int, int, int); + +extern rtx split_small_symbolic_operand (rtx); + +extern void +get_aligned_mem (rtx, rtx *, rtx *); +extern rtx get_unaligned_address (rtx); +extern rtx get_unaligned_offset (rtx, HOST_WIDE_INT); +extern enum reg_class sw_64_preferred_reload_class (rtx, enum reg_class); + +extern void sw_64_set_memflags (rtx, rtx); +extern bool +sw_64_split_const_mov (machine_mode, rtx *); +extern bool +sw_64_expand_mov (machine_mode, rtx *); +extern bool +sw_64_expand_mov_nobwx (machine_mode, rtx *); +extern void +sw_64_expand_movmisalign (machine_mode, rtx *); +extern void sw_64_emit_floatuns (rtx[]); +extern rtx sw_64_emit_conditional_move (rtx, machine_mode); +extern void +sw_64_split_tmode_pair (rtx[], machine_mode, bool); +extern void sw_64_split_tfmode_frobsign (rtx[], rtx (*) (rtx, rtx, rtx)); +extern void +sw_64_expand_unaligned_load (rtx, rtx, HOST_WIDE_INT, HOST_WIDE_INT, int); +extern void sw_64_expand_unaligned_store (rtx, rtx, HOST_WIDE_INT, + HOST_WIDE_INT); +extern int sw_64_expand_block_move (rtx[]); +extern int sw_64_expand_block_clear (rtx[]); +extern rtx sw_64_expand_zap_mask (HOST_WIDE_INT); +extern void sw_64_expand_builtin_vector_binop (rtx (*) (rtx, rtx, rtx), + machine_mode, rtx, rtx, rtx); + +extern rtx +sw_64_return_addr (int, rtx); +extern rtx +sw_64_gp_save_rtx (void); +extern void +sw_64_initialize_trampoline (rtx, rtx, rtx, int, int, int); + +extern rtx sw_64_va_arg (tree, tree); + +extern void +sw_64_start_function (FILE *, const char *, tree); +extern void +sw_64_end_function (FILE *, const char *, tree); + +extern bool sw_64_find_lo_sum_using_gp (rtx); + +#ifdef REAL_VALUE_TYPE +extern int +check_float_value (machine_mode, REAL_VALUE_TYPE *, int); +#endif + +#ifdef RTX_CODE +extern void sw_64_emit_conditional_branch (rtx[], machine_mode); +extern bool sw_64_emit_setcc (rtx[], machine_mode); +extern int 
sw_64_split_conditional_move (enum rtx_code, rtx, rtx, rtx, rtx); +extern void sw_64_emit_xfloating_arith (enum rtx_code, rtx[]); +extern void sw_64_emit_xfloating_cvt (enum rtx_code, rtx[]); +extern void sw_64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, + enum memmodel); +extern void +sw_64_split_compare_and_swap (rtx op[]); +extern void +sw_64_expand_compare_and_swap_12 (rtx op[]); +extern void +sw_64_split_compare_and_swap_12 (rtx op[]); +extern void +sw_64_split_atomic_exchange (rtx op[]); +extern void +sw_64_expand_atomic_exchange_12 (rtx op[]); +extern void +sw_64_split_atomic_exchange_12 (rtx op[]); +#endif + +extern void +sw_64_split_atomic_cas (rtx op[]); +extern void +sw_64_split_atomic_cas_12 (rtx op[]); + +extern rtx +sw_64_use_linkage (rtx, bool, bool); + +extern rtx unicosmk_add_call_info_word (rtx); + +extern bool some_small_symbolic_operand_int (rtx); +extern int +tls_symbolic_operand_1 (rtx, int, int); +extern rtx resolve_reload_operand (rtx); + +namespace gcc { +class context; +} +class rtl_opt_pass; + +extern rtl_opt_pass * +make_pass_handle_trap_shadows (gcc::context *); +extern rtl_opt_pass * +make_pass_align_insns (gcc::context *); + +extern void +sw_64_emit_rsqrt (rtx, rtx, bool); + +extern void +sw_64_emit_swdiv (rtx, rtx, rtx, bool); +extern rtx gen_move_reg (rtx); diff --git a/gcc/config/sw_64/sw_64.c b/gcc/config/sw_64/sw_64.c new file mode 100644 index 0000000000000000000000000000000000000000..68b85b828968bfeab12fb15f8d0ff303f37f913b --- /dev/null +++ b/gcc/config/sw_64/sw_64.c @@ -0,0 +1,10058 @@ +/* Subroutines used for code generation on the Sw_64. + Copyright (C) 1992-2020 Free Software Foundation, Inc. + Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +#define IN_TARGET_CODE 1 + +#include "config.h" +#include "system.h" +#include "coretypes.h" +#include "backend.h" +#include "target.h" +#include "rtl.h" +#include "tree.h" +#include "stringpool.h" +#include "attribs.h" +#include "memmodel.h" +#include "gimple.h" +#include "df.h" +#include "predict.h" +#include "tm_p.h" +#include "ssa.h" +#include "expmed.h" +#include "optabs.h" +#include "regs.h" +#include "emit-rtl.h" +#include "recog.h" +#include "diagnostic-core.h" +#include "alias.h" +#include "fold-const.h" +#include "stor-layout.h" +#include "calls.h" +#include "varasm.h" +#include "output.h" +#include "insn-attr.h" +#include "explow.h" +#include "expr.h" +#include "reload.h" +#include "except.h" +#include "common/common-target.h" +#include "debug.h" +#include "langhooks.h" +#include "cfgrtl.h" +#include "tree-pass.h" +#include "context.h" +#include "gimple-iterator.h" +#include "gimplify.h" +#include "tree-stdarg.h" +#include "tm-constrs.h" +#include "libfuncs.h" +#include "opts.h" +#include "builtins.h" +#include "rtl-iter.h" +#include "asan.h" + +#include "flags.h" +/* This file should be included last. */ +#include "target-def.h" + +/* Specify which cpu to schedule for. 
*/ +enum processor_type sw_64_tune; + +/* Which cpu we're generating code for. */ +enum processor_type sw_64_cpu; + +static const char *const sw_64_cpu_name[] = {"sw6", "sw8a"}; + +/* Specify how accurate floating-point traps need to be. */ + +enum sw_64_trap_precision sw_64_tp; + +/* Specify the floating-point rounding mode. */ + +enum sw_64_fp_rounding_mode sw_64_fprm; + +/* Specify which things cause traps. */ + +enum sw_64_fp_trap_mode sw_64_fptm; + +/* Nonzero if inside of a function, because the Sw_64 asm can't + handle .files inside of functions. */ + +static int inside_function = FALSE; + +/* The number of cycles of latency we should assume on memory reads. */ + +static int sw_64_memory_latency = 3; + +/* Whether the function needs the GP. */ + +static int sw_64_function_needs_gp; + +/* The assembler name of the current function. */ + +static const char *sw_64_fnname; + +/* The next explicit relocation sequence number. */ +extern GTY (()) int sw_64_next_sequence_number; +int sw_64_next_sequence_number = 1; + +int stfp3_flag; +extern int flag_fpcr_set; + +int warning_sbt_num = 0; +int warning_cbt_num = 0; + +/* The literal and gpdisp sequence numbers for this insn, as printed + by %# and %* respectively. */ +extern GTY (()) int sw_64_this_literal_sequence_number; +extern GTY (()) int sw_64_this_gpdisp_sequence_number; +int sw_64_this_literal_sequence_number; +int sw_64_this_gpdisp_sequence_number; + +/* Costs of various operations on the different architectures. */ + +struct sw_64_rtx_cost_data +{ + unsigned char fp_add; + unsigned char fp_mult; + unsigned char fp_div_sf; + unsigned char fp_div_df; + unsigned char int_mult_si; + unsigned char int_mult_di; + unsigned char int_shift; + unsigned char int_cmov; + unsigned short int_div; +}; + +static struct sw_64_rtx_cost_data const sw_64_rtx_cost_data[PROCESSOR_MAX + 1] + = { + { + /* sw6b */ + COSTS_N_INSNS (6), /* fp_add */ + COSTS_N_INSNS (6), /* fp_mult */ + COSTS_N_INSNS (19), /* fp_div_sf */ + COSTS_N_INSNS (19), /* fp_div_df */ + COSTS_N_INSNS (4), /* int_mult_si */ + COSTS_N_INSNS (4), /* int_mult_di */ + COSTS_N_INSNS (1), /* int_shift */ + COSTS_N_INSNS (1), /* int_cmov */ + COSTS_N_INSNS (83), /* int_div */ + }, + { + /* sw8a */ + COSTS_N_INSNS (6), /* fp_add */ + COSTS_N_INSNS (6), /* fp_mult */ + COSTS_N_INSNS (19), /* fp_div_sf */ + COSTS_N_INSNS (19), /* fp_div_df */ + COSTS_N_INSNS (4), /* int_mult_si */ + COSTS_N_INSNS (4), /* int_mult_di */ + COSTS_N_INSNS (1), /* int_shift */ + COSTS_N_INSNS (1), /* int_cmov */ + COSTS_N_INSNS (20), /* int_div */ + }, + { + /* rtx-cost */ + COSTS_N_INSNS (6), /* fp_add */ + COSTS_N_INSNS (6), /* fp_mult */ + COSTS_N_INSNS (19), /* fp_div_sf */ + COSTS_N_INSNS (19), /* fp_div_df */ + COSTS_N_INSNS (4), /* int_mult_si */ + COSTS_N_INSNS (4), /* int_mult_di */ + COSTS_N_INSNS (3), /* int_shift */ + COSTS_N_INSNS (1), /* int_cmov */ + COSTS_N_INSNS (20), /* int_div */ + }, +}; + +/* Similar but tuned for code size instead of execution latency. The + extra +N is fractional cost tuning based on latency. It's used to + encourage use of cheaper insns like shift, but only if there's just + one of them. 
*/ + +static struct sw_64_rtx_cost_data const sw_64_rtx_cost_size = { + COSTS_N_INSNS (1), /* fp_add */ + COSTS_N_INSNS (1), /* fp_mult */ + COSTS_N_INSNS (1), /* fp_div_sf */ + COSTS_N_INSNS (1) + 1, /* fp_div_df */ + COSTS_N_INSNS (1) + 1, /* int_mult_si */ + COSTS_N_INSNS (1) + 2, /* int_mult_di */ + COSTS_N_INSNS (1), /* int_shift */ + COSTS_N_INSNS (1), /* int_cmov */ + COSTS_N_INSNS (6), /* int_div */ +}; + +/* Get the number of args of a function in one of two ways. */ +#define NUM_ARGS crtl->args.info + +#define REG_PV 27 +#define REG_RA 26 + +/* Declarations of static functions. */ +static struct machine_function * +sw_64_init_machine_status (void); +static rtx +sw_64_emit_xfloating_compare (enum rtx_code *, rtx, rtx); +static void +sw_64_handle_trap_shadows (void); +static void +sw_64_align_insns (void); +static void +sw_64_override_options_after_change (void); + +static unsigned int +rest_of_handle_trap_shadows (void) +{ + sw_64_handle_trap_shadows (); + return 0; +} + +namespace { + +const pass_data pass_data_handle_trap_shadows = { + RTL_PASS, + "trap_shadows", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + +class pass_handle_trap_shadows : public rtl_opt_pass +{ +public: + pass_handle_trap_shadows (gcc::context *ctxt) + : rtl_opt_pass (pass_data_handle_trap_shadows, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + return sw_64_tp != SW_64_TP_PROG || flag_exceptions; + } + + virtual unsigned int execute (function *) + { + return rest_of_handle_trap_shadows (); + } + +}; // class pass_handle_trap_shadows + +} // namespace + +rtl_opt_pass * +make_pass_handle_trap_shadows (gcc::context *ctxt) +{ + return new pass_handle_trap_shadows (ctxt); +} + +static unsigned int +rest_of_align_insns (void) +{ + sw_64_align_insns (); + return 0; +} + +namespace { + +const pass_data pass_data_align_insns = { + RTL_PASS, + "align_insns", /* name */ + OPTGROUP_NONE, /* optinfo_flags */ + TV_NONE, /* tv_id */ + 0, /* properties_required */ + 0, /* properties_provided */ + 0, /* properties_destroyed */ + 0, /* todo_flags_start */ + TODO_df_finish, /* todo_flags_finish */ +}; + +class pass_align_insns : public rtl_opt_pass +{ +public: + pass_align_insns (gcc::context *ctxt) + : rtl_opt_pass (pass_data_align_insns, ctxt) + {} + + /* opt_pass methods: */ + virtual bool gate (function *) + { + /* Due to the number of extra memb insns, don't bother fixing up + alignment when trap precision is instruction. Moreover, we can + only do our job when sched2 is run. */ + return ((sw_64_tune != PROCESSOR_SW6 && sw_64_tune != PROCESSOR_SW8) + && optimize && !optimize_size && sw_64_tp != SW_64_TP_INSN + && flag_schedule_insns_after_reload); + } + + virtual unsigned int execute (function *) { return rest_of_align_insns (); } + +}; // class pass_align_insns + +} // namespace + +rtl_opt_pass * +make_pass_align_insns (gcc::context *ctxt) +{ + return new pass_align_insns (ctxt); +} + +#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING +/* Implement TARGET_MANGLE_TYPE. */ + +static const char * +sw_64_mangle_type (const_tree type) +{ + if (TYPE_MAIN_VARIANT (type) == long_double_type_node + && TARGET_LONG_DOUBLE_128) + return "g"; + + /* For all other types, use normal C++ mangling. */ + return NULL; +} +#endif + +/* Parse target option strings. 
*/ + +static void +sw_64_option_override (void) +{ + static const struct cpu_table + { + const char *const name; + const enum processor_type processor; + const int flags; + const unsigned short line_size; /* in bytes. */ + const unsigned short l1_size; /* in kb. */ + const unsigned short l2_size; /* in kb. */ + } cpu_table[] = { + {"sw6a", PROCESSOR_SW6, MASK_BWX | MASK_CIX | MASK_FIX | MASK_SW6A, 128, 32, + 512}, + {"sw6b", PROCESSOR_SW6, MASK_BWX | MASK_CIX | MASK_FIX | MASK_SW6B, 128, 32, + 512}, + {"sw8a", PROCESSOR_SW8, MASK_BWX | MASK_CIX | MASK_FIX | MASK_SW8A, 128, 32, + 512}, + }; + + int const ct_size = ARRAY_SIZE (cpu_table); + int line_size = 0, l1_size = 0, l2_size = 0; + int i; + +#ifdef SUBTARGET_OVERRIDE_OPTIONS + SUBTARGET_OVERRIDE_OPTIONS; +#endif + + /* Default to full IEEE compliance mode for Go language. */ + if (strcmp (lang_hooks.name, "GNU Go") == 0 + && !(target_flags_explicit & MASK_IEEE)) + target_flags |= MASK_IEEE; + + sw_64_fprm = SW_64_FPRM_NORM; + sw_64_tp = SW_64_TP_PROG; + sw_64_fptm = SW_64_FPTM_N; + + if (TARGET_IEEE) + { + sw_64_tp = SW_64_TP_INSN; + sw_64_fptm = SW_64_FPTM_SU; + } + if (TARGET_IEEE_WITH_INEXACT) + { + sw_64_tp = SW_64_TP_INSN; + sw_64_fptm = SW_64_FPTM_SUI; + } + if (TARGET_IEEE_MAIN) + { + sw_64_tp = SW_64_TP_INSN; + sw_64_fptm = SW_64_FPTM_SU; + } + + if (sw_64_tp_string) + { + if (!strcmp (sw_64_tp_string, "p")) + sw_64_tp = SW_64_TP_PROG; + else if (!strcmp (sw_64_tp_string, "f")) + sw_64_tp = SW_64_TP_FUNC; + else if (!strcmp (sw_64_tp_string, "i")) + sw_64_tp = SW_64_TP_INSN; + else + error ("bad value %qs for %<-mtrap-precision%> switch", + sw_64_tp_string); + } + + if (sw_64_fprm_string) + { + if (!strcmp (sw_64_fprm_string, "n")) + sw_64_fprm = SW_64_FPRM_NORM; + else if (!strcmp (sw_64_fprm_string, "m")) + sw_64_fprm = SW_64_FPRM_MINF; + else if (!strcmp (sw_64_fprm_string, "c")) + sw_64_fprm = SW_64_FPRM_CHOP; + else if (!strcmp (sw_64_fprm_string, "d")) + sw_64_fprm = SW_64_FPRM_DYN; + else + error ("bad value %qs for %<-mfp-rounding-mode%> switch", + sw_64_fprm_string); + } + + if (sw_64_fptm_string) + { + if (strcmp (sw_64_fptm_string, "n") == 0) + sw_64_fptm = SW_64_FPTM_N; + else if (strcmp (sw_64_fptm_string, "u") == 0) + sw_64_fptm = SW_64_FPTM_U; + else if (strcmp (sw_64_fptm_string, "su") == 0) + sw_64_fptm = SW_64_FPTM_SU; + else if (strcmp (sw_64_fptm_string, "sui") == 0) + sw_64_fptm = SW_64_FPTM_SUI; + else + error ("bad value %qs for %<-mfp-trap-mode%> switch", + sw_64_fptm_string); + } + + if (sw_64_cpu_string) + { + for (i = 0; i < ct_size; i++) + if (!strcmp (sw_64_cpu_string, cpu_table[i].name)) + { + sw_64_tune = sw_64_cpu = cpu_table[i].processor; + line_size = cpu_table[i].line_size; + l1_size = cpu_table[i].l1_size; + l2_size = cpu_table[i].l2_size; + target_flags &= ~(MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX + | MASK_SW6A | MASK_SW6B | MASK_SW8A); + target_flags |= cpu_table[i].flags; + break; + } + if (i == ct_size) + error ("bad value %qs for %<-mcpu%> switch", sw_64_cpu_string); + } + + if (sw_64_tune_string) + { + for (i = 0; i < ct_size; i++) + if (!strcmp (sw_64_tune_string, cpu_table[i].name)) + { + sw_64_tune = cpu_table[i].processor; + line_size = cpu_table[i].line_size; + l1_size = cpu_table[i].l1_size; + l2_size = cpu_table[i].l2_size; + break; + } + if (i == ct_size) + error ("bad value %qs for %<-mtune%> switch", sw_64_tune_string); + } + if (line_size) + SET_OPTION_IF_UNSET (&global_options, &global_options_set, + param_l1_cache_line_size, line_size); + if (l1_size) + SET_OPTION_IF_UNSET 
(&global_options, &global_options_set, + param_l1_cache_size, l1_size); + if (l2_size) + SET_OPTION_IF_UNSET (&global_options, &global_options_set, + param_l2_cache_size, l2_size); + + // generate prefetch for cases like stream add + if (flag_sw_prefetch_add == 1) + SET_OPTION_IF_UNSET (&global_options, &global_options_set, + param_prefetch_min_insn_to_mem_ratio, 2); + + if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch) + flag_prefetch_loop_arrays = 1; + + /* set simultaneous prefetches and latency for sw + * * need add some conditions to decide what the cpu kind. */ + SET_OPTION_IF_UNSET (&global_options, &global_options_set, + param_simultaneous_prefetches, 8); + + if (flag_sw_prefetch_unroll == 1) + { + SET_OPTION_IF_UNSET (&global_options, &global_options_set, + param_max_unrolled_insns, 400); + } + /* Do some sanity checks on the above options. */ + + if ((sw_64_fptm == SW_64_FPTM_SU || sw_64_fptm == SW_64_FPTM_SUI) + && sw_64_tp != SW_64_TP_INSN && sw_64_cpu != PROCESSOR_SW6 + && sw_64_cpu != PROCESSOR_SW8) + { + warning (0, "fp software completion requires %<-mtrap-precision=i%>"); + sw_64_tp = SW_64_TP_INSN; + } + + if (sw_64_cpu == PROCESSOR_SW6 || sw_64_cpu == PROCESSOR_SW8) + { + /* Except for SW6 pass 1 (not released), we always have precise + arithmetic traps. Which means we can do software completion + without minding trap shadows. */ + sw_64_tp = SW_64_TP_PROG; + } + + if (TARGET_FLOAT_VAX) + { + if (sw_64_fprm == SW_64_FPRM_MINF || sw_64_fprm == SW_64_FPRM_DYN) + { + warning (0, "rounding mode not supported for VAX floats"); + sw_64_fprm = SW_64_FPRM_NORM; + } + if (sw_64_fptm == SW_64_FPTM_SUI) + { + warning (0, "trap mode not supported for VAX floats"); + sw_64_fptm = SW_64_FPTM_SU; + } + if (target_flags_explicit & MASK_LONG_DOUBLE_128) + warning (0, "128-bit long double not supported for VAX floats"); + target_flags &= ~MASK_LONG_DOUBLE_128; + } + + { + char *end; + int lat; + + if (!sw_64_mlat_string) + sw_64_mlat_string = "L1"; + + if (ISDIGIT ((unsigned char) sw_64_mlat_string[0]) + && (lat = strtol (sw_64_mlat_string, &end, 10), *end == '\0')) + ; + else if ((sw_64_mlat_string[0] == 'L' || sw_64_mlat_string[0] == 'l') + && ISDIGIT ((unsigned char) sw_64_mlat_string[1]) + && sw_64_mlat_string[2] == '\0') + { + static int cache_latency[][4] = { + {3, 12, 30}, /* sw6 -- Bcache from LMbench. */ + // { 4, 15, 90 }, /* sw6b -- Bcache from LMbench. */ + {3, 7, 11}, /* sw8a -- Bcache from LMbench. */ + }; + if (flag_sw_rtx_cost) + { + cache_latency[sw_64_tune][0] = 3; + cache_latency[sw_64_tune][1] = 7; + cache_latency[sw_64_tune][2] = 11; + } + + lat = sw_64_mlat_string[1] - '0'; + if (lat <= 0 || lat > 3 || cache_latency[sw_64_tune][lat - 1] == -1) + { + warning (0, "L%d cache latency unknown for %s", lat, + sw_64_cpu_name[sw_64_tune]); + lat = 3; + } + else + lat = cache_latency[sw_64_tune][lat - 1]; + } + else if (!strcmp (sw_64_mlat_string, "main")) + { + /* Most current memories have about 370ns latency. This is + a reasonable guess for a fast cpu. */ + lat = 150; + } + else + { + warning (0, "bad value %qs for %<-mmemory-latency%>", + sw_64_mlat_string); + lat = 3; + } + + sw_64_memory_latency = lat; + } + + /* Default the definition of "small data" to 8 bytes. */ + if (!global_options_set.x_g_switch_value) + g_switch_value = 8; + + /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. 
*/ + if (flag_pic == 1) + target_flags |= MASK_SMALL_DATA; + else if (flag_pic == 2) + target_flags &= ~MASK_SMALL_DATA; + + sw_64_override_options_after_change (); + + /* Register variables and functions with the garbage collector. */ + + /* Set up function hooks. */ + init_machine_status = sw_64_init_machine_status; + + /* Tell the compiler when we're using VAX floating point. */ + if (TARGET_FLOAT_VAX) + { + REAL_MODE_FORMAT (SFmode) = &vax_f_format; + REAL_MODE_FORMAT (DFmode) = &vax_g_format; + REAL_MODE_FORMAT (TFmode) = NULL; + } + +#ifdef TARGET_DEFAULT_LONG_DOUBLE_128 + if (!(target_flags_explicit & MASK_LONG_DOUBLE_128)) + target_flags |= MASK_LONG_DOUBLE_128; +#endif +} + +/* Implement targetm.override_options_after_change. */ + +static void +sw_64_override_options_after_change (void) +{ + /* Align labels and loops for optimal branching. */ + /* ??? Kludge these by not doing anything if we don't optimize. */ + if (optimize > 0) + { + if (flag_align_loops && !str_align_loops) + str_align_loops = "16"; + if (flag_align_jumps && !str_align_jumps) + str_align_jumps = "16"; + } + if (flag_align_functions && !str_align_functions) + str_align_functions = "16"; +} + +/* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */ + +int +zap_mask (HOST_WIDE_INT value) +{ + int i; + + for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++, value >>= 8) + if ((value & 0xff) != 0 && (value & 0xff) != 0xff) + return 0; + + return 1; +} + +/* Return true if OP is valid for a particular TLS relocation. + We are already guaranteed that OP is a CONST. */ + +int +tls_symbolic_operand_1 (rtx op, int size, int unspec) +{ + op = XEXP (op, 0); + + if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec) + return 0; + op = XVECEXP (op, 0, 0); + + if (GET_CODE (op) != SYMBOL_REF) + return 0; + + switch (SYMBOL_REF_TLS_MODEL (op)) + { + case TLS_MODEL_LOCAL_DYNAMIC: + return unspec == UNSPEC_DTPREL && size == sw_64_tls_size; + case TLS_MODEL_INITIAL_EXEC: + return unspec == UNSPEC_TPREL && size == 64; + case TLS_MODEL_LOCAL_EXEC: + return unspec == UNSPEC_TPREL && size == sw_64_tls_size; + default: + gcc_unreachable (); + } +} + +/* Used by aligned_memory_operand and unaligned_memory_operand to + resolve what reload is going to do with OP if it's a register. */ + +rtx +resolve_reload_operand (rtx op) +{ + if (reload_in_progress) + { + rtx tmp = op; + if (SUBREG_P (tmp)) + tmp = SUBREG_REG (tmp); + if (REG_P (tmp) && REGNO (tmp) >= FIRST_PSEUDO_REGISTER) + { + op = reg_equiv_memory_loc (REGNO (tmp)); + if (op == 0) + return 0; + } + } + return op; +} + +/* The scalar modes supported differs from the default check-what-c-supports + version in that sometimes TFmode is available even when long double + indicates only DFmode. */ + +static bool +sw_64_scalar_mode_supported_p (scalar_mode mode) +{ + switch (mode) + { + case E_QImode: + case E_HImode: + case E_SImode: + case E_DImode: + case E_TImode: /* via optabs.c. */ + return true; + + case E_SFmode: + case E_DFmode: + return true; + + case E_TFmode: + return TARGET_HAS_XFLOATING_LIBS; + + default: + return false; + } +} + +/* Sw_64 implements a couple of integer vector mode operations when + TARGET_MAX is enabled. We do not check TARGET_MAX here, however, + which allows the vectorizer to operate on e.g. move instructions, + or when expand_vector_operations can do something useful. 
*/ + +static bool +sw_64_vector_mode_supported_p (machine_mode mode) +{ + return mode == V8QImode || mode == V4HImode || mode == V2SImode; +} + +/* Return the TLS model to use for SYMBOL. */ + +static enum tls_model +tls_symbolic_operand_type (rtx symbol) +{ + enum tls_model model; + + if (GET_CODE (symbol) != SYMBOL_REF) + return TLS_MODEL_NONE; + model = SYMBOL_REF_TLS_MODEL (symbol); + + /* Local-exec with a 64-bit size is the same code as initial-exec. */ + if (model == TLS_MODEL_LOCAL_EXEC && sw_64_tls_size == 64) + model = TLS_MODEL_INITIAL_EXEC; + + return model; +} + +/* Return true if the function DECL will share the same GP as any + function in the current unit of translation. */ + +static bool +decl_has_samegp (const_tree decl) +{ + /* Functions that are not local can be overridden, and thus may + not share the same gp. */ + if (!(*targetm.binds_local_p) (decl)) + return false; + + /* If -msmall-data is in effect, assume that there is only one GP + for the module, and so any local symbol has this property. We + need explicit relocations to be able to enforce this for symbols + not defined in this unit of translation, however. */ + if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA) + return true; + + /* Functions that are not external are defined in this UoT. */ + /* ??? Irritatingly, static functions not yet emitted are still + marked "external". Apply this to non-static functions only. */ + return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl); +} + +/* Return true if EXP should be placed in the small data section. */ + +static bool +sw_64_in_small_data_p (const_tree exp) +{ + /* We want to merge strings, so we never consider them small data. */ + if (TREE_CODE (exp) == STRING_CST) + return false; + + /* Functions are never in the small data area. Duh. */ + if (TREE_CODE (exp) == FUNCTION_DECL) + return false; + + /* COMMON symbols are never small data. */ + if (TREE_CODE (exp) == VAR_DECL && DECL_COMMON (exp)) + return false; + + if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) + { + const char *section = DECL_SECTION_NAME (exp); + if (strcmp (section, ".sdata") == 0 || strcmp (section, ".sbss") == 0) + return true; + } + else + { + HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); + + /* If this is an incomplete type with size 0, then we can't put it + in sdata because it might be too big when completed. */ + if (size > 0 && size <= g_switch_value) + return true; + } + + return false; +} + +/* legitimate_address_p recognizes an RTL expression that is a valid + memory address for an instruction. The MODE argument is the + machine mode for the MEM expression that wants to use this address. + + For Sw_64, we have either a constant address or the sum of a + register and a constant address, or just a register. For DImode, + any of those forms can be surrounded with an AND that clear the + low-order three bits; this is an "unaligned" access. */ + +static bool +sw_64_legitimate_address_p (machine_mode mode, rtx x, bool strict) +{ + /* If this is an ldl_u type address, discard the outer AND. */ + if (((TARGET_SW_M32 && mode == SImode) || (!TARGET_SW_M32 && mode == DImode)) + && GET_CODE (x) == AND && CONST_INT_P (XEXP (x, 1)) + && INTVAL (XEXP (x, 1)) == -8) + x = XEXP (x, 0); + + /* Discard non-paradoxical subregs. */ + if (SUBREG_P (x) + && (GET_MODE_SIZE (GET_MODE (x)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) + x = SUBREG_REG (x); + + /* Unadorned general registers are valid. */ + if (REG_P (x) + && (strict ? 
STRICT_REG_OK_FOR_BASE_P (x) + : NONSTRICT_REG_OK_FOR_BASE_P (x))) + return true; + + /* Constant addresses (i.e. +/- 32k) are valid. */ + if (CONSTANT_ADDRESS_P (x)) + return true; + + if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC + || GET_CODE (x) == POST_MODIFY) + && TARGET_SW8A + && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0)) + : NONSTRICT_REG_OK_FOR_BASE_P (XEXP (x, 0)))) + return true; + /* Register plus a small constant offset is valid. */ + if (GET_CODE (x) == PLUS) + { + rtx ofs = XEXP (x, 1); + x = XEXP (x, 0); + + /* Discard non-paradoxical subregs. */ + if (SUBREG_P (x) + && (GET_MODE_SIZE (GET_MODE (x)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) + x = SUBREG_REG (x); + + if (REG_P (x)) + { + if (!strict && NONSTRICT_REG_OK_FP_BASE_P (x) && CONST_INT_P (ofs)) + return true; + if ((strict ? STRICT_REG_OK_FOR_BASE_P (x) + : NONSTRICT_REG_OK_FOR_BASE_P (x)) + && CONSTANT_ADDRESS_P (ofs)) + return true; + } + } + + /* If we're managing explicit relocations, LO_SUM is valid, as are small + data symbols. Avoid explicit relocations of modes larger than word + mode since i.e. $LC0+8($1) can fold around +/- 32k offset. */ + else if (TARGET_EXPLICIT_RELOCS && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) + { + if (small_symbolic_operand (x, Pmode)) + return true; + + if (GET_CODE (x) == LO_SUM) + { + rtx ofs = XEXP (x, 1); + x = XEXP (x, 0); + + /* Discard non-paradoxical subregs. */ + if (SUBREG_P (x) + && (GET_MODE_SIZE (GET_MODE (x)) + < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) + x = SUBREG_REG (x); + + /* Must have a valid base register. */ + if (!(REG_P (x) + && (strict ? STRICT_REG_OK_FOR_BASE_P (x) + : NONSTRICT_REG_OK_FOR_BASE_P (x)))) + return false; + + /* The symbol must be local. */ + if (local_symbolic_operand (ofs, Pmode) + || dtp32_symbolic_operand (ofs, Pmode) + || tp32_symbolic_operand (ofs, Pmode)) + return true; + } + } + + return false; +} + +/* Build the SYMBOL_REF for __tls_get_addr. */ + +static GTY (()) rtx tls_get_addr_libfunc; + +static rtx +get_tls_get_addr (void) +{ + if (!tls_get_addr_libfunc) + tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr"); + return tls_get_addr_libfunc; +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. If we find one, return the new, valid address. */ + +static rtx +sw_64_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode) +{ + HOST_WIDE_INT addend; + + /* If the address is (plus reg const_int) and the CONST_INT is not a + valid offset, compute the high part of the constant and add it to + the register. Then our address is (plus temp low-part-const). */ + if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)) + && !CONSTANT_ADDRESS_P (XEXP (x, 1))) + { + addend = INTVAL (XEXP (x, 1)); + x = XEXP (x, 0); + goto split_addend; + } + + /* If the address is (const (plus FOO const_int)), find the low-order + part of the CONST_INT. Then load FOO plus any high-order part of the + CONST_INT into a register. Our address is (plus reg low-part-const). + This is done to reduce the number of GOT entries. */ + if (can_create_pseudo_p () && GET_CODE (x) == CONST + && GET_CODE (XEXP (x, 0)) == PLUS && CONST_INT_P (XEXP (XEXP (x, 0), 1))) + { + addend = INTVAL (XEXP (XEXP (x, 0), 1)); + x = force_reg (Pmode, XEXP (XEXP (x, 0), 0)); + goto split_addend; + } + + /* If we have a (plus reg const), emit the load as in (2), then add + the two registers, and finally generate (plus reg low-part-const) as + our address. 
*/ + if (can_create_pseudo_p () && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) + && GET_CODE (XEXP (x, 1)) == CONST + && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS + && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1))) + { + addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1)); + x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0), + XEXP (XEXP (XEXP (x, 1), 0), 0), NULL_RTX, 1, + OPTAB_LIB_WIDEN); + goto split_addend; + } + + /* If this is a local symbol, split the address into HIGH/LO_SUM parts. + Avoid modes larger than word mode since i.e. $LC0+8($1) can fold + around +/- 32k offset. */ + if (TARGET_EXPLICIT_RELOCS && GET_MODE_SIZE (mode) <= UNITS_PER_WORD + && symbolic_operand (x, Pmode)) + { + rtx r0, r16, eqv, tga, tp, dest, seq; + rtx_insn *insn; + + switch (tls_symbolic_operand_type (x)) + { + case TLS_MODEL_NONE: + break; + + case TLS_MODEL_GLOBAL_DYNAMIC: + { + start_sequence (); + + r0 = gen_rtx_REG (Pmode, 0); + r16 = gen_rtx_REG (Pmode, 16); + tga = get_tls_get_addr (); + dest = gen_reg_rtx (Pmode); + seq = GEN_INT (sw_64_next_sequence_number++); + if (sw_64_tls_gd == 16) + { + emit_insn ( + gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq)); + } + else if (sw_64_tls_gd == 32) + { + eqv + = gen_rtx_UNSPEC (Pmode, + gen_rtvec (3, pic_offset_table_rtx, x, seq), + UNSPEC_TLSRELGOT); + + emit_insn (gen_rtx_SET (r16, eqv)); + emit_insn (gen_movdi_er_tlsgd (r16, r16, x, seq)); + } + rtx val = gen_call_value_osf_tlsgd (r0, tga, seq); + insn = emit_call_insn (val); + RTL_CONST_CALL_P (insn) = 1; + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16); + + insn = get_insns (); + end_sequence (); + + emit_libcall_block (insn, dest, r0, x); + return dest; + } + + case TLS_MODEL_LOCAL_DYNAMIC: + { + start_sequence (); + + r0 = gen_rtx_REG (Pmode, 0); + r16 = gen_rtx_REG (Pmode, 16); + tga = get_tls_get_addr (); + scratch = gen_reg_rtx (Pmode); + seq = GEN_INT (sw_64_next_sequence_number++); + if (sw_64_tls_ldm == 16) + { + emit_insn ( + gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq)); + } + else if (sw_64_tls_ldm == 32) + { + eqv + = gen_rtx_UNSPEC (Pmode, + gen_rtvec (3, pic_offset_table_rtx, x, seq), + UNSPEC_TLSRELGOT); + + emit_insn (gen_rtx_SET (r16, eqv)); + emit_insn (gen_movdi_er_tlsldm (r16, r16, seq)); + } + rtx val = gen_call_value_osf_tlsldm (r0, tga, seq); + insn = emit_call_insn (val); + RTL_CONST_CALL_P (insn) = 1; + use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16); + + insn = get_insns (); + end_sequence (); + + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), + UNSPEC_TLSLDM_CALL); + emit_libcall_block (insn, scratch, r0, eqv); + + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL); + eqv = gen_rtx_CONST (Pmode, eqv); + + if (sw_64_tls_size == 64) + { + if (sw_64_tls_gotdtprel == 16) + { + dest = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (dest, eqv)); + emit_insn (gen_adddi3 (dest, dest, scratch)); + } + else if (sw_64_tls_gotdtprel == 32) + { + seq = GEN_INT (sw_64_next_sequence_number++); + eqv = gen_rtx_UNSPEC (Pmode, + gen_rtvec (3, pic_offset_table_rtx, x, + seq), + UNSPEC_TLSRELGOT); + dest = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (dest, eqv)); + + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, dest, x, seq), + UNSPEC_GOTDTPREL); + emit_insn (gen_rtx_SET (dest, eqv)); + + emit_insn (gen_adddi3 (dest, dest, scratch)); + } + return dest; + } + if (sw_64_tls_size == 32) + { + rtx temp = gen_rtx_HIGH (Pmode, eqv); + temp = gen_rtx_PLUS (Pmode, scratch, temp); + scratch = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (scratch, temp)); + } + return 
gen_rtx_LO_SUM (Pmode, scratch, eqv); + } + + case TLS_MODEL_INITIAL_EXEC: + { + if (sw_64_tls_gottprel == 16) + { + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL); + eqv = gen_rtx_CONST (Pmode, eqv); + tp = gen_reg_rtx (Pmode); + scratch = gen_reg_rtx (Pmode); + dest = gen_reg_rtx (Pmode); + + emit_insn (gen_get_thread_pointerdi (tp)); + emit_insn (gen_rtx_SET (scratch, eqv)); + emit_insn (gen_adddi3 (dest, tp, scratch)); + } + else if (sw_64_tls_gottprel == 32) + { + seq = GEN_INT (sw_64_next_sequence_number++); + + tp = gen_reg_rtx (Pmode); + emit_insn (gen_get_thread_pointerdi (tp)); + + scratch = gen_reg_rtx (Pmode); + eqv + = gen_rtx_UNSPEC (Pmode, + gen_rtvec (3, pic_offset_table_rtx, x, seq), + UNSPEC_TLSRELGOT); + emit_insn (gen_rtx_SET (scratch, eqv)); + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, scratch, x, seq), + UNSPEC_TPREL); + emit_insn (gen_rtx_SET (scratch, eqv)); + + dest = gen_reg_rtx (Pmode); + emit_insn (gen_adddi3 (dest, tp, scratch)); + } + return dest; + } + + case TLS_MODEL_LOCAL_EXEC: + eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL); + eqv = gen_rtx_CONST (Pmode, eqv); + tp = gen_reg_rtx (Pmode); + + emit_insn (gen_get_thread_pointerdi (tp)); + if (sw_64_tls_size == 32) + { + rtx temp = gen_rtx_HIGH (Pmode, eqv); + temp = gen_rtx_PLUS (Pmode, tp, temp); + tp = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (tp, temp)); + } + return gen_rtx_LO_SUM (Pmode, tp, eqv); + + default: + gcc_unreachable (); + } + + if (local_symbolic_operand (x, Pmode)) + { + if (small_symbolic_operand (x, Pmode)) + return x; + else + { + if (can_create_pseudo_p ()) + scratch = gen_reg_rtx (Pmode); + emit_insn (gen_rtx_SET (scratch, gen_rtx_HIGH (Pmode, x))); + return gen_rtx_LO_SUM (Pmode, scratch, x); + } + } + } + + return NULL; + +split_addend: + { + HOST_WIDE_INT low, high; + + low = ((addend & 0xffff) ^ 0x8000) - 0x8000; + addend -= low; + high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000; + addend -= high; + + if (addend) + x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend), + (!can_create_pseudo_p () ? scratch : NULL_RTX), + 1, OPTAB_LIB_WIDEN); + if (high) + x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high), + (!can_create_pseudo_p () ? scratch : NULL_RTX), + 1, OPTAB_LIB_WIDEN); + + return plus_constant (Pmode, x, low); + } +} + +/* Try machine-dependent ways of modifying an illegitimate address + to be legitimate. Return X or the new, valid address. */ + +static rtx +sw_64_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, machine_mode mode) +{ + rtx new_x = sw_64_legitimize_address_1 (x, NULL_RTX, mode); + return new_x ? new_x : x; +} + +/* Return true if ADDR has an effect that depends on the machine mode it + is used for. On the Sw_64 this is true only for the unaligned modes. + We can simplify the test since we know that the address must be valid. */ + +static bool +sw_64_mode_dependent_address_p (const_rtx addr, + addr_space_t as ATTRIBUTE_UNUSED) +{ + return GET_CODE (addr) == AND; +} + +/* Primarily this is required for TLS symbols, but given that our move + patterns *ought* to be able to handle any symbol at any time, we + should never be spilling symbolic operands to the constant pool, ever. 
*/ + +static bool +sw_64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) +{ + enum rtx_code code = GET_CODE (x); + return code == SYMBOL_REF || code == LABEL_REF || code == CONST; +} + +/* We do not allow indirect calls to be optimized into sibling calls, nor + can we allow a call to a function with a different GP to be optimized + into a sibcall. */ + +static bool +sw_64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) +{ + /* Can't do indirect tail calls, since we don't know if the target + uses the same GP. */ + if (!decl) + return false; + + /* Otherwise, we can make a tail call if the target function shares + the same GP. */ + return decl_has_samegp (decl); +} + +bool +some_small_symbolic_operand_int (rtx x) +{ + subrtx_var_iterator::array_type array; + FOR_EACH_SUBRTX_VAR (iter, array, x, ALL) + { + rtx x = *iter; + /* Don't re-split. */ + if (GET_CODE (x) == LO_SUM) + iter.skip_subrtxes (); + else if (small_symbolic_operand (x, Pmode)) + return true; + } + return false; +} + +rtx +split_small_symbolic_operand (rtx x) +{ + x = copy_insn (x); + subrtx_ptr_iterator::array_type array; + FOR_EACH_SUBRTX_PTR (iter, array, &x, ALL) + { + rtx *ptr = *iter; + rtx x = *ptr; + /* Don't re-split. */ + if (GET_CODE (x) == LO_SUM) + iter.skip_subrtxes (); + else if (small_symbolic_operand (x, Pmode)) + { + *ptr = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x); + iter.skip_subrtxes (); + } + } + return x; +} + +/* Indicate that INSN cannot be duplicated. This is true for any insn + that we've marked with gpdisp relocs, since those have to stay in + 1-1 correspondence with one another. + + Technically we could copy them if we could set up a mapping from one + sequence number to another, across the set of insns to be duplicated. + This seems overly complicated and error-prone since interblock motion + from sched-ebb could move one of the pair of insns to a different block. + + Also cannot allow call insns to be duplicated. If they throw exceptions, + then they'll be in a different block from their ldgp. Which could lead + the bb reorder code to think that it would be ok to copy just the block + containing the call and branch to the block containing the ldgp. */ + +static bool +sw_64_cannot_copy_insn_p (rtx_insn *insn) +{ + if (!reload_completed || !TARGET_EXPLICIT_RELOCS) + return false; + if (recog_memoized (insn) >= 0) + return get_attr_cannot_copy (insn); + else + return false; +} + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and return the new rtx. */ + +rtx +sw_64_legitimize_reload_address (rtx x, machine_mode mode ATTRIBUTE_UNUSED, + int opnum, int type, + int ind_levels ATTRIBUTE_UNUSED) +{ + /* We must recognize output that we have already generated ourselves. */ + if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS + && REG_P (XEXP (XEXP (x, 0), 0)) && CONST_INT_P (XEXP (XEXP (x, 0), 1)) + && CONST_INT_P (XEXP (x, 1))) + { + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, BASE_REG_CLASS, + GET_MODE (x), VOIDmode, 0, 0, opnum, + (enum reload_type) type); + return x; + } + + /* We wish to handle large displacements off a base register by + splitting the addend across an ldih and the mem insn. This + cuts number of extra insns needed from 3 to 1. 
*/ + if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) + && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER + && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0))) && CONST_INT_P (XEXP (x, 1))) + { + HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); + HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT high + = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000; + + /* Check for 32-bit overflow. */ + if (high + low != val) + return NULL_RTX; + + /* Reload the high part into a base reg; leave the low part + in the mem directly. */ + x = gen_rtx_PLUS (GET_MODE (x), + gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), + GEN_INT (high)), + GEN_INT (low)); + + push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, BASE_REG_CLASS, + GET_MODE (x), VOIDmode, 0, 0, opnum, + (enum reload_type) type); + return x; + } + + return NULL_RTX; +} + +/* Return the cost of moving between registers of various classes. Moving + between FLOAT_REGS and anything else except float regs is expensive. + In fact, we make it quite expensive because we really don't want to + do these moves unless it is clearly worth it. Optimizations may + reduce the impact of not being able to allocate a pseudo to a + hard register. */ + +static int +sw_64_register_move_cost (machine_mode mode, reg_class_t from_i, + reg_class_t to_i) +{ + enum reg_class from = (enum reg_class) from_i; + enum reg_class to = (enum reg_class) to_i; + if (!flag_sw_rtx_cost) + { + if ((from == FLOAT_REGS) == (to == FLOAT_REGS)) + return 2; + if (TARGET_FIX) + return (from == FLOAT_REGS) ? 6 : 8; + return 4 + 2 * sw_64_memory_latency; + } + if (from == R0_REG || from == R24_REG || from == R25_REG || from == R27_REG) + from = GENERAL_REGS; + if (to == R0_REG || to == R24_REG || to == R25_REG || to == R27_REG) + to = GENERAL_REGS; + if (GET_MODE_SIZE (mode) == 32) + { + if (from == GENERAL_REGS && to == GENERAL_REGS) + return 1; + else if (from == GENERAL_REGS) + return 16; + else if (to == GENERAL_REGS) + return 16; + if (!TARGET_SW_SIMD) + return 34; + return 2; + } + if (from == GENERAL_REGS && to == GENERAL_REGS) + return 1; + else if (from == GENERAL_REGS) + return 4; + else if (to == GENERAL_REGS) + return 4; + return 2; +} + +/* Return the cost of moving data of MODE from a register to + or from memory. On the Sw_64, bump this up a bit. */ + +static int +sw_64_memory_move_cost (machine_mode /*mode. */, reg_class_t /*regclass. */, + bool /*in. */) +{ + if (flag_sw_rtx_cost) + return sw_64_memory_latency; + return 2 * sw_64_memory_latency; +} + +/* Compute a (partial) cost for rtx X. Return true if the complete + cost has been computed, and false if subexpressions should be + scanned. In either case, *TOTAL contains the cost result. */ + +static bool +sw_64_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total, + bool speed) +{ + int code = GET_CODE (x); + bool float_mode_p = FLOAT_MODE_P (mode); + const struct sw_64_rtx_cost_data *cost_data; + + if (!speed) + cost_data = &sw_64_rtx_cost_size; + else if (flag_sw_rtx_cost) + cost_data = &sw_64_rtx_cost_data[2]; + else + cost_data = &sw_64_rtx_cost_data[sw_64_tune]; + + switch (code) + { + case CONST_INT: + /* If this is an 8-bit constant, return zero since it can be used + nearly anywhere with no cost. If it is a valid operand for an + ADD or AND, likewise return 0 if we know it will be used in that + context. Otherwise, return 2 since it might be used there later. + All other constants take at least two insns. 
*/ + if (INTVAL (x) >= 0 && INTVAL (x) < 256) + { + *total = 0; + return true; + } + /* FALLTHRU */ + + case CONST_DOUBLE: + case CONST_WIDE_INT: + if (x == CONST0_RTX (mode)) + *total = 0; + else if ((outer_code == PLUS && add_operand (x, VOIDmode)) + || (outer_code == AND && and_operand (x, VOIDmode))) + *total = 0; + else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode)) + *total = 2; + else + *total = COSTS_N_INSNS (2); + return true; + + case CONST: + case SYMBOL_REF: + case LABEL_REF: + if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode)) + *total = COSTS_N_INSNS (outer_code != MEM); + else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode)) + *total = COSTS_N_INSNS (1 + (outer_code != MEM)); + else if (tls_symbolic_operand_type (x)) + /* ??? How many insns do we emit here? More than one... */ + *total = COSTS_N_INSNS (15); + else + /* Otherwise we do a load from the GOT. */ + *total = COSTS_N_INSNS (!speed ? 1 : sw_64_memory_latency); + return true; + + case HIGH: + /* This is effectively an add_operand. */ + *total = 2; + return true; + + case PLUS: + case MINUS: + if (float_mode_p) + *total = cost_data->fp_add; + else if ((GET_CODE (XEXP (x, 0)) == ASHIFT) + || (GET_CODE (XEXP (x, 0)) == MULT) + && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode)) + { + *total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode, + (enum rtx_code) outer_code, opno, speed) + + rtx_cost (XEXP (x, 1), mode, (enum rtx_code) outer_code, + opno, speed) + + COSTS_N_INSNS (1)); + return true; + } + return false; + + case MULT: + if (float_mode_p) + *total = cost_data->fp_mult; + else if (mode == DImode) + *total = cost_data->int_mult_di; + else + *total = cost_data->int_mult_si; + return false; + + case ASHIFT: + if (CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) <= 3) + { + *total = COSTS_N_INSNS (1); + return false; + } + /* FALLTHRU */ + + case ASHIFTRT: + case LSHIFTRT: + *total = cost_data->int_shift; + return false; + + case IF_THEN_ELSE: + if (float_mode_p) + *total = cost_data->fp_add; + else + *total = cost_data->int_cmov; + if (flag_sw_rtx_cost && float_mode_p) + *total = COSTS_N_INSNS (2); + return false; + + case DIV: + case UDIV: + case MOD: + case UMOD: + if (!float_mode_p) + *total = cost_data->int_div; + else if (mode == SFmode) + *total = cost_data->fp_div_sf; + else + *total = cost_data->fp_div_df; + return false; + + case MEM: + *total = COSTS_N_INSNS (!speed ? 1 : sw_64_memory_latency); + return true; + + case NEG: + if (!float_mode_p) + { + *total = COSTS_N_INSNS (1); + return false; + } + /* FALLTHRU */ + + case ABS: + if (!float_mode_p) + { + *total = COSTS_N_INSNS (1) + cost_data->int_cmov; + return false; + } + if (flag_sw_rtx_cost) + { + *total = COSTS_N_INSNS (2); + return false; + } + /* FALLTHRU */ + + case FLOAT: + case UNSIGNED_FLOAT: + case FIX: + case UNSIGNED_FIX: + if (flag_sw_rtx_cost) + { + *total = COSTS_N_INSNS (4); + return false; + } + case FLOAT_TRUNCATE: + *total = cost_data->fp_add; + return false; + + case FLOAT_EXTEND: + if (MEM_P (XEXP (x, 0))) + *total = 0; + else + *total = cost_data->fp_add; + return false; + + default: + return false; + } +} + +/* REF is an alignable memory location. Place an aligned SImode + reference into *PALIGNED_MEM and the number of bits to shift into + *PBITNUM. SCRATCH is a free register for use in reloading out + of range stack slots. 
*/ + +void +get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum) +{ + rtx base; + HOST_WIDE_INT disp, offset; + + gcc_assert (MEM_P (ref)); + + if (reload_in_progress) + { + base = find_replacement (&XEXP (ref, 0)); + gcc_assert (memory_address_p (GET_MODE (ref), base)); + } + else + base = XEXP (ref, 0); + + if (GET_CODE (base) == PLUS) + disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0); + else + disp = 0; + + /* Find the byte offset within an aligned word. If the memory itself is + claimed to be aligned, believe it. Otherwise, aligned_memory_operand + will have examined the base register and determined it is aligned, and + thus displacements from it are naturally alignable. */ + if (MEM_ALIGN (ref) >= 32) + offset = 0; + else + offset = disp & 3; + + /* The location should not cross aligned word boundary. */ + gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref)) + <= GET_MODE_SIZE (SImode)); + + /* Access the entire aligned word. */ + *paligned_mem = widen_memory_access (ref, SImode, -offset); + + /* Convert the byte offset within the word to a bit offset. */ + offset *= BITS_PER_UNIT; + *pbitnum = GEN_INT (offset); +} + +/* Similar, but just get the address. Handle the two reload cases. + Add EXTRA_OFFSET to the address we return. */ + +rtx +get_unaligned_address (rtx ref) +{ + rtx base; + HOST_WIDE_INT offset = 0; + + gcc_assert (MEM_P (ref)); + + if (reload_in_progress) + { + base = find_replacement (&XEXP (ref, 0)); + gcc_assert (memory_address_p (GET_MODE (ref), base)); + } + else + base = XEXP (ref, 0); + + if (GET_CODE (base) == PLUS) + offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0); + + return plus_constant (Pmode, base, offset); +} + +/* Compute a value X, such that X & 7 == (ADDR + OFS) & 7. + X is always returned in a register. */ + +rtx +get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs) +{ + if (GET_CODE (addr) == PLUS) + { + ofs += INTVAL (XEXP (addr, 1)); + addr = XEXP (addr, 0); + } + + return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7), NULL_RTX, 1, + OPTAB_LIB_WIDEN); +} + +/* On the Sw_64, all (non-symbolic) constants except zero go into + a floating-point register via memory. Note that we cannot + return anything that is not a subset of RCLASS, and that some + symbolic constants cannot be dropped to memory. */ + +enum reg_class +sw_64_preferred_reload_class (rtx x, enum reg_class rclass) +{ + /* Zero is present in any register class. */ + if (x == CONST0_RTX (GET_MODE (x))) + return rclass; + + /* These sorts of constants we can easily drop to memory. */ + if (CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) + || GET_CODE (x) == CONST_VECTOR) + { + if (rclass == FLOAT_REGS) + return NO_REGS; + if (rclass == ALL_REGS) + return GENERAL_REGS; + return rclass; + } + + /* All other kinds of constants should not (and in the case of HIGH + cannot) be dropped to memory -- instead we use a GENERAL_REGS + secondary reload. */ + if (CONSTANT_P (x)) + return (rclass == ALL_REGS ? GENERAL_REGS : rclass); + + return rclass; +} + +/* Inform reload about cases where moving X with a mode MODE to a register in + RCLASS requires an extra scratch or immediate register. Return the class + needed for the immediate register. */ + +static reg_class_t +sw_64_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, + machine_mode mode, secondary_reload_info *sri) +{ + enum reg_class rclass = (enum reg_class) rclass_i; + + /* Loading and storing HImode or QImode values to and from memory + usually requires a scratch register. 
*/ + if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode)) + { + if (any_memory_operand (x, mode)) + { + if (in_p) + { + if (!aligned_memory_operand (x, mode)) + sri->icode = direct_optab_handler (reload_in_optab, mode); + } + else + sri->icode = direct_optab_handler (reload_out_optab, mode); + return NO_REGS; + } + } + + /* We also cannot do integral arithmetic into FP regs, as might result + from register elimination into a DImode fp register. */ + if (rclass == FLOAT_REGS) + { + if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND) + return GENERAL_REGS; + if (in_p && INTEGRAL_MODE_P (mode) && !MEM_P (x) && !REG_P (x) + && !CONST_INT_P (x)) + return GENERAL_REGS; + } + + return NO_REGS; +} + +/* Implement TARGET_SECONDARY_MEMORY_NEEDED. + + If we are copying between general and FP registers, we need a memory + location unless the FIX extension is available. */ + +static bool +sw_64_secondary_memory_needed (machine_mode, reg_class_t class1, + reg_class_t class2) +{ + return (!TARGET_FIX + && ((class1 == FLOAT_REGS && class2 != FLOAT_REGS) + || (class2 == FLOAT_REGS && class1 != FLOAT_REGS))); +} + +/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. If MODE is + floating-point, use it. Otherwise, widen to a word like the default. + This is needed because we always store integers in FP registers in + quadword format. This whole area is very tricky! */ + +static machine_mode +sw_64_secondary_memory_needed_mode (machine_mode mode) +{ + if (GET_MODE_CLASS (mode) == MODE_FLOAT) + return mode; + if (GET_MODE_SIZE (mode) >= 4) + return mode; + return mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (mode), 0).require (); +} + +/* Given SEQ, which is an INSN list, look for any MEMs in either + a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and + volatile flags from REF into each of the MEMs found. If REF is not + a MEM, don't do anything. */ + +void +sw_64_set_memflags (rtx seq, rtx ref) +{ + rtx_insn *insn; + + if (!MEM_P (ref)) + return; + + /* This is only called from sw_64.md, after having had something + generated from one of the insn patterns. So if everything is + zero, the pattern is already up-to-date. */ + if (!MEM_VOLATILE_P (ref) && !MEM_NOTRAP_P (ref) && !MEM_READONLY_P (ref)) + return; + + subrtx_var_iterator::array_type array; + for (insn = as_a <rtx_insn *> (seq); insn; insn = NEXT_INSN (insn)) + if (INSN_P (insn)) + FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST) + { + rtx x = *iter; + if (MEM_P (x)) + { + MEM_VOLATILE_P (x) = MEM_VOLATILE_P (ref); + MEM_NOTRAP_P (x) = MEM_NOTRAP_P (ref); + MEM_READONLY_P (x) = MEM_READONLY_P (ref); + /* Sadly, we cannot use alias sets because the extra + aliasing produced by the AND interferes. Given that + two-byte quantities are the only thing we would be + able to differentiate anyway, there does not seem to + be any point in convoluting the early out of the + alias check. */ + iter.skip_subrtxes (); + } + } + else + gcc_unreachable (); +} + +static rtx +sw_64_emit_set_const (rtx, machine_mode, HOST_WIDE_INT, int, bool); + +/* Internal routine for sw_64_emit_set_const to check for N or below insns. + If NO_OUTPUT is true, then we only check to see if N insns are possible, + and return pc_rtx if successful. */ + +static rtx +sw_64_emit_set_const_1 (rtx target, machine_mode mode, HOST_WIDE_INT c, int n, + bool no_output) +{ + HOST_WIDE_INT new_const; + int i, bits; + /* Use a pseudo if highly optimizing and still generating RTL. */ + rtx subtarget + = (flag_expensive_optimizations && can_create_pseudo_p () ?
0 : target); + rtx temp, insn; + + /* If this is a sign-extended 32-bit constant, we can do this in at most + three insns, so do it if we have enough insns left. */ + + if (c >> 31 == -1 || c >> 31 == 0) + { + HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT tmp1 = c - low; + HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT extra = 0; + + /* If HIGH will be interpreted as negative but the constant is + positive, we must adjust it to do two ldha insns. */ + + if ((high & 0x8000) != 0 && c >= 0) + { + extra = 0x4000; + tmp1 -= 0x40000000; + high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000); + } + + if (c == low || (low == 0 && extra == 0)) + { + /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode) + but that meant that we can't handle INT_MIN on 32-bit machines + (like NT/Sw_64), because we recurse indefinitely through + emit_move_insn to gen_movdi. So instead, since we know exactly + what we want, create it explicitly. */ + + if (no_output) + return pc_rtx; + if (target == NULL) + target = gen_reg_rtx (mode); + emit_insn (gen_rtx_SET (target, GEN_INT (c))); + return target; + } + else if (n >= 2 + (extra != 0)) + { + if (no_output) + return pc_rtx; + if (!can_create_pseudo_p ()) + { + emit_insn (gen_rtx_SET (target, GEN_INT (high << 16))); + temp = target; + } + else + temp + = copy_to_suggested_reg (GEN_INT (high << 16), subtarget, mode); + + /* As of 2002-02-23, addsi3 is only available when not optimizing. + This means that if we go through expand_binop, we'll try to + generate extensions, etc, which will require new pseudos, which + will fail during some split phases. The SImode add patterns + still exist, but are not named. So build the insns by hand. */ + + if (extra != 0) + { + if (!subtarget) + subtarget = gen_reg_rtx (mode); + insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16)); + insn = gen_rtx_SET (subtarget, insn); + emit_insn (insn); + temp = subtarget; + } + + if (target == NULL) + target = gen_reg_rtx (mode); + insn = gen_rtx_PLUS (mode, temp, GEN_INT (low)); + insn = gen_rtx_SET (target, insn); + emit_insn (insn); + return target; + } + } + + /* If we couldn't do it that way, try some other methods. But if we have + no instructions left, don't bother. Likewise, if this is SImode and + we can't make pseudos, we can't do anything since the expand_binop + and expand_unop calls will widen and try to make pseudos. */ + + if (n == 1 || (mode == SImode && !can_create_pseudo_p ())) + return 0; + + /* Next, see if we can load a related constant and then shift and possibly + negate it to get the constant we want. Try this once each increasing + numbers of insns. */ + + for (i = 1; i < n; i++) + { + /* First, see if minus some low bits, we've an easy load of + high bits. */ + + new_const = ((c & 0xffff) ^ 0x8000) - 0x8000; + if (new_const != 0) + { + temp = sw_64_emit_set_const (subtarget, mode, c - new_const, i, + no_output); + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, add_optab, temp, GEN_INT (new_const), + target, 0, OPTAB_WIDEN); + } + } + + /* Next try complementing. */ + temp = sw_64_emit_set_const (subtarget, mode, ~c, i, no_output); + if (temp) + { + if (no_output) + return temp; + return expand_unop (mode, one_cmpl_optab, temp, target, 0); + } + + /* Next try to form a constant and do a left shift. We can do this + if some low-order bits are zero; the exact_log2 call below tells + us that information. 
The bits we are shifting out could be any + value, but here we'll just try the 0- and sign-extended forms of + the constant. To try to increase the chance of having the same + constant in more than one insn, start at the highest number of + bits to shift, but try all possibilities in case a ZAPNOT will + be useful. */ + + bits = exact_log2 (c & -c); + if (bits > 0) + for (; bits > 0; bits--) + { + new_const = c >> bits; + temp + = sw_64_emit_set_const (subtarget, mode, new_const, i, no_output); + if (!temp && c < 0) + { + new_const = (unsigned HOST_WIDE_INT) c >> bits; + temp = sw_64_emit_set_const (subtarget, mode, new_const, i, + no_output); + } + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, ashl_optab, temp, GEN_INT (bits), + target, 0, OPTAB_WIDEN); + } + } + + /* Now try high-order zero bits. Here we try the shifted-in bits as + all zero and all ones. Be careful to avoid shifting outside the + mode and to avoid shifting outside the host wide int size. */ + + bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8) + - floor_log2 (c) - 1); + if (bits > 0) + for (; bits > 0; bits--) + { + new_const = c << bits; + temp + = sw_64_emit_set_const (subtarget, mode, new_const, i, no_output); + if (!temp) + { + new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1); + temp = sw_64_emit_set_const (subtarget, mode, new_const, i, + no_output); + } + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, lshr_optab, temp, GEN_INT (bits), + target, 1, OPTAB_WIDEN); + } + } + + /* Now try high-order 1 bits. We get that with a sign-extension. + But one bit isn't enough here. Be careful to avoid shifting outside + the mode and to avoid shifting outside the host wide int size. */ + + bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8) + - floor_log2 (~c) - 2); + if (bits > 0) + for (; bits > 0; bits--) + { + new_const = c << bits; + temp + = sw_64_emit_set_const (subtarget, mode, new_const, i, no_output); + if (!temp) + { + new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1); + temp = sw_64_emit_set_const (subtarget, mode, new_const, i, + no_output); + } + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, ashr_optab, temp, GEN_INT (bits), + target, 0, OPTAB_WIDEN); + } + } + } + + /* Finally, see if can load a value into the target that is the same as the + constant except that all bytes that are 0 are changed to be 0xff. If we + can, then we can do a ZAPNOT to obtain the desired constant. */ + + new_const = c; + for (i = 0; i < 64; i += 8) + if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0) + new_const |= (HOST_WIDE_INT) 0xff << i; + + /* We are only called for SImode and DImode. If this is SImode, ensure that + we are sign extended to a full word. */ + + if (mode == SImode) + new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000; + + if (new_const != c) + { + temp + = sw_64_emit_set_const (subtarget, mode, new_const, n - 1, no_output); + if (temp) + { + if (no_output) + return temp; + return expand_binop (mode, and_optab, temp, GEN_INT (c | ~new_const), + target, 0, OPTAB_WIDEN); + } + } + + return 0; +} + +/* Try to output insns to set TARGET equal to the constant C if it can be + done in less than N insns. Do all computations in MODE. Returns the place + where the output has been placed if it can be done and the insns have been + emitted. If it would take more than N insns, zero is returned and no + insns and emitted. 
*/ + +static rtx +sw_64_emit_set_const (rtx target, machine_mode mode, HOST_WIDE_INT c, int n, + bool no_output) +{ + machine_mode orig_mode = mode; + rtx orig_target = target; + rtx result = 0; + int i; + + /* If we can't make any pseudos, TARGET is an SImode hard register, we + can't load this constant in one insn, do this in DImode. */ + if (!can_create_pseudo_p () && mode == SImode && REG_P (target) + && REGNO (target) < FIRST_PSEUDO_REGISTER) + { + result = sw_64_emit_set_const_1 (target, mode, c, 1, no_output); + if (result) + return result; + + target = no_output ? NULL : gen_lowpart (DImode, target); + mode = DImode; + } + else if (mode == V8QImode || mode == V4HImode || mode == V2SImode) + { + target = no_output ? NULL : gen_lowpart (DImode, target); + mode = DImode; + } + + /* Try 1 insn, then 2, then up to N. */ + for (i = 1; i <= n; i++) + { + result = sw_64_emit_set_const_1 (target, mode, c, i, no_output); + if (result) + { + rtx_insn *insn; + rtx set; + + if (no_output) + return result; + + insn = get_last_insn (); + set = single_set (insn); + if (!CONSTANT_P (SET_SRC (set))) + set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c)); + break; + } + } + + /* Allow for the case where we changed the mode of TARGET. */ + if (result) + { + if (result == target) + result = orig_target; + else if (mode != orig_mode) + result = gen_lowpart (orig_mode, result); + } + + return result; +} + +/* Having failed to find a 3 insn sequence in sw_64_emit_set_const, + fall back to a straight forward decomposition. We do this to avoid + exponential run times encountered when looking for longer sequences + with sw_64_emit_set_const. */ + +static rtx +sw_64_emit_set_long_const (rtx target, HOST_WIDE_INT c1) +{ + HOST_WIDE_INT d1, d2, d3, d4; + + /* Decompose the entire word. */ + + d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; + c1 -= d1; + d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; + c1 = (c1 - d2) >> 32; + d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; + c1 -= d3; + d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; + gcc_assert (c1 == d4); + + /* Construct the high word. */ + if (d4) + { + emit_move_insn (target, GEN_INT (d4)); + if (d3) + emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3))); + } + else + emit_move_insn (target, GEN_INT (d3)); + + /* Shift it into place. */ + emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32))); + + /* Add in the low bits. */ + if (d2) + emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2))); + if (d1) + emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1))); + + return target; +} + +/* Given an integral CONST_INT or CONST_VECTOR, return the low 64 bits. */ + +static HOST_WIDE_INT +sw_64_extract_integer (rtx x) +{ + if (GET_CODE (x) == CONST_VECTOR) + x = simplify_subreg (DImode, x, GET_MODE (x), 0); + + gcc_assert (CONST_INT_P (x)); + + return INTVAL (x); +} + +/* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which + we are willing to load the value into a register via a move pattern. + Normally this is all symbolic constants, integral constants that + take three or fewer instructions, and floating-point zero. 
*/ + +bool +sw_64_legitimate_constant_p (machine_mode mode, rtx x) +{ + HOST_WIDE_INT i0; + + switch (GET_CODE (x)) + { + case LABEL_REF: + case HIGH: + return true; + + case CONST: + if (GET_CODE (XEXP (x, 0)) == PLUS && CONST_INT_P (XEXP (XEXP (x, 0), 1))) + x = XEXP (XEXP (x, 0), 0); + else + return true; + + if (GET_CODE (x) != SYMBOL_REF) + return true; + /* FALLTHRU */ + + case SYMBOL_REF: + /* TLS symbols are never valid. */ + return SYMBOL_REF_TLS_MODEL (x) == 0; + + case CONST_WIDE_INT: + if (TARGET_BUILD_CONSTANTS) + return true; + if (x == CONST0_RTX (mode)) + return true; + mode = DImode; + gcc_assert (CONST_WIDE_INT_NUNITS (x) == 2); + i0 = CONST_WIDE_INT_ELT (x, 1); + if (sw_64_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) == NULL) + return false; + i0 = CONST_WIDE_INT_ELT (x, 0); + goto do_integer; + + case CONST_DOUBLE: + if (x == CONST0_RTX (mode)) + return true; + return false; + + case CONST_VECTOR: + if (x == CONST0_RTX (mode)) + return true; + if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) + return false; + if (GET_MODE_SIZE (mode) != 8) + return false; + /* FALLTHRU */ + + case CONST_INT: + if (TARGET_BUILD_CONSTANTS) + return true; + i0 = sw_64_extract_integer (x); + do_integer: + return sw_64_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) != NULL; + + default: + return false; + } +} + +/* Operand 1 is known to be a constant, and should require more than one + instruction to load. Emit that multi-part load. */ + +bool +sw_64_split_const_mov (machine_mode mode, rtx *operands) +{ + HOST_WIDE_INT i0; + rtx temp = NULL_RTX; + + i0 = sw_64_extract_integer (operands[1]); + + temp = sw_64_emit_set_const (operands[0], mode, i0, 3, false); + + if (!temp && TARGET_BUILD_CONSTANTS) + temp = sw_64_emit_set_long_const (operands[0], i0); + + if (temp) + { + if (!rtx_equal_p (operands[0], temp)) + emit_move_insn (operands[0], temp); + return true; + } + + return false; +} + +/* Expand a move instruction; return true if all work is done. + We don't handle non-bwx subword loads here. */ + +bool +sw_64_expand_mov (machine_mode mode, rtx *operands) +{ + rtx tmp; + + /* If the output is not a register, the input must be. */ + if (MEM_P (operands[0]) && !reg_or_0_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); + + /* Allow legitimize_address to perform some simplifications. */ + if (mode == Pmode && symbolic_operand (operands[1], mode)) + { + tmp = sw_64_legitimize_address_1 (operands[1], operands[0], mode); + if (tmp) + { + if (tmp == operands[0]) + return true; + operands[1] = tmp; + return false; + } + } + + /* Early out for non-constants and valid constants. */ + if (!CONSTANT_P (operands[1]) || input_operand (operands[1], mode)) + return false; + + /* Split large integers. */ + if (CONST_INT_P (operands[1]) || GET_CODE (operands[1]) == CONST_VECTOR) + { + if (sw_64_split_const_mov (mode, operands)) + return true; + } + + /* Otherwise we've nothing left but to drop the thing to memory. */ + tmp = force_const_mem (mode, operands[1]); + + if (tmp == NULL_RTX) + return false; + + if (reload_in_progress) + { + emit_move_insn (operands[0], XEXP (tmp, 0)); + operands[1] = replace_equiv_address (tmp, operands[0]); + } + else + operands[1] = validize_mem (tmp); + return false; +} + +/* Expand a non-bwx QImode or HImode move instruction; + return true if all work is done. */ + +bool +sw_64_expand_mov_nobwx (machine_mode mode, rtx *operands) +{ + rtx seq; + + /* If the output is not a register, the input must be. 
*/ + if (MEM_P (operands[0])) + operands[1] = force_reg (mode, operands[1]); + + /* Handle four memory cases, unaligned and aligned for either the input + or the output. The only case where we can be called during reload is + for aligned loads; all other cases require temporaries. */ + + if (any_memory_operand (operands[1], mode)) + { + if (aligned_memory_operand (operands[1], mode)) + { + if (reload_in_progress) + { + seq = gen_reload_in_aligned (mode, operands[0], operands[1]); + emit_insn (seq); + } + else + { + rtx aligned_mem, bitnum; + rtx scratch = gen_reg_rtx (SImode); + rtx subtarget; + bool copyout; + + get_aligned_mem (operands[1], &aligned_mem, &bitnum); + + subtarget = operands[0]; + if (REG_P (subtarget)) + subtarget = gen_lowpart (DImode, subtarget), copyout = false; + else + subtarget = gen_reg_rtx (DImode), copyout = true; + + if (mode == QImode) + seq = gen_aligned_loadqi (subtarget, aligned_mem, bitnum, + scratch); + else + seq = gen_aligned_loadhi (subtarget, aligned_mem, bitnum, + scratch); + emit_insn (seq); + + if (copyout) + emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); + } + } + else + { + /* Don't pass these as parameters since that makes the generated + code depend on parameter evaluation order which will cause + bootstrap failures. */ + + rtx temp1, temp2, subtarget, ua; + bool copyout; + + temp1 = gen_reg_rtx (DImode); + temp2 = gen_reg_rtx (DImode); + + subtarget = operands[0]; + if (REG_P (subtarget)) + subtarget = gen_lowpart (DImode, subtarget), copyout = false; + else + subtarget = gen_reg_rtx (DImode), copyout = true; + + ua = get_unaligned_address (operands[1]); + if (mode == QImode) + seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2); + else + seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2); + + sw_64_set_memflags (seq, operands[1]); + emit_insn (seq); + + if (copyout) + emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); + } + return true; + } + + if (any_memory_operand (operands[0], mode)) + { + if (aligned_memory_operand (operands[0], mode)) + { + rtx aligned_mem, bitnum; + rtx temp1 = gen_reg_rtx (SImode); + rtx temp2 = gen_reg_rtx (SImode); + + get_aligned_mem (operands[0], &aligned_mem, &bitnum); + + emit_insn ( + gen_aligned_store (aligned_mem, operands[1], bitnum, temp1, temp2)); + } + else + { + rtx temp1 = gen_reg_rtx (DImode); + rtx temp2 = gen_reg_rtx (DImode); + rtx temp3 = gen_reg_rtx (DImode); + rtx ua = get_unaligned_address (operands[0]); + + seq + = gen_unaligned_store (mode, ua, operands[1], temp1, temp2, temp3); + + sw_64_set_memflags (seq, operands[0]); + emit_insn (seq); + } + return true; + } + + return false; +} + +/* Implement the movmisalign patterns. One of the operands is a memory + that is not naturally aligned. Emit instructions to load it. */ + +void +sw_64_expand_movmisalign (machine_mode mode, rtx *operands) +{ + /* Honor misaligned loads, for those we promised to do so. */ + if (MEM_P (operands[1])) + { + rtx tmp; + + if (register_operand (operands[0], mode)) + tmp = operands[0]; + else + tmp = gen_reg_rtx (mode); + + sw_64_expand_unaligned_load (tmp, operands[1], 8, 0, 0); + if (tmp != operands[0]) + emit_move_insn (operands[0], tmp); + } + else if (MEM_P (operands[0])) + { + if (!reg_or_0_operand (operands[1], mode)) + operands[1] = force_reg (mode, operands[1]); + sw_64_expand_unaligned_store (operands[0], operands[1], 8, 0); + } + else + gcc_unreachable (); +} + +/* Generate an unsigned DImode to FP conversion. 
This is the same code + optabs would emit if we didn't have TFmode patterns. + + For SFmode, this is the only construction I've found that can pass + gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode + intermediates will work, because you'll get intermediate rounding + that ruins the end result. Some of this could be fixed by turning + on round-to-positive-infinity, but that requires diddling the fpsr, + which kills performance. I tried turning this around and converting + to a negative number, so that I could turn on /m, but either I did + it wrong or there's something else cause I wound up with the exact + same single-bit error. There is a branch-less form of this same code: + + srl $16,1,$1 + and $16,1,$2 + cmplt $16,0,$3 + or $1,$2,$2 + selge $16,$16,$2 + ifmovd $3,$f10 + ifmovd $2,$f11 + fcvtlf $f11,$f11 + fadds $f11,$f11,$f0 + fseleq $f10,$f11,$f0 + + I'm not using it because it's the same number of instructions as + this branch-full form, and it has more serialized long latency + instructions on the critical path. + + For DFmode, we can avoid rounding errors by breaking up the word + into two pieces, converting them separately, and adding them back: + + LC0: .long 0,0x5f800000 + + ifmovd $16,$f11 + ldi $2,LC0 + cmplt $16,0,$1 + fcpyse $f11,$f31,$f10 + fcpyse $f31,$f11,$f11 + s4addw $1,$2,$1 + lds $f12,0($1) + fcvtls $f10,$f10 + fcvtls $f11,$f11 + faddd $f12,$f10,$f0 + faddd $f0,$f11,$f0 + + This doesn't seem to be a clear-cut win over the optabs form. + It probably all depends on the distribution of numbers being + converted -- in the optabs form, all but high-bit-set has a + much lower minimum execution time. */ + +void +sw_64_emit_floatuns (rtx operands[2]) +{ + rtx neglab, donelab, i0, i1, f0, in, out; + machine_mode mode; + + out = operands[0]; + in = force_reg (DImode, operands[1]); + mode = GET_MODE (out); + neglab = gen_label_rtx (); + donelab = gen_label_rtx (); + i0 = gen_reg_rtx (DImode); + i1 = gen_reg_rtx (DImode); + f0 = gen_reg_rtx (mode); + + emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab); + + emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in))); + emit_jump_insn (gen_jump (donelab)); + emit_barrier (); + + emit_label (neglab); + + emit_insn (gen_lshrdi3 (i0, in, const1_rtx)); + emit_insn (gen_anddi3 (i1, in, const1_rtx)); + emit_insn (gen_iordi3 (i0, i0, i1)); + emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0))); + emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0))); + + emit_label (donelab); +} + +/* Generate the comparison for a conditional branch. */ + +void +sw_64_emit_conditional_branch (rtx operands[], machine_mode cmp_mode) +{ + enum rtx_code cmp_code, branch_code; + machine_mode branch_mode = VOIDmode; + enum rtx_code code = GET_CODE (operands[0]); + rtx op0 = operands[1], op1 = operands[2]; + rtx tem; + + if (cmp_mode == TFmode) + { + op0 = sw_64_emit_xfloating_compare (&code, op0, op1); + op1 = const0_rtx; + cmp_mode = DImode; + } + + /* The general case: fold the comparison code to the types of compares + that we have, choosing the branch as necessary. */ + switch (code) + { + case EQ: + case LE: + case LT: + case LEU: + case LTU: + case UNORDERED: + /* We have these compares. */ + cmp_code = code, branch_code = NE; + break; + + case NE: + case ORDERED: + /* These must be reversed. */ + cmp_code = reverse_condition (code), branch_code = EQ; + break; + + case GE: + case GT: + case GEU: + case GTU: + /* For FP, we swap them, for INT, we reverse them. 
*/ + if (cmp_mode == DFmode || (cmp_mode == SFmode && flag_sw_sf_cmpsel)) + { + cmp_code = swap_condition (code); + branch_code = NE; + std::swap (op0, op1); + } + else + { + cmp_code = reverse_condition (code); + branch_code = EQ; + } + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DFmode) + { + if (flag_unsafe_math_optimizations && cmp_code != UNORDERED) + { + /* When we are not as concerned about non-finite values, and we + are comparing against zero, we can branch directly. */ + if (op1 == CONST0_RTX (DFmode)) + cmp_code = UNKNOWN, branch_code = code; + else if (op0 == CONST0_RTX (DFmode)) + { + /* Undo the swap we probably did just above. */ + std::swap (op0, op1); + branch_code = swap_condition (cmp_code); + cmp_code = UNKNOWN; + } + } + else + { + /* ??? We mark the branch mode to be CCmode to prevent the + compare and branch from being combined, since the compare + insn follows IEEE rules that the branch does not. */ + branch_mode = CCmode; + } + } + else if (cmp_mode == SFmode && flag_sw_sf_cmpsel) + { + if (flag_unsafe_math_optimizations && cmp_code != UNORDERED) + { + /* When we are not as concerned about non-finite values, and we + are comparing against zero, we can branch directly. */ + if (op1 == CONST0_RTX (SFmode)) + cmp_code = UNKNOWN, branch_code = code; + else if (op0 == CONST0_RTX (SFmode)) + { + /* Undo the swap we probably did just above. */ + std::swap (op0, op1); + branch_code = swap_condition (cmp_code); + cmp_code = UNKNOWN; + } + } + else + { + /* ??? We mark the branch mode to be CCmode to prevent the + compare and branch from being combined, since the compare + insn follows IEEE rules that the branch does not. */ + branch_mode = CCmode; + } + } + else + { + /* The following optimizations are only for signed compares. */ + if (code != LEU && code != LTU && code != GEU && code != GTU) + { + /* Whee. Compare and branch against 0 directly. */ + if (op1 == const0_rtx) + cmp_code = UNKNOWN, branch_code = code; + + /* If the constants doesn't fit into an immediate, but can + be generated by ldi/ldih, we adjust the argument and + compare against zero, so we can use beq/bne directly. */ + /* ??? Don't do this when comparing against symbols, otherwise + we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will + be declared false out of hand (at least for non-weak). */ + else if (CONST_INT_P (op1) && (code == EQ || code == NE) + && !(symbolic_operand (op0, VOIDmode) + || (REG_P (op0) && REG_POINTER (op0)))) + { + rtx n_op1 = GEN_INT (-INTVAL (op1)); + + if (!satisfies_constraint_I (op1) + && (satisfies_constraint_K (n_op1) + || satisfies_constraint_L (n_op1))) + cmp_code = PLUS, branch_code = code, op1 = n_op1; + } + } + + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* Emit an initial compare instruction, if necessary. */ + tem = op0; + if (cmp_code != UNKNOWN) + { + tem = gen_reg_rtx (cmp_mode); + emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)); + } + + /* Emit the branch instruction. */ + tem = gen_rtx_SET ( + pc_rtx, + gen_rtx_IF_THEN_ELSE (VOIDmode, + gen_rtx_fmt_ee (branch_code, branch_mode, tem, + CONST0_RTX (cmp_mode)), + gen_rtx_LABEL_REF (VOIDmode, operands[3]), pc_rtx)); + emit_jump_insn (tem); +} + +/* Certain simplifications can be done to make invalid setcc operations + valid. Return the final comparison, or NULL if we can't work. 
*/ + +bool +sw_64_emit_setcc (rtx operands[], machine_mode cmp_mode) +{ + enum rtx_code cmp_code; + enum rtx_code code = GET_CODE (operands[1]); + rtx op0 = operands[2], op1 = operands[3]; + rtx tmp; + + if (cmp_mode == TFmode) + { + op0 = sw_64_emit_xfloating_compare (&code, op0, op1); + op1 = const0_rtx; + cmp_mode = DImode; + } + + if (cmp_mode == DFmode && !TARGET_FIX) + return 0; + + /* The general case: fold the comparison code to the types of compares + that we have, choosing the branch as necessary. */ + + cmp_code = UNKNOWN; + switch (code) + { + case EQ: + case LE: + case LT: + case LEU: + case LTU: + case UNORDERED: + /* We have these compares. */ + if (cmp_mode == DFmode) + cmp_code = code, code = NE; + break; + + case NE: + if (cmp_mode == DImode && op1 == const0_rtx) + break; + /* FALLTHRU */ + + case ORDERED: + cmp_code = reverse_condition (code); + code = EQ; + break; + + case GE: + case GT: + case GEU: + case GTU: + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + break; + code = swap_condition (code); + if (cmp_mode == DFmode) + cmp_code = code, code = NE; + std::swap (op0, op1); + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DImode) + { + if (!register_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* Emit an initial compare instruction, if necessary. */ + if (cmp_code != UNKNOWN) + { + tmp = gen_reg_rtx (cmp_mode); + emit_insn ( + gen_rtx_SET (tmp, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1))); + + op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp; + op1 = const0_rtx; + } + + /* Emit the setcc instruction. */ + emit_insn ( + gen_rtx_SET (operands[0], gen_rtx_fmt_ee (code, DImode, op0, op1))); + return true; +} + +/* Rewrite a comparison against zero CMP of the form + (CODE (cc0) (const_int 0)) so it can be written validly in + a conditional move (if_then_else CMP ...). + If both of the operands that set cc0 are nonzero we must emit + an insn to perform the compare (it can't be done within + the conditional move). */ + +rtx +sw_64_emit_conditional_move (rtx cmp, machine_mode mode) +{ + enum rtx_code code = GET_CODE (cmp); + enum rtx_code cmov_code = NE; + rtx op0 = XEXP (cmp, 0); + rtx op1 = XEXP (cmp, 1); + machine_mode cmp_mode + = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0)); + machine_mode cmov_mode = VOIDmode; + int local_fast_math = flag_unsafe_math_optimizations; + rtx tem; + + if (cmp_mode == TFmode) + { + op0 = sw_64_emit_xfloating_compare (&code, op0, op1); + op1 = const0_rtx; + cmp_mode = DImode; + } + + gcc_assert (cmp_mode == DFmode || cmp_mode == DImode || cmp_mode == SFmode); + + if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode)) + { + enum rtx_code cmp_code; + + if (!TARGET_FIX) + return 0; + + /* If we have fp<->int register move instructions, do a cmov by + performing the comparison in fp registers, and move the + zero/nonzero value to integer registers, where we can then + use a normal cmov, or vice-versa. */ + + switch (code) + { + case EQ: + case LE: + case LT: + case LEU: + case LTU: + case UNORDERED: + /* We have these compares. */ + cmp_code = code, code = NE; + break; + + case NE: + case ORDERED: + /* These must be reversed. 
*/ + cmp_code = reverse_condition (code), code = EQ; + break; + + case GE: + case GT: + case GEU: + case GTU: + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + cmp_code = code, code = NE; + else + { + cmp_code = swap_condition (code); + code = NE; + std::swap (op0, op1); + } + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DImode) + { + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + tem = gen_reg_rtx (cmp_mode); + emit_insn ( + gen_rtx_SET (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1))); + + cmp_mode = cmp_mode == DImode ? E_DFmode : E_DImode; + op0 = gen_lowpart (cmp_mode, tem); + op1 = CONST0_RTX (cmp_mode); + cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); + local_fast_math = 1; + } + + if (cmp_mode == DImode) + { + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* We may be able to use a conditional move directly. + This avoids emitting spurious compares. */ + if (signed_comparison_operator (cmp, VOIDmode) + && (cmp_mode == DImode || local_fast_math) + && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode))) + return gen_rtx_fmt_ee (code, VOIDmode, op0, op1); + + /* We can't put the comparison inside the conditional move; + emit a compare instruction and put that inside the + conditional move. Make sure we emit only comparisons we have; + swap or reverse as necessary. */ + + if (!can_create_pseudo_p ()) + return NULL_RTX; + + switch (code) + { + case EQ: + case LE: + case LT: + case LEU: + case LTU: + case UNORDERED: + /* We have these compares: */ + break; + + case NE: + case ORDERED: + /* These must be reversed. */ + code = reverse_condition (code); + cmov_code = EQ; + break; + + case GE: + case GT: + case GEU: + case GTU: + /* These normally need swapping, but for integer zero we have + special patterns that recognize swapped operands. */ + if (cmp_mode == DImode && op1 == const0_rtx) + break; + code = swap_condition (code); + std::swap (op0, op1); + break; + + default: + gcc_unreachable (); + } + + if (cmp_mode == DImode) + { + if (!reg_or_0_operand (op0, DImode)) + op0 = force_reg (DImode, op0); + if (!reg_or_8bit_operand (op1, DImode)) + op1 = force_reg (DImode, op1); + } + + /* ??? We mark the branch mode to be CCmode to prevent the compare + and cmov from being combined, since the compare insn follows IEEE + rules that the cmov does not. */ + if (cmp_mode == DFmode && !local_fast_math) + cmov_mode = CCmode; + + tem = gen_reg_rtx (cmp_mode); + emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1)); + return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode)); +} + +/* Simplify a conditional move of two constants into a setcc with + arithmetic. This is done with a splitter since combine would + just undo the work if done during code generation. It also catches + cases we wouldn't have before cse. 
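+
+   As a sketch of the transformation (the constants are illustrative):
+
+     r = cond ? 8 : 0      ==>   t = (cond cmp 0);  r = t << 3;
+     r = cond ? -1 : 0     ==>   t = (cond cmp 0);  r = -t;
+     r = cond ? f+1 : f    ==>   t = (cond cmp 0);  r = t + f;
+
+   and for a difference of 4 or 8 between the two constants the
+   scaled-add form t * diff + f is used when F is a sext_add_operand.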
*/ + +int +sw_64_split_conditional_move (enum rtx_code code, rtx dest, rtx cond, rtx t_rtx, + rtx f_rtx) +{ + HOST_WIDE_INT t, f, diff; + machine_mode mode; + rtx target, subtarget, tmp; + + mode = GET_MODE (dest); + t = INTVAL (t_rtx); + f = INTVAL (f_rtx); + diff = t - f; + + if (((code == NE || code == EQ) && diff < 0) || (code == GE || code == GT)) + { + code = reverse_condition (code); + std::swap (t, f); + diff = -diff; + } + + subtarget = target = dest; + if (mode != DImode) + { + target = gen_lowpart (DImode, dest); + if (can_create_pseudo_p ()) + subtarget = gen_reg_rtx (DImode); + else + subtarget = target; + } + /* Below, we must be careful to use copy_rtx on target and subtarget + in intermediate insns, as they may be a subreg rtx, which may not + be shared. */ + + if (f == 0 + && exact_log2 (diff) > 0 + /* On SW6, we've got enough shifters to make non-arithmetic shifts + viable over a longer latency cmove. */ + && (diff <= 8 || sw_64_tune == PROCESSOR_SW6 + || sw_64_tune == PROCESSOR_SW8)) + { + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp)); + + tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget), + GEN_INT (exact_log2 (t))); + emit_insn (gen_rtx_SET (target, tmp)); + } + else if (f == 0 && t == -1) + { + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp)); + + emit_insn (gen_negdi2 (target, copy_rtx (subtarget))); + } + else if (diff == 1 || diff == 4 || diff == 8) + { + rtx add_op; + + tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); + emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp)); + + if (diff == 1) + emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f))); + else + { + add_op = GEN_INT (f); + if (sext_add_operand (add_op, mode)) + { + // in sw_64 sxsubw is ra*x + rb; + tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget), GEN_INT (diff)); + tmp = gen_rtx_PLUS (DImode, tmp, add_op); + emit_insn (gen_rtx_SET (target, tmp)); + } + else + return 0; + } + } + else + return 0; + + return 1; +} + +/* Look up the function X_floating library function name for the + given operation. */ + +struct GTY (()) xfloating_op +{ + const enum rtx_code code; + const char *const GTY ((skip)) osf_func; + const char *const GTY ((skip)) vms_func; + rtx libcall; +}; + +static GTY (()) struct xfloating_op xfloating_ops[] + = {{PLUS, "_OtsAddX", "OTS$ADD_X", 0}, + {MINUS, "_OtsSubX", "OTS$SUB_X", 0}, + {MULT, "_OtsMulX", "OTS$MUL_X", 0}, + {DIV, "_OtsDivX", "OTS$DIV_X", 0}, + {EQ, "_OtsEqlX", "OTS$EQL_X", 0}, + {NE, "_OtsNeqX", "OTS$NEQ_X", 0}, + {LT, "_OtsLssX", "OTS$LSS_X", 0}, + {LE, "_OtsLeqX", "OTS$LEQ_X", 0}, + {GT, "_OtsGtrX", "OTS$GTR_X", 0}, + {GE, "_OtsGeqX", "OTS$GEQ_X", 0}, + {FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0}, + {FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0}, + {UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0}, + {FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0}, + {FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0}}; + +static GTY (()) struct xfloating_op vax_cvt_ops[] + = {{FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0}, + {FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0}}; + +static rtx +sw_64_lookup_xfloating_lib_func (enum rtx_code code) +{ + struct xfloating_op *ops = xfloating_ops; + long n = ARRAY_SIZE (xfloating_ops); + long i; + + gcc_assert (TARGET_HAS_XFLOATING_LIBS); + + /* How irritating. Nothing to key off for the main table. 
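+
+   The rtx code alone cannot distinguish the VAX-format conversions
+   from the IEEE ones, so when TARGET_FLOAT_VAX is set the
+   FLOAT_EXTEND/FLOAT_TRUNCATE codes are redirected to the small
+   vax_cvt_ops table; every other code is found by a linear search of
+   xfloating_ops, and the libfunc rtx is created lazily and cached in
+   the table entry.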
*/ + if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE)) + { + ops = vax_cvt_ops; + n = ARRAY_SIZE (vax_cvt_ops); + } + + for (i = 0; i < n; ++i, ++ops) + if (ops->code == code) + { + rtx func = ops->libcall; + if (!func) + { + func = init_one_libfunc (ops->osf_func); + ops->libcall = func; + } + return func; + } + + gcc_unreachable (); +} + +/* Most X_floating operations take the rounding mode as an argument. + Compute that here. */ + +static int +sw_64_compute_xfloating_mode_arg (enum rtx_code code, + enum sw_64_fp_rounding_mode round) +{ + int mode; + + switch (round) + { + case SW_64_FPRM_NORM: + mode = 2; + break; + case SW_64_FPRM_MINF: + mode = 1; + break; + case SW_64_FPRM_CHOP: + mode = 0; + break; + case SW_64_FPRM_DYN: + mode = 4; + break; + default: + gcc_unreachable (); + + /* XXX For reference, round to +inf is mode = 3. */ + } + + if (code == FLOAT_TRUNCATE && sw_64_fptm == SW_64_FPTM_N) + mode |= 0x10000; + + return mode; +} + +/* Emit an X_floating library function call. + + Note that these functions do not follow normal calling conventions: + TFmode arguments are passed in two integer registers (as opposed to + indirect); TFmode return values appear in R16+R17. + + FUNC is the function to call. + TARGET is where the output belongs. + OPERANDS are the inputs. + NOPERANDS is the count of inputs. + EQUIV is the expression equivalent for the function. +*/ + +static void +sw_64_emit_xfloating_libcall (rtx func, rtx target, rtx operands[], + int noperands, rtx equiv) +{ + rtx usage = NULL_RTX, reg; + int regno = 16, i; + + start_sequence (); + + for (i = 0; i < noperands; ++i) + { + switch (GET_MODE (operands[i])) + { + case E_TFmode: + reg = gen_rtx_REG (TFmode, regno); + regno += 2; + break; + + case E_DFmode: + reg = gen_rtx_REG (DFmode, regno + 32); + regno += 1; + break; + + case E_VOIDmode: + gcc_assert (CONST_INT_P (operands[i])); + /* FALLTHRU */ + case E_DImode: + reg = gen_rtx_REG (DImode, regno); + regno += 1; + break; + + default: + gcc_unreachable (); + } + + emit_move_insn (reg, operands[i]); + use_reg (&usage, reg); + } + + switch (GET_MODE (target)) + { + case E_TFmode: + reg = gen_rtx_REG (TFmode, 16); + break; + case E_DFmode: + reg = gen_rtx_REG (DFmode, 32); + break; + case E_DImode: + reg = gen_rtx_REG (DImode, 0); + break; + default: + gcc_unreachable (); + } + + rtx mem = gen_rtx_MEM (QImode, func); + rtx_insn *tmp = emit_call_insn ( + gen_call_value (reg, mem, const0_rtx, const0_rtx, const0_rtx)); + CALL_INSN_FUNCTION_USAGE (tmp) = usage; + RTL_CONST_CALL_P (tmp) = 1; + + tmp = get_insns (); + end_sequence (); + + emit_libcall_block (tmp, target, reg, equiv); +} + +/* Emit an X_floating library function call for arithmetic (+,-,*,/). */ + +void +sw_64_emit_xfloating_arith (enum rtx_code code, rtx operands[]) +{ + rtx func; + int mode; + rtx out_operands[3]; + + func = sw_64_lookup_xfloating_lib_func (code); + mode = sw_64_compute_xfloating_mode_arg (code, sw_64_fprm); + + out_operands[0] = operands[1]; + out_operands[1] = operands[2]; + out_operands[2] = GEN_INT (mode); + sw_64_emit_xfloating_libcall (func, operands[0], out_operands, 3, + gen_rtx_fmt_ee (code, TFmode, operands[1], + operands[2])); +} + +/* Emit an X_floating library function call for a comparison. 
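+
+   For example, an ORDERED test is emitted as a call to the EQ helper
+   (_OtsEqlX) and the comparison handed back in *PCODE becomes GE
+   against zero, relying on the -1/0/1 return convention documented in
+   the function below.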
*/ + +static rtx +sw_64_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1) +{ + enum rtx_code cmp_code, res_code; + rtx func, out, operands[2], note; + + /* X_floating library comparison functions return + -1 unordered + 0 false + 1 true + Convert the compare against the raw return value. */ + + cmp_code = *pcode; + switch (cmp_code) + { + case UNORDERED: + cmp_code = EQ; + res_code = LT; + break; + case ORDERED: + cmp_code = EQ; + res_code = GE; + break; + case NE: + res_code = NE; + break; + case EQ: + case LT: + case GT: + case LE: + case GE: + res_code = GT; + break; + default: + gcc_unreachable (); + } + *pcode = res_code; + + func = sw_64_lookup_xfloating_lib_func (cmp_code); + + operands[0] = op0; + operands[1] = op1; + out = gen_reg_rtx (DImode); + + /* What's actually returned is -1,0,1, not a proper boolean value. */ + note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1); + note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE); + sw_64_emit_xfloating_libcall (func, out, operands, 2, note); + + return out; +} + +/* Emit an X_floating library function call for a conversion. */ + +void +sw_64_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[]) +{ + int noperands = 1, mode; + rtx out_operands[2]; + rtx func; + enum rtx_code code = orig_code; + + if (code == UNSIGNED_FIX) + code = FIX; + + func = sw_64_lookup_xfloating_lib_func (code); + + out_operands[0] = operands[1]; + + switch (code) + { + case FIX: + mode = sw_64_compute_xfloating_mode_arg (code, SW_64_FPRM_CHOP); + out_operands[1] = GEN_INT (mode); + noperands = 2; + break; + case FLOAT_TRUNCATE: + mode = sw_64_compute_xfloating_mode_arg (code, sw_64_fprm); + out_operands[1] = GEN_INT (mode); + noperands = 2; + break; + default: + break; + } + + sw_64_emit_xfloating_libcall (func, operands[0], out_operands, noperands, + gen_rtx_fmt_e (orig_code, + GET_MODE (operands[0]), + operands[1])); +} + +/* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of + DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true, + guarantee that the sequence + set (OP[0] OP[2]) + set (OP[1] OP[3]) + is valid. Naturally, output operand ordering is little-endian. + This is used by *movtf_internal and *movti_internal. */ + +void +sw_64_split_tmode_pair (rtx operands[4], machine_mode mode, bool fixup_overlap) +{ + switch (GET_CODE (operands[1])) + { + case REG: + operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1); + operands[2] = gen_rtx_REG (DImode, REGNO (operands[1])); + break; + + case MEM: + operands[3] = adjust_address (operands[1], DImode, 8); + operands[2] = adjust_address (operands[1], DImode, 0); + break; + + CASE_CONST_SCALAR_INT: + case CONST_DOUBLE: + gcc_assert (operands[1] == CONST0_RTX (mode)); + operands[2] = operands[3] = const0_rtx; + break; + + default: + gcc_unreachable (); + } + + switch (GET_CODE (operands[0])) + { + case REG: + operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1); + operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); + break; + + case MEM: + operands[1] = adjust_address (operands[0], DImode, 8); + operands[0] = adjust_address (operands[0], DImode, 0); + break; + + default: + gcc_unreachable (); + } + + if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3])) + { + std::swap (operands[0], operands[1]); + std::swap (operands[2], operands[3]); + } +} + +/* Implement negtf2 or abstf2. Op0 is destination, op1 is source, + op2 is a register containing the sign bit, operation is the + logical operation to be performed. 
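+
+   Only the high DImode half of the TFmode value is modified: the pair
+   is split below and OPERATION (presumably an xor for negtf2 and an
+   and-with-complement for abstf2, supplied by the caller) is applied
+   to the upper word against the sign-bit register, while the low word
+   is simply copied, with some care taken for overlapping register
+   pairs.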
*/ + +void +sw_64_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx)) +{ + rtx high_bit = operands[2]; + rtx scratch; + int move; + + sw_64_split_tmode_pair (operands, TFmode, false); + + /* Detect three flavors of operand overlap. */ + move = 1; + if (rtx_equal_p (operands[0], operands[2])) + move = 0; + else if (rtx_equal_p (operands[1], operands[2])) + { + if (rtx_equal_p (operands[0], high_bit)) + move = 2; + else + move = -1; + } + + if (move < 0) + emit_move_insn (operands[0], operands[2]); + + /* ??? If the destination overlaps both source tf and high_bit, then + assume source tf is dead in its entirety and use the other half + for a scratch register. Otherwise "scratch" is just the proper + destination register. */ + scratch = operands[move < 2 ? 1 : 3]; + + emit_insn ((*operation) (scratch, high_bit, operands[3])); + + if (move > 0) + { + emit_move_insn (operands[0], operands[2]); + if (move > 1) + emit_move_insn (operands[1], scratch); + } +} + +/* Use ext[wlq][lh] as the Architecture Handbook describes for extracting + unaligned data: + + unsigned: signed: + word: ldl_u r1,X(r11) ldl_u r1,X(r11) + ldl_u r2,X+1(r11) ldl_u r2,X+1(r11) + ldi r3,X(r11) ldi r3,X+2(r11) + exthl r1,r3,r1 extll r1,r3,r1 + exthh r2,r3,r2 extlh r2,r3,r2 + or r1.r2.r1 or r1,r2,r1 + sra r1,48,r1 + + long: ldl_u r1,X(r11) ldl_u r1,X(r11) + ldl_u r2,X+3(r11) ldl_u r2,X+3(r11) + ldi r3,X(r11) ldi r3,X(r11) + extll r1,r3,r1 extll r1,r3,r1 + extlh r2,r3,r2 extlh r2,r3,r2 + or r1.r2.r1 addl r1,r2,r1 + + quad: ldl_u r1,X(r11) + ldl_u r2,X+7(r11) + ldi r3,X(r11) + extll r1,r3,r1 + extlh r2,r3,r2 + or r1.r2.r1 +*/ + +void +sw_64_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size, + HOST_WIDE_INT ofs, int sign) +{ + rtx meml, memh, addr, extl, exth, tmp, mema; + machine_mode mode; + + if (TARGET_BWX && size == 2) + { + meml = adjust_address (mem, QImode, ofs); + memh = adjust_address (mem, QImode, ofs + 1); + extl = gen_reg_rtx (DImode); + exth = gen_reg_rtx (DImode); + emit_insn (gen_zero_extendqidi2 (extl, meml)); + emit_insn (gen_zero_extendqidi2 (exth, memh)); + exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8), NULL, 1, + OPTAB_LIB_WIDEN); + addr = expand_simple_binop (DImode, IOR, extl, exth, NULL, 1, + OPTAB_LIB_WIDEN); + + if (sign && GET_MODE (tgt) != HImode) + { + addr = gen_lowpart (HImode, addr); + emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0)); + } + else + { + if (GET_MODE (tgt) != DImode) + addr = gen_lowpart (GET_MODE (tgt), addr); + emit_move_insn (tgt, addr); + } + return; + } + + meml = gen_reg_rtx (Pmode); + memh = gen_reg_rtx (Pmode); + addr = gen_reg_rtx (Pmode); + extl = gen_reg_rtx (Pmode); + exth = gen_reg_rtx (Pmode); + + mema = XEXP (mem, 0); + rtx mema_const, mema_ptr; + if (GET_CODE (mema) == LO_SUM) + mema = force_reg (Pmode, mema); + + // TODO: split const ptr + if (GET_CODE (mema) == PLUS) + { + mema_ptr = XEXP (mema, 0); + mema_const = XEXP (mema, 1); + } + /* AND addresses cannot be in any alias set, since they may implicitly + alias surrounding code. Ideally we'd have some alias set that + covered all types except those with alignment 8 or higher. 
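+
+   As a C-level sketch of what the sequence below computes for an
+   unaligned 4-byte load at address P (illustrative only; the byte
+   shuffling is really done by the ext insns):
+
+     uint64_t lo = *(uint64_t *) (P & ~7);         // ldl_u
+     uint64_t hi = *(uint64_t *) ((P + 3) & ~7);   // ldl_u
+     unsigned  s = (P & 7) * 8;
+     uint32_t  v = (uint32_t) (lo >> s);           // extll
+     if (s != 0)
+       v |= (uint32_t) (hi << (64 - s));           // extlh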
*/ + + tmp = change_address (mem, Pmode, + gen_rtx_AND (Pmode, plus_constant (Pmode, mema, ofs), + GEN_INT (-8))); + set_mem_alias_set (tmp, 0); + emit_move_insn (meml, tmp); + + tmp + = change_address (mem, Pmode, + gen_rtx_AND (Pmode, + plus_constant (Pmode, mema, ofs + size - 1), + GEN_INT (-8))); + set_mem_alias_set (tmp, 0); + emit_move_insn (memh, tmp); + + if (sign && size == 2) + { + emit_move_insn (addr, plus_constant (Pmode, mema, ofs + 2)); + + emit_insn (gen_extql (extl, meml, addr)); + emit_insn (gen_extqh (exth, memh, addr)); + + /* We must use tgt here for the target. Sw_64 port fails if we use + addr for the target, because addr is marked as a pointer and combine + knows that pointers are always sign-extended 32-bit values. */ + addr = expand_binop (Pmode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN); + addr = expand_binop (Pmode, ashr_optab, addr, GEN_INT (48), addr, 1, + OPTAB_WIDEN); + } + else + { + if (GET_CODE (mema) == PLUS && CONST_INT_P (mema_const) && + // (INTVAL (mema_const) > 32767 || INTVAL (mema_const) < -32767)) + // { + (!add_operand (mema_const, VOIDmode))) + { + rtx tmpreg = gen_reg_rtx (DImode); + tmpreg = sw_64_emit_set_const ( + tmpreg, DImode, INTVAL (plus_constant (Pmode, mema_const, ofs)), 2, + false); + emit_insn (gen_adddi3 (addr, mema_ptr, tmpreg)); + } + else + { + emit_move_insn (addr, plus_constant (Pmode, mema, ofs)); + } + emit_insn (gen_extxl (extl, meml, GEN_INT (size * 8), addr)); + switch ((int) size) + { + case 2: + emit_insn (gen_extwh (exth, memh, addr)); + mode = HImode; + break; + case 4: + emit_insn (gen_extlh (exth, memh, addr)); + mode = SImode; + break; + case 8: + emit_insn (gen_extqh (exth, memh, addr)); + mode = DImode; + break; + default: + gcc_unreachable (); + } + + addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl), + gen_lowpart (mode, exth), gen_lowpart (mode, tgt), + sign, OPTAB_WIDEN); + } + + if (addr != tgt) + emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr)); +} + +/* Similarly, use ins and msk instructions to perform unaligned stores. */ + +void +sw_64_expand_unaligned_store (rtx dst, rtx src, HOST_WIDE_INT size, + HOST_WIDE_INT ofs) +{ + rtx dstl, dsth, addr, insl, insh, meml, memh, dsta; + + if (TARGET_BWX && size == 2) + { + if (src != const0_rtx) + { + dstl = gen_lowpart (QImode, src); + dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8), NULL, + 1, OPTAB_LIB_WIDEN); + dsth = gen_lowpart (QImode, dsth); + } + else + dstl = dsth = const0_rtx; + + meml = adjust_address (dst, QImode, ofs); + memh = adjust_address (dst, QImode, ofs + 1); + + emit_move_insn (meml, dstl); + emit_move_insn (memh, dsth); + return; + } + + dstl = gen_reg_rtx (Pmode); + dsth = gen_reg_rtx (Pmode); + insl = gen_reg_rtx (Pmode); + insh = gen_reg_rtx (Pmode); + + dsta = XEXP (dst, 0); + if (GET_CODE (dsta) == LO_SUM) + dsta = force_reg (Pmode, dsta); + + /* AND addresses cannot be in any alias set, since they may implicitly + alias surrounding code. Ideally we'd have some alias set that + covered all types except those with alignment 8 or higher. 
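+
+   In outline, the unaligned store below is a read-modify-write of the
+   two aligned quadwords containing the destination: load both, shift
+   the source bytes into place with ins*l/ins*h, clear the destination
+   bytes with msk*l/msk*h, OR the halves together, and store the high
+   quadword before the low one (see the comment at the stores for why
+   that order matters in the fully aligned case).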
*/ + + meml = change_address (dst, Pmode, + gen_rtx_AND (Pmode, plus_constant (Pmode, dsta, ofs), + GEN_INT (-8))); + set_mem_alias_set (meml, 0); + + memh + = change_address (dst, Pmode, + gen_rtx_AND (Pmode, + plus_constant (Pmode, dsta, ofs + size - 1), + GEN_INT (-8))); + set_mem_alias_set (memh, 0); + + emit_move_insn (dsth, memh); + emit_move_insn (dstl, meml); + + addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs)); + + if (src != CONST0_RTX (GET_MODE (src))) + { + emit_insn ( + gen_insxh (insh, gen_lowpart (DImode, src), GEN_INT (size * 8), addr)); + + switch ((int) size) + { + case 2: + emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr)); + break; + case 4: + emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr)); + break; + case 8: + emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr)); + break; + default: + gcc_unreachable (); + } + } + + emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size * 8), addr)); + + switch ((int) size) + { + case 2: + emit_insn (gen_mskwl (dstl, dstl, addr)); + break; + case 4: + emit_insn (gen_mskll (dstl, dstl, addr)); + break; + case 8: + emit_insn (gen_mskql (dstl, dstl, addr)); + break; + default: + gcc_unreachable (); + } + + if (src != CONST0_RTX (GET_MODE (src))) + { + dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN); + dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN); + } + + /* Must store high before low for degenerate case of aligned. */ + emit_move_insn (memh, dsth); + emit_move_insn (meml, dstl); +} + +/* The block move code tries to maximize speed by separating loads and + stores at the expense of register pressure: we load all of the data + before we store it back out. There are two secondary effects worth + mentioning, that this speeds copying to/from aligned and unaligned + buffers, and that it makes the code significantly easier to write. */ + +#define MAX_MOVE_WORDS 8 + +/* Load an integral number of consecutive unaligned quadwords. */ + +static void +sw_64_expand_unaligned_load_words (rtx *out_regs, rtx smem, HOST_WIDE_INT words, + HOST_WIDE_INT ofs) +{ + rtx const im8 = GEN_INT (-8); + rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS + 1]; + rtx sreg, areg, tmp, smema; + HOST_WIDE_INT i; + + smema = XEXP (smem, 0); + if (GET_CODE (smema) == LO_SUM) + smema = force_reg (Pmode, smema); + + /* Generate all the tmp registers we need. */ + for (i = 0; i < words; ++i) + { + data_regs[i] = out_regs[i]; + ext_tmps[i] = gen_reg_rtx (DImode); + } + data_regs[words] = gen_reg_rtx (DImode); + + if (ofs != 0) + smem = adjust_address (smem, GET_MODE (smem), ofs); + + /* Load up all of the source data. */ + for (i = 0; i < words; ++i) + { + tmp = change_address (smem, Pmode, + gen_rtx_AND (Pmode, + plus_constant (Pmode, smema, 8 * i), + im8)); + set_mem_alias_set (tmp, 0); + emit_move_insn (data_regs[i], tmp); + } + + tmp = change_address ( + smem, Pmode, + gen_rtx_AND (Pmode, plus_constant (Pmode, smema, 8 * words - 1), im8)); + set_mem_alias_set (tmp, 0); + emit_move_insn (data_regs[words], tmp); + + /* Extract the half-word fragments. Unfortunately decided to make + extxh with offset zero a noop instead of zeroing the register, so + we must take care of that edge condition ourselves with cmov. 
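+     That is, when the source is 8-byte aligned (areg == 0 below) each
+     extqh passes the following quadword through unchanged instead of
+     producing zero, which would corrupt the OR that merges the
+     fragments; the conditional move forces ext_tmps[i] to zero in
+     that case.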
*/ + + sreg = copy_addr_to_reg (smema); + areg + = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL, 1, OPTAB_WIDEN); + for (i = 0; i < words; ++i) + { + emit_insn (gen_extql (data_regs[i], data_regs[i], sreg)); + emit_insn (gen_extqh (ext_tmps[i], data_regs[i + 1], sreg)); + emit_insn (gen_rtx_SET ( + ext_tmps[i], + gen_rtx_IF_THEN_ELSE (DImode, gen_rtx_EQ (DImode, areg, const0_rtx), + const0_rtx, ext_tmps[i]))); + } + + /* Merge the half-words into whole words. */ + for (i = 0; i < words; ++i) + { + out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i], ext_tmps[i], + data_regs[i], 1, OPTAB_WIDEN); + } +} + +/* Store an integral number of consecutive unaligned quadwords. DATA_REGS + may be NULL to store zeros. */ + +static void +sw_64_expand_unaligned_store_words (rtx *data_regs, rtx dmem, + HOST_WIDE_INT words, HOST_WIDE_INT ofs) +{ + rtx const im8 = GEN_INT (-8); + rtx ins_tmps[MAX_MOVE_WORDS]; + rtx st_tmp_1, st_tmp_2, dreg; + rtx st_addr_1, st_addr_2, dmema; + HOST_WIDE_INT i; + + dmema = XEXP (dmem, 0); + if (GET_CODE (dmema) == LO_SUM) + dmema = force_reg (Pmode, dmema); + + /* Generate all the tmp registers we need. */ + if (data_regs != NULL) + for (i = 0; i < words; ++i) + ins_tmps[i] = gen_reg_rtx (DImode); + st_tmp_1 = gen_reg_rtx (DImode); + st_tmp_2 = gen_reg_rtx (DImode); + + if (ofs != 0) + dmem = adjust_address (dmem, GET_MODE (dmem), ofs); + + st_addr_2 = change_address ( + dmem, Pmode, + gen_rtx_AND (Pmode, plus_constant (Pmode, dmema, words * 8 - 1), im8)); + set_mem_alias_set (st_addr_2, 0); + + st_addr_1 = change_address (dmem, Pmode, gen_rtx_AND (Pmode, dmema, im8)); + set_mem_alias_set (st_addr_1, 0); + + /* Load up the destination end bits. */ + emit_move_insn (st_tmp_2, st_addr_2); + emit_move_insn (st_tmp_1, st_addr_1); + + /* Shift the input data into place. */ + dreg = copy_addr_to_reg (dmema); + if (data_regs != NULL) + { + for (i = words - 1; i >= 0; --i) + { + emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg)); + emit_insn (gen_insql (data_regs[i], data_regs[i], dreg)); + } + for (i = words - 1; i > 0; --i) + { + ins_tmps[i - 1] + = expand_binop (DImode, ior_optab, data_regs[i], ins_tmps[i - 1], + ins_tmps[i - 1], 1, OPTAB_WIDEN); + } + } + + /* Split and merge the ends with the destination data. */ + emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg)); + emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg)); + + if (data_regs != NULL) + { + st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words - 1], + st_tmp_2, 1, OPTAB_WIDEN); + st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0], + st_tmp_1, 1, OPTAB_WIDEN); + } + + /* Store it all. */ + emit_move_insn (st_addr_2, st_tmp_2); + for (i = words - 1; i > 0; --i) + { + rtx tmp = change_address ( + dmem, Pmode, + gen_rtx_AND (Pmode, plus_constant (Pmode, dmema, i * 8), im8)); + set_mem_alias_set (tmp, 0); + emit_move_insn (tmp, data_regs ? ins_tmps[i - 1] : const0_rtx); + } + emit_move_insn (st_addr_1, st_tmp_1); +} + +/* Expand string/block move operations. + + operands[0] is the pointer to the destination. + operands[1] is the pointer to the source. + operands[2] is the number of bytes to move. + operands[3] is the alignment. 
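+
+   The alignment operand is in bytes (it is scaled to bits below), and
+   the expansion is attempted inline only for small copies: anything
+   larger than MAX_MOVE_WORDS * UNITS_PER_WORD bytes makes the
+   expander return 0 so the caller can fall back to a library call,
+   while a zero-length copy trivially succeeds.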
*/ + +int +sw_64_expand_block_move (rtx operands[]) +{ + rtx bytes_rtx = operands[2]; + rtx align_rtx = operands[3]; + HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); + HOST_WIDE_INT bytes = orig_bytes; + HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT; + HOST_WIDE_INT dst_align = src_align; + rtx orig_src = operands[1]; + rtx orig_dst = operands[0]; + rtx data_regs[2 * MAX_MOVE_WORDS + 16]; + rtx tmp; + unsigned int i, words, ofs, nregs = 0; + + if (orig_bytes <= 0) + return 1; + else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) + return 0; + + /* Look for additional alignment information from recorded register info. */ + + tmp = XEXP (orig_src, 0); + if (REG_P (tmp)) + src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp))); + else if (GET_CODE (tmp) == PLUS && REG_P (XEXP (tmp, 0)) + && CONST_INT_P (XEXP (tmp, 1))) + { + unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); + unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + + if (a > src_align) + { + if (a >= 64 && c % 8 == 0) + src_align = 64; + else if (a >= 32 && c % 4 == 0) + src_align = 32; + else if (a >= 16 && c % 2 == 0) + src_align = 16; + } + } + + tmp = XEXP (orig_dst, 0); + if (REG_P (tmp)) + dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp))); + else if (GET_CODE (tmp) == PLUS && REG_P (XEXP (tmp, 0)) + && CONST_INT_P (XEXP (tmp, 1))) + { + unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); + unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + + if (a > dst_align) + { + if (a >= 64 && c % 8 == 0) + dst_align = 64; + else if (a >= 32 && c % 4 == 0) + dst_align = 32; + else if (a >= 16 && c % 2 == 0) + dst_align = 16; + } + } + + ofs = 0; + if (src_align >= 64 && bytes >= 8) + { + words = bytes / 8; + + for (i = 0; i < words; ++i) + data_regs[nregs + i] = gen_reg_rtx (DImode); + + for (i = 0; i < words; ++i) + emit_move_insn (data_regs[nregs + i], + adjust_address (orig_src, DImode, ofs + i * 8)); + + nregs += words; + bytes -= words * 8; + ofs += words * 8; + } + + if (src_align >= 32 && bytes >= 4) + { + words = bytes / 4; + + for (i = 0; i < words; ++i) + data_regs[nregs + i] = gen_reg_rtx (SImode); + + for (i = 0; i < words; ++i) + emit_move_insn (data_regs[nregs + i], + adjust_address (orig_src, SImode, ofs + i * 4)); + + nregs += words; + bytes -= words * 4; + ofs += words * 4; + } + + if (bytes >= 8) + { + words = bytes / 8; + + for (i = 0; i < words + 1; ++i) + data_regs[nregs + i] = gen_reg_rtx (DImode); + + sw_64_expand_unaligned_load_words (data_regs + nregs, orig_src, words, + ofs); + + nregs += words; + bytes -= words * 8; + ofs += words * 8; + } + + if (!TARGET_BWX && bytes >= 4) + { + data_regs[nregs++] = tmp = gen_reg_rtx (SImode); + sw_64_expand_unaligned_load (tmp, orig_src, 4, ofs, 0); + bytes -= 4; + ofs += 4; + } + + if (bytes >= 2) + { + if (src_align >= 16) + { + do + { + data_regs[nregs++] = tmp = gen_reg_rtx (HImode); + emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs)); + bytes -= 2; + ofs += 2; + } + while (bytes >= 2); + } + else if (!TARGET_BWX) + { + data_regs[nregs++] = tmp = gen_reg_rtx (HImode); + sw_64_expand_unaligned_load (tmp, orig_src, 2, ofs, 0); + bytes -= 2; + ofs += 2; + } + } + + while (bytes > 0) + { + data_regs[nregs++] = tmp = gen_reg_rtx (QImode); + emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs)); + bytes -= 1; + ofs += 1; + } + + gcc_assert (nregs <= ARRAY_SIZE (data_regs)); + + /* Now save it back out again. 
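+     The stores mirror the loads: whole DImode words while the
+     destination is 8-byte aligned, DImode values split into two
+     SImode halves at 4-byte alignment, unaligned store-words for the
+     remaining DImode data, then SImode/HImode/QImode stragglers.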
*/ + + i = 0, ofs = 0; + + /* Write out the data in whatever chunks reading the source allowed. */ + if (dst_align >= 64) + { + while (i < nregs && GET_MODE (data_regs[i]) == DImode) + { + emit_move_insn (adjust_address (orig_dst, DImode, ofs), data_regs[i]); + ofs += 8; + i++; + } + } + + if (dst_align >= 32) + { + /* If the source has remaining DImode regs, write them out in + two pieces. */ + while (i < nregs && GET_MODE (data_regs[i]) == DImode) + { + tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32), + NULL_RTX, 1, OPTAB_WIDEN); + + emit_move_insn (adjust_address (orig_dst, SImode, ofs), + gen_lowpart (SImode, data_regs[i])); + emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4), + gen_lowpart (SImode, tmp)); + ofs += 8; + i++; + } + + while (i < nregs && GET_MODE (data_regs[i]) == SImode) + { + emit_move_insn (adjust_address (orig_dst, SImode, ofs), data_regs[i]); + ofs += 4; + i++; + } + } + + if (i < nregs && GET_MODE (data_regs[i]) == DImode) + { + /* Write out a remaining block of words using unaligned methods. */ + + for (words = 1; i + words < nregs; words++) + if (GET_MODE (data_regs[i + words]) != DImode) + break; + + if (words == 1) + sw_64_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs); + else + sw_64_expand_unaligned_store_words (data_regs + i, orig_dst, words, + ofs); + + i += words; + ofs += words * 8; + } + + /* Due to the above, this won't be aligned. */ + /* ??? If we have more than one of these, consider constructing full + words in registers and using sw_64_expand_unaligned_store_words. */ + while (i < nregs && GET_MODE (data_regs[i]) == SImode) + { + sw_64_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs); + ofs += 4; + i++; + } + + if (dst_align >= 16) + while (i < nregs && GET_MODE (data_regs[i]) == HImode) + { + emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]); + i++; + ofs += 2; + } + else + while (i < nregs && GET_MODE (data_regs[i]) == HImode) + { + sw_64_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs); + i++; + ofs += 2; + } + + /* The remainder must be byte copies. */ + while (i < nregs) + { + gcc_assert (GET_MODE (data_regs[i]) == QImode); + emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]); + i++; + ofs += 1; + } + + return 1; +} + +int +sw_64_expand_block_clear (rtx operands[]) +{ + rtx bytes_rtx = operands[1]; + rtx align_rtx = operands[3]; + HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); + HOST_WIDE_INT bytes = orig_bytes; + HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT; + HOST_WIDE_INT alignofs = 0; + rtx orig_dst = operands[0]; + rtx tmp; + int i, words, ofs = 0; + + if (orig_bytes <= 0) + return 1; + if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) + return 0; + + /* Look for stricter alignment. */ + tmp = XEXP (orig_dst, 0); + if (REG_P (tmp)) + align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp))); + else if (GET_CODE (tmp) == PLUS && REG_P (XEXP (tmp, 0)) + && CONST_INT_P (XEXP (tmp, 1))) + { + HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); + int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); + + if (a > align) + { + if (a >= 64) + align = a, alignofs = 8 - c % 8; + else if (a >= 32) + align = a, alignofs = 4 - c % 4; + else if (a >= 16) + align = a, alignofs = 2 - c % 2; + } + } + + /* Handle an unaligned prefix first. */ + + if (alignofs > 0) + { + /* Given that alignofs is bounded by align, the only time BWX could + generate three stores is for a 7 byte fill. Prefer two individual + stores over a load/mask/store sequence. 
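+	 In other words: with byte/word stores available, a prefix of
+	 one to six bytes needs at most two plain stores and is handled
+	 by the individual stores further down, so the masked
+	 read-modify-write below is used only for the seven-byte prefix
+	 (or, given sufficient alignment, whenever BWX is not available
+	 at all).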
*/ + if ((!TARGET_BWX || alignofs == 7) && align >= 32 + && !(alignofs == 4 && bytes >= 4)) + { + machine_mode mode = (align >= 64 ? DImode : SImode); + int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs; + rtx mem, tmp; + HOST_WIDE_INT mask; + + mem = adjust_address (orig_dst, mode, ofs - inv_alignofs); + set_mem_alias_set (mem, 0); + + mask = ~(HOST_WIDE_INT_M1U << (inv_alignofs * 8)); + if (bytes < alignofs) + { + mask |= HOST_WIDE_INT_M1U << ((inv_alignofs + bytes) * 8); + ofs += bytes; + bytes = 0; + } + else + { + bytes -= alignofs; + ofs += alignofs; + } + alignofs = 0; + + tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask), NULL_RTX, 1, + OPTAB_WIDEN); + + emit_move_insn (mem, tmp); + } + + if (TARGET_BWX && (alignofs & 1) && bytes >= 1) + { + emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); + bytes -= 1; + ofs += 1; + alignofs -= 1; + } + if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2) + { + emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx); + bytes -= 2; + ofs += 2; + alignofs -= 2; + } + if (alignofs == 4 && bytes >= 4) + { + emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx); + bytes -= 4; + ofs += 4; + alignofs = 0; + } + + /* If we've not used the extra lead alignment information by now, + we won't be able to. Downgrade align to match what's left over. */ + if (alignofs > 0) + { + alignofs = alignofs & -alignofs; + align = MIN (align, alignofs * BITS_PER_UNIT); + } + } + + /* Handle a block of contiguous long-words. */ + + if (align >= 64 && bytes >= 8) + { + words = bytes / 8; + + for (i = 0; i < words; ++i) + emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8), + const0_rtx); + + bytes -= words * 8; + ofs += words * 8; + } + + /* If the block is large and appropriately aligned, emit a single + store followed by a sequence of stl_u insns. */ + + if (align >= 32 && bytes > 16) + { + rtx orig_dsta; + + emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx); + bytes -= 4; + ofs += 4; + + orig_dsta = XEXP (orig_dst, 0); + if (GET_CODE (orig_dsta) == LO_SUM) + orig_dsta = force_reg (Pmode, orig_dsta); + + words = bytes / 8; + for (i = 0; i < words; ++i) + { + rtx mem = change_address ( + orig_dst, Pmode, + gen_rtx_AND (Pmode, plus_constant (Pmode, orig_dsta, ofs + i * 8), + GEN_INT (-8))); + set_mem_alias_set (mem, 0); + emit_move_insn (mem, const0_rtx); + } + + /* Depending on the alignment, the first stl_u may have overlapped + with the initial stl, which means that the last stl_u didn't + write as much as it would appear. Leave those questionable bytes + unaccounted for. */ + bytes -= words * 8 - 4; + ofs += words * 8 - 4; + } + + /* Handle a smaller block of aligned words. */ + + if ((align >= 64 && bytes == 4) || (align == 32 && bytes >= 4)) + { + words = bytes / 4; + + for (i = 0; i < words; ++i) + emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4), + const0_rtx); + + bytes -= words * 4; + ofs += words * 4; + } + + /* An unaligned block uses stl_u stores for as many as possible. */ + + if (bytes >= 8) + { + words = bytes / 8; + + sw_64_expand_unaligned_store_words (NULL, orig_dst, words, ofs); + + bytes -= words * 8; + ofs += words * 8; + } + + /* Next clean up any trailing pieces. */ + + /* Count the number of bits in BYTES for which aligned stores could + be emitted. */ + words = 0; + for (i = (TARGET_BWX ? 
1 : 4); i * BITS_PER_UNIT <= align; i <<= 1) + if (bytes & i) + words += 1; + + /* If we have appropriate alignment (and it wouldn't take too many + instructions otherwise), mask out the bytes we need. */ + if (TARGET_BWX ? words > 2 : bytes > 0) + { + if (align >= 64) + { + rtx mem, tmp; + HOST_WIDE_INT mask; + + mem = adjust_address (orig_dst, DImode, ofs); + set_mem_alias_set (mem, 0); + + mask = HOST_WIDE_INT_M1U << (bytes * 8); + + tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask), NULL_RTX, + 1, OPTAB_WIDEN); + + emit_move_insn (mem, tmp); + return 1; + } + else if (align >= 32 && bytes < 4) + { + rtx mem, tmp; + HOST_WIDE_INT mask; + + mem = adjust_address (orig_dst, SImode, ofs); + set_mem_alias_set (mem, 0); + + mask = HOST_WIDE_INT_M1U << (bytes * 8); + + tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask), NULL_RTX, + 1, OPTAB_WIDEN); + + emit_move_insn (mem, tmp); + return 1; + } + } + + if (!TARGET_BWX && bytes >= 4) + { + sw_64_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs); + bytes -= 4; + ofs += 4; + } + + if (bytes >= 2) + { + if (align >= 16) + { + do + { + emit_move_insn (adjust_address (orig_dst, HImode, ofs), + const0_rtx); + bytes -= 2; + ofs += 2; + } + while (bytes >= 2); + } + else if (!TARGET_BWX) + { + sw_64_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs); + bytes -= 2; + ofs += 2; + } + } + + while (bytes > 0) + { + emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); + bytes -= 1; + ofs += 1; + } + + return 1; +} + +/* Returns a mask so that zap(x, value) == x & mask. */ + +rtx +sw_64_expand_zap_mask (HOST_WIDE_INT value) +{ + rtx result; + int i; + HOST_WIDE_INT mask = 0; + + for (i = 7; i >= 0; --i) + { + mask <<= 8; + if (!((value >> i) & 1)) + mask |= 0xff; + } + + result = gen_int_mode (mask, DImode); + return result; +} + +void +sw_64_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx), + machine_mode mode, rtx op0, rtx op1, rtx op2) +{ + op0 = gen_lowpart (mode, op0); + + if (op1 == const0_rtx) + op1 = CONST0_RTX (mode); + else + op1 = gen_lowpart (mode, op1); + + if (op2 == const0_rtx) + op2 = CONST0_RTX (mode); + else + op2 = gen_lowpart (mode, op2); + + emit_insn ((*gen) (op0, op1, op2)); +} + +/* A subroutine of the atomic operation splitters. Jump to LABEL if + COND is true. Mark the jump as unlikely to be taken. */ + +static void +emit_unlikely_jump (rtx cond, rtx label) +{ + rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx); + rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x)); + add_reg_br_prob_note (insn, profile_probability::very_unlikely ()); +} + +/* Subroutines of the atomic operation splitters. Emit barriers + as needed for the memory MODEL. */ + +static void +sw_64_pre_atomic_barrier (enum memmodel model) +{ + if (need_atomic_barrier_p (model, true)) + emit_insn (gen_memory_barrier ()); +} + +static void +sw_64_post_atomic_barrier (enum memmodel model) +{ + if (need_atomic_barrier_p (model, false)) + emit_insn (gen_memory_barrier ()); +} + +/* A subroutine of the atomic operation splitters. Emit an insxl + instruction in MODE. 
*/ + +static rtx +emit_insxl (machine_mode mode, rtx op1, rtx op2) +{ + rtx ret = gen_reg_rtx (DImode); + rtx (*fn) (rtx, rtx, rtx); + + switch (mode) + { + case E_QImode: + fn = gen_insbl; + break; + case E_HImode: + fn = gen_inswl; + break; + case E_SImode: + fn = gen_insll; + break; + case E_DImode: + fn = gen_insql; + break; + default: + gcc_unreachable (); + } + + op1 = force_reg (mode, op1); + emit_insn (fn (ret, op1, op2)); + + return ret; +} + +/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation + to perform. MEM is the memory on which to operate. VAL is the second + operand of the binary operator. BEFORE and AFTER are optional locations to + return the value of MEM either before of after the operation. SCRATCH is + a scratch register. */ + +void +sw_64_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before, + rtx after, rtx scratch, enum memmodel model) +{ + machine_mode mode = GET_MODE (mem); + rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch)); + + label = gen_label_rtx (); + emit_label (label); + label = gen_rtx_LABEL_REF (DImode, label); + + if (before == NULL) + before = scratch; + emit_insn (gen_load_locked (mode, before, mem)); + + if (!TARGET_SW8A) + { + if (after) + { + rtx cond1 = gen_rtx_REG (DImode, REGNO (after)); + emit_insn (gen_rtx_SET (cond1, const1_rtx)); + emit_insn (gen_builtin_wr_f (cond1)); + } + else + { + rtx cond2 = gen_rtx_REG (DImode, 28); + emit_insn (gen_rtx_SET (cond2, const1_rtx)); + emit_insn (gen_builtin_wr_f (cond2)); + } + } + if (code == NOT) + { + x = gen_rtx_AND (mode, before, val); + emit_insn (gen_rtx_SET (val, x)); + + x = gen_rtx_NOT (mode, val); + } + else + x = gen_rtx_fmt_ee (code, mode, before, val); + if (after) + emit_insn (gen_rtx_SET (after, copy_rtx (x))); + emit_insn (gen_rtx_SET (scratch, x)); + + emit_insn (gen_store_conditional (mode, cond, mem, scratch)); + if (!TARGET_SW8A) + emit_insn (gen_builtin_rd_f (cond)); + + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label); +} + +/* Expand a compare and swap operation. 
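+
+   In outline the expansion below is the usual LL/SC retry loop:
+
+     label1:  load_locked    retval, [mem]
+              compare retval with oldval
+                (SW8A: branch to label2 on mismatch; otherwise the
+                 comparison result is written to the lock flag with
+                 builtin_wr_f)
+              store_conditional  newval -> [mem]
+                (non-SW8A: builtin_rd_f, then branch to label2 if the
+                 flag shows the compare failed)
+              strong form only: branch back to label1 if the
+              store_conditional itself failed
+     label2: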
*/ + +void +sw_64_split_compare_and_swap (rtx operands[]) +{ + rtx cond, retval, mem, oldval, newval; + bool is_weak; + enum memmodel mod_s, mod_f; + machine_mode mode; + rtx label1, label2, x; + + rtx imust = operands[8]; + cond = operands[0]; + retval = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = (operands[5] != const0_rtx); + mod_s = memmodel_from_int (INTVAL (operands[6])); + mod_f = memmodel_from_int (INTVAL (operands[7])); + mode = GET_MODE (mem); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label1, 0)); + } + label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + + emit_insn (gen_load_locked (mode, retval, mem)); + x = gen_lowpart (DImode, retval); + + rtx imust1; + if (TARGET_SW8A) + { + if (oldval == const0_rtx) + { + emit_move_insn (cond, const0_rtx); + x = gen_rtx_NE (DImode, x, const0_rtx); + } + else + { + x = gen_rtx_EQ (DImode, x, oldval); + emit_insn (gen_rtx_SET (cond, x)); + x = gen_rtx_EQ (DImode, cond, const0_rtx); + } + emit_unlikely_jump (x, label2); + } + else + { + x = gen_rtx_EQ (DImode, x, oldval); + imust1 = gen_lowpart (DImode, imust); + emit_insn (gen_rtx_SET (imust1, x)); + emit_insn (gen_builtin_wr_f (imust1)); + } + + emit_move_insn (cond, newval); + emit_insn (gen_store_conditional (mode, cond, mem, gen_lowpart (mode, cond))); + + if (!TARGET_SW8A) + { + emit_insn (gen_builtin_rd_f (cond)); + imust1 = gen_rtx_EQ (DImode, imust1, const0_rtx); + emit_unlikely_jump (imust1, label2); + } + if (!is_weak) + { + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label1); + } + + if (!is_mm_relaxed (mod_f)) + emit_label (XEXP (label2, 0)); + + if (is_mm_relaxed (mod_f)) + emit_label (XEXP (label2, 0)); +} + +void +sw_64_expand_compare_and_swap_12 (rtx operands[]) +{ + rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f; + machine_mode mode; + rtx addr, align, wdst; + rtx imust; + + cond = operands[0]; + dst = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + is_weak = operands[5]; + mod_s = operands[6]; + mod_f = operands[7]; + mode = GET_MODE (mem); + bool use_cas = GET_MODE_SIZE (mode) >= 32 && flag_sw_use_cas; + if (!use_cas) + imust = operands[8]; + + /* We forced the address into a register via mem_noofs_operand. 
*/ + addr = XEXP (mem, 0); + gcc_assert (register_operand (addr, DImode)); + + align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8), NULL_RTX, 1, + OPTAB_DIRECT); + if (oldval != const0_rtx && TARGET_SW8A && use_cas) + oldval = emit_insxl (mode, oldval, addr); + oldval = convert_modes (DImode, mode, oldval, 1); + + if (newval != const0_rtx) + newval = emit_insxl (mode, newval, addr); + + wdst = gen_reg_rtx (DImode); + if (TARGET_SW8A && use_cas) + emit_insn (gen_atomic_compare_and_swap_1_target_sw8a ( + mode, cond, wdst, mem, oldval, newval, align, is_weak, mod_s, mod_f)); + else + emit_insn (gen_atomic_compare_and_swap_1 (mode, cond, wdst, mem, oldval, + newval, align, is_weak, mod_s, + mod_f, imust)); + emit_move_insn (dst, gen_lowpart (mode, wdst)); +} + +void +sw_64_split_compare_and_swap_12 (rtx operands[]) +{ + rtx cond, dest, orig_mem, oldval, newval, align, scratch; + machine_mode mode; + bool is_weak; + enum memmodel mod_s, mod_f; + rtx label1, label2, mem, addr, width, mask, x; + rtx imust; + + cond = operands[0]; + dest = operands[1]; + orig_mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + align = operands[5]; + is_weak = (operands[6] != const0_rtx); + mod_s = memmodel_from_int (INTVAL (operands[7])); + mod_f = memmodel_from_int (INTVAL (operands[8])); + imust = operands[9]; + scratch = operands[10]; + mode = GET_MODE (orig_mem); + addr = XEXP (orig_mem, 0); + + mem = gen_rtx_MEM (DImode, align); + MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); + if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) + set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); + + label1 = NULL_RTX; + if (!is_weak) + { + label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label1, 0)); + } + label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + + emit_insn (gen_load_locked (DImode, scratch, mem)); + + width = GEN_INT (GET_MODE_BITSIZE (mode)); + mask = GEN_INT (mode == QImode ? 0xff : 0xffff); + emit_insn (gen_extxl (dest, scratch, width, addr)); + + rtx imust1; + if (TARGET_SW8A) + { + if (oldval == const0_rtx) + { + emit_move_insn (cond, const0_rtx); + x = gen_rtx_NE (DImode, dest, const0_rtx); + } + else + { + x = gen_rtx_EQ (DImode, dest, oldval); + emit_insn (gen_rtx_SET (cond, x)); + x = gen_rtx_EQ (DImode, cond, const0_rtx); + } + emit_unlikely_jump (x, label2); + } + else + { + x = gen_rtx_EQ (DImode, dest, oldval); + imust1 = gen_lowpart (DImode, imust); + emit_insn (gen_rtx_SET (imust1, x)); + emit_insn (gen_builtin_wr_f (imust1)); + } + + emit_insn (gen_mskxl (cond, scratch, mask, addr)); + + if (newval != const0_rtx) + emit_insn (gen_iordi3 (cond, cond, newval)); + + emit_insn (gen_store_conditional (DImode, cond, mem, cond)); + if (!TARGET_SW8A) + { + emit_insn (gen_builtin_rd_f (cond)); + imust1 = gen_rtx_EQ (DImode, imust1, const0_rtx); + emit_unlikely_jump (imust1, label2); + } + + if (!is_weak) + { + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label1); + } + + if (!is_mm_relaxed (mod_f)) + emit_label (XEXP (label2, 0)); + + if (is_mm_relaxed (mod_f)) + emit_label (XEXP (label2, 0)); +} + +/* Expand an atomic exchange operation. 
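+
+   Again an LL/SC retry loop: load_locked the old value into RETVAL,
+   copy VAL into the scratch register, store_conditional it back, and
+   branch back to the top if the store failed; on non-SW8A parts the
+   lock flag is seeded with builtin_wr_f and read back with
+   builtin_rd_f around the store_conditional.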
*/ + +void +sw_64_split_atomic_exchange (rtx operands[]) +{ + rtx retval, mem, val, scratch; + enum memmodel model; + machine_mode mode; + rtx label, x, cond; + + retval = operands[0]; + mem = operands[1]; + val = operands[2]; + model = (enum memmodel) INTVAL (operands[3]); + scratch = operands[4]; + mode = GET_MODE (mem); + cond = gen_lowpart (DImode, scratch); + + label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + emit_insn (gen_load_locked (mode, retval, mem)); + if (!TARGET_SW8A) + { + emit_insn (gen_rtx_SET (cond, const1_rtx)); + emit_insn (gen_builtin_wr_f (cond)); + } + emit_move_insn (scratch, val); + emit_insn (gen_store_conditional (mode, cond, mem, scratch)); + if (!TARGET_SW8A) + emit_insn (gen_builtin_rd_f (cond)); + + x = gen_rtx_EQ (DImode, cond, const0_rtx); + emit_unlikely_jump (x, label); +} + +void +sw_64_expand_atomic_exchange_12 (rtx operands[]) +{ + rtx dst, mem, val, model; + machine_mode mode; + rtx addr, align, wdst; + + dst = operands[0]; + mem = operands[1]; + val = operands[2]; + model = operands[3]; + mode = GET_MODE (mem); + + /* We forced the address into a register via mem_noofs_operand. */ + addr = XEXP (mem, 0); + gcc_assert (register_operand (addr, DImode)); + + align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8), NULL_RTX, 1, + OPTAB_DIRECT); + + /* Insert val into the correct byte location within the word. */ + if (val != const0_rtx) + val = emit_insxl (mode, val, addr); + + wdst = gen_reg_rtx (DImode); + emit_insn (gen_atomic_exchange_1 (mode, wdst, mem, val, align, model)); + emit_move_insn (dst, gen_lowpart (mode, wdst)); +} + +void +sw_64_split_atomic_exchange_12 (rtx operands[]) +{ + rtx dest, orig_mem, addr, val, align, scratch; + rtx label, mem, width, mask, x; + machine_mode mode; + enum memmodel model; + + dest = operands[0]; + orig_mem = operands[1]; + val = operands[2]; + align = operands[3]; + model = (enum memmodel) INTVAL (operands[4]); + scratch = operands[5]; + mode = GET_MODE (orig_mem); + addr = XEXP (orig_mem, 0); + + mem = gen_rtx_MEM (DImode, align); + MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); + if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) + set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); + + label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + emit_insn (gen_load_locked (DImode, scratch, mem)); + if (!TARGET_SW8A) + { + emit_insn (gen_rtx_SET (dest, const1_rtx)); + emit_insn (gen_builtin_wr_f (dest)); + } + + width = GEN_INT (GET_MODE_BITSIZE (mode)); + mask = GEN_INT (mode == QImode ? 0xff : 0xffff); + emit_insn (gen_extxl (dest, scratch, width, addr)); + emit_insn (gen_mskxl (scratch, scratch, mask, addr)); + if (val != const0_rtx) + emit_insn (gen_iordi3 (scratch, scratch, val)); + + emit_insn (gen_store_conditional (DImode, scratch, mem, scratch)); + if (!TARGET_SW8A) + emit_insn (gen_builtin_rd_f (scratch)); + + x = gen_rtx_EQ (DImode, scratch, const0_rtx); + emit_unlikely_jump (x, label); +} + +/* Emit an atomic compare-and-swap operation. SI and larger modes. 
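+
+   Unlike the LL/SC splitters above, this form goes through the
+   sw_64_atomic_cassi/casdi patterns: RETVAL is preloaded with NEWVAL,
+   the cas pattern is emitted on (OLDVAL, MEM, RETVAL), and success is
+   recovered afterwards by testing whether RETVAL and OLDVAL compare
+   equal.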
*/ + +void +sw_64_split_atomic_cas (rtx operands[]) +{ + rtx cond, retval, mem, oldval, newval; + rtx (*gen) (rtx, rtx, rtx); + enum memmodel mod_s; + machine_mode mode; + + cond = operands[0]; + retval = operands[1]; + mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + + mod_s = memmodel_from_int (INTVAL (operands[6])); + mode = GET_MODE (mem); + + if (GET_MODE (mem) == SImode && GET_MODE (oldval) == DImode + && GET_MODE (newval) == DImode) + { + oldval = gen_rtx_REG (SImode, REGNO (oldval)); + newval = gen_rtx_REG (SImode, REGNO (newval)); + } + + switch (mode) + { + case E_SImode: + gen = gen_sw_64_atomic_cassi; + break; + case E_DImode: + gen = gen_sw_64_atomic_casdi; + break; + default: + gcc_unreachable (); + } + + emit_insn (gen_rtx_SET (retval, newval)); + emit_insn (gen (oldval, mem, retval)); + + rtx x = gen_lowpart (DImode, retval); + rtx x1 = gen_lowpart (DImode, oldval); + x = gen_rtx_EQ (DImode, x, x1); + emit_insn (gen_rtx_SET (cond, x)); +} + +/* Emit an atomic compare-and-swap operation. HI and smaller modes. */ + +void +sw_64_split_atomic_cas_12 (rtx operands[]) +{ + rtx cond, dest, orig_mem, oldval, newval, align, scratch; + machine_mode mode; + bool is_weak; + enum memmodel mod_s, mod_f; + rtx label1, label2, mem, addr, width, mask, x; + + cond = operands[0]; + dest = operands[1]; + orig_mem = operands[2]; + oldval = operands[3]; + newval = operands[4]; + align = operands[5]; + is_weak = (operands[6] != const0_rtx); + mod_s = memmodel_from_int (INTVAL (operands[7])); + mod_f = memmodel_from_int (INTVAL (operands[8])); + scratch = operands[9]; + mode = GET_MODE (orig_mem); + addr = XEXP (orig_mem, 0); + + mem = gen_rtx_MEM (DImode, align); + MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); + if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) + set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); + + emit_move_insn (scratch, mem); + + width = GEN_INT (GET_MODE_BITSIZE (mode)); + mask = GEN_INT (mode == QImode ? 0xff : 0xffff); + emit_insn (gen_extxl (dest, scratch, width, addr)); + emit_insn (gen_mskxl (cond, scratch, mask, addr)); + + rtx scratch2 = operands[10]; + if (newval != const0_rtx) + emit_insn (gen_iordi3 (scratch2, cond, newval)); + if (oldval == const0_rtx) + { + emit_move_insn (cond, const0_rtx); + x = gen_rtx_NE (DImode, dest, const0_rtx); + } + else + { + emit_insn (gen_iordi3 (scratch, cond, oldval)); + emit_insn (gen_sw_64_atomic_casdi (scratch, mem, scratch2)); + + x = gen_rtx_EQ (DImode, scratch2, scratch); + emit_insn (gen_rtx_SET (cond, x)); + x = gen_rtx_EQ (DImode, cond, const0_rtx); + } +} + +/* Adjust the cost of a scheduling dependency. Return the new cost of + a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ + +static int +sw_64_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, + unsigned int) +{ + enum attr_type dep_insn_type; + + /* If the dependence is an anti-dependence, there is no cost. For an + output dependence, there is sometimes a cost, but it doesn't seem + worth handling those few cases. */ + if (dep_type != 0) + return cost; + + /* If we can't recognize the insns, we can't really do anything. */ + if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) + return cost; + + dep_insn_type = get_attr_type (dep_insn); + + /* Bring in the user-defined memory latency. */ + if (dep_insn_type == TYPE_ILD || dep_insn_type == TYPE_FLD + || dep_insn_type == TYPE_LDSYM) + cost += sw_64_memory_latency - 1; + + /* Everything else handled in DFA bypasses now. 
*/ + + return cost; +} + +/* The number of instructions that can be issued per cycle. */ + +static int +sw_64_issue_rate (void) +{ + return ((sw_64_tune == PROCESSOR_SW6 || sw_64_tune == PROCESSOR_SW8) ? 4 : 2); +} + +/* How many alternative schedules to try. This should be as wide as the + scheduling freedom in the DFA, but no wider. Making this value too + large results extra work for the scheduler. */ + +static int +sw_64_multipass_dfa_lookahead (void) +{ + return ((sw_64_tune == PROCESSOR_SW6 || sw_64_tune == PROCESSOR_SW8) ? 4 : 2); +} + +/* Machine-specific function data. */ + +struct GTY (()) sw_64_links; + +/* Information about a function's frame layout. */ +struct GTY (()) sw_64_frame_info +{ + /* The size of the frame in bytes. */ + HOST_WIDE_INT frame_size; + + /* Bit X is set if the function saves or restores GPR X. */ + unsigned HOST_WIDE_INT sa_mask; + + /* The size of the saved callee-save int/FP registers. */ + HOST_WIDE_INT saved_regs_size; + + /* The number of extra stack bytes taken up by register varargs. */ + HOST_WIDE_INT saved_varargs_size; + + /* Offset of virtual frame pointer from stack pointer/frame bottom. */ + HOST_WIDE_INT callee_offset; + + /* Offset of hard frame pointer from stack pointer/frame bottom. */ + HOST_WIDE_INT hard_frame_pointer_offset; + + HOST_WIDE_INT local_offset; + + /* The offset of arg_pointer_rtx from the bottom of the frame. */ + HOST_WIDE_INT arg_pointer_offset; + + bool emit_frame_pointer; +}; + +struct GTY (()) machine_function +{ + unsigned HOST_WIDE_INT sa_mask; + HOST_WIDE_INT sa_size; + HOST_WIDE_INT frame_size; + + /* For flag_reorder_blocks_and_partition. */ + rtx gp_save_rtx; + + /* For VMS condition handlers. */ + bool uses_condition_handler; + + struct sw_64_frame_info frame; + + /* Linkage entries. */ + hash_map *links; +}; + +/* How to allocate a 'struct machine_function'. */ + +static struct machine_function * +sw_64_init_machine_status (void) +{ + return ggc_cleared_alloc (); +} + +/* Start the ball rolling with RETURN_ADDR_RTX. */ + +rtx +sw_64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED) +{ + if (count != 0) + return const0_rtx; + + return get_hard_reg_initial_val (Pmode, REG_RA); +} + +/* Return or create a memory slot containing the gp value for the current + function. Needed only if TARGET_LD_BUGGY_LDGP. */ + +rtx +sw_64_gp_save_rtx (void) +{ + rtx_insn *seq; + rtx m = cfun->machine->gp_save_rtx; + + if (m == NULL) + { + start_sequence (); + + m = assign_stack_local (Pmode, UNITS_PER_WORD, BITS_PER_WORD); + m = validize_mem (m); + emit_move_insn (m, pic_offset_table_rtx); + + seq = get_insns (); + end_sequence (); + + /* We used to simply emit the sequence after entry_of_function. + However this breaks the CFG if the first instruction in the + first block is not the NOTE_INSN_BASIC_BLOCK, for example a + label. Emit the sequence properly on the edge. We are only + invoked from dw2_build_landing_pads and finish_eh_generation + will call commit_edge_insertions thanks to a kludge. 
*/ + insert_insn_on_edge (seq, + single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); + + cfun->machine->gp_save_rtx = m; + } + + return m; +} + +static void +sw_64_instantiate_decls (void) +{ + if (cfun->machine->gp_save_rtx != NULL_RTX) + instantiate_decl_rtl (cfun->machine->gp_save_rtx); +} + +static int +sw_64_ra_ever_killed (void) +{ + rtx_insn *top; + + if (!has_hard_reg_initial_val (Pmode, REG_RA)) + return (int) df_regs_ever_live_p (REG_RA); + + push_topmost_sequence (); + top = get_insns (); + pop_topmost_sequence (); + + return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL); +} + +/* Return the trap mode suffix applicable to the current + instruction, or NULL. */ + +static const char * +get_trap_mode_suffix (void) +{ + enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn); + + switch (s) + { + case TRAP_SUFFIX_NONE: + return NULL; + + case TRAP_SUFFIX_SU: + if (sw_64_fptm >= SW_64_FPTM_SU) + return "su"; + return NULL; + + case TRAP_SUFFIX_SUI: + if (sw_64_fptm >= SW_64_FPTM_SUI) + return "sui"; + return NULL; + + case TRAP_SUFFIX_V_SV: + switch (sw_64_fptm) + { + case SW_64_FPTM_N: + return NULL; + case SW_64_FPTM_U: + return "v"; + case SW_64_FPTM_SU: + case SW_64_FPTM_SUI: + return "sv"; + default: + gcc_unreachable (); + } + + case TRAP_SUFFIX_V_SV_SVI: + switch (sw_64_fptm) + { + case SW_64_FPTM_N: + return NULL; + case SW_64_FPTM_U: + return "v"; + case SW_64_FPTM_SU: + return "sv"; + case SW_64_FPTM_SUI: + return "svi"; + default: + gcc_unreachable (); + } + break; + + case TRAP_SUFFIX_U_SU_SUI: + switch (sw_64_fptm) + { + case SW_64_FPTM_N: + return NULL; + case SW_64_FPTM_U: + return "u"; + case SW_64_FPTM_SU: + return "su"; + case SW_64_FPTM_SUI: + return "sui"; + default: + gcc_unreachable (); + } + break; + + default: + gcc_unreachable (); + } + gcc_unreachable (); +} + +/* Return the rounding mode suffix applicable to the current + instruction, or NULL. */ + +static const char * +get_round_mode_suffix (void) +{ + enum attr_round_suffix s = get_attr_round_suffix (current_output_insn); + + switch (s) + { + case ROUND_SUFFIX_NONE: + return NULL; + case ROUND_SUFFIX_NORMAL: + switch (sw_64_fprm) + { + case SW_64_FPRM_NORM: + return NULL; + case SW_64_FPRM_MINF: + return "m"; + case SW_64_FPRM_CHOP: + return "c"; + case SW_64_FPRM_DYN: + return "d"; + default: + gcc_unreachable (); + } + break; + + case ROUND_SUFFIX_C: + return "c"; + + default: + gcc_unreachable (); + } + gcc_unreachable (); +} + +/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */ + +static bool +sw_64_print_operand_punct_valid_p (unsigned char code) +{ + return (code == '/' || code == ',' || code == '-' || code == '~' + || code == '#' || code == '*' || code == '&'); +} + +/* Implement TARGET_PRINT_OPERAND. The sw_64-specific + operand codes are documented below. */ + +static const char * +get_round_mode_suffix_sw (void) +{ + enum attr_round_suffix s = get_attr_round_suffix (current_output_insn); + + switch (s) + { + case ROUND_SUFFIX_NONE: + return NULL; + case ROUND_SUFFIX_NORMAL: + switch (sw_64_fprm) + { + case SW_64_FPRM_NORM: + return "_g"; + case SW_64_FPRM_MINF: + return "_p"; + case SW_64_FPRM_CHOP: + return "_z"; + case SW_64_FPRM_DYN: + return "_n"; + default: + gcc_unreachable (); + } + break; + + case ROUND_SUFFIX_C: + return "_z"; + + default: + gcc_unreachable (); + } + gcc_unreachable (); +} +static void +sw_64_print_operand (FILE *file, rtx x, int code) +{ + int i; + + switch (code) + { + case '~': + /* Print the assembler name of the current function. 
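   This is the string recorded in sw_64_fnname.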
*/ + assemble_name (file, sw_64_fnname); + break; + + case '&': + if (const char *name = get_some_local_dynamic_name ()) + assemble_name (file, name); + else + output_operand_lossage ("'%%&' used without any " + "local dynamic TLS references"); + break; + + case '/': + /* Generates the instruction suffix. The TRAP_SUFFIX and ROUND_SUFFIX + attributes are examined to determine what is appropriate. */ + { + const char *trap = get_trap_mode_suffix (); + const char *round = get_round_mode_suffix (); + + break; + } + + case 'T': + { + const char *round_sw = get_round_mode_suffix_sw (); + + if (round_sw) + fprintf (file, "%s", (round_sw ? round_sw : "")); + break; + } + case ',': + /* Generates single precision suffix for floating point + instructions (s for IEEE, f for VAX). */ + fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file); + break; + + case '-': + /* Generates double precision suffix for floating point + instructions (t for IEEE, g for VAX). */ + fputc ((TARGET_FLOAT_VAX ? 'g' : 'd'), file); + break; + + case '#': + if (sw_64_this_literal_sequence_number == 0) + sw_64_this_literal_sequence_number = sw_64_next_sequence_number++; + fprintf (file, "%d", sw_64_this_literal_sequence_number); + break; + + case '*': + if (sw_64_this_gpdisp_sequence_number == 0) + sw_64_this_gpdisp_sequence_number = sw_64_next_sequence_number++; + fprintf (file, "%d", sw_64_this_gpdisp_sequence_number); + break; + + case 'J': + { + const char *lituse; + + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL) + { + x = XVECEXP (x, 0, 0); + lituse = "lituse_tlsgd"; + } + else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL) + { + x = XVECEXP (x, 0, 0); + lituse = "lituse_tlsldm"; + } + else if (CONST_INT_P (x)) + lituse = "lituse_jsr"; + else + { + output_operand_lossage ("invalid %%J value"); + break; + } + + if (x != const0_rtx) + fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x)); + } + break; + + case 'j': + { + const char *lituse; + +#ifdef HAVE_AS_JSRDIRECT_RELOCS + lituse = "lituse_jsrdirect"; +#else + lituse = "lituse_jsr"; +#endif + + gcc_assert (INTVAL (x) != 0); + fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x)); + } + break; + case 'r': + /* If this operand is the constant zero, write it as "$31". */ + if (REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (x == CONST0_RTX (GET_MODE (x))) + fprintf (file, "$31"); + else + output_operand_lossage ("invalid %%r value"); + break; + + case 'R': + /* Similar, but for floating-point. */ + if (REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (x == CONST0_RTX (GET_MODE (x))) + fprintf (file, "$f31"); + else + output_operand_lossage ("invalid %%R value"); + break; + + case 'N': + /* Write the 1's complement of a constant. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%N value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x)); + break; + + case 'P': + /* Write 1 << C, for a constant C. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%P value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, HOST_WIDE_INT_1 << INTVAL (x)); + break; + + case 'h': + /* Write the high-order 16 bits of a constant, sign-extended. */ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%h value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16); + break; + + case 'L': + /* Write the low-order 16 bits of a constant, sign-extended. 
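	 The expression below subtracts 2 * (x & 0x8000) so that bit 15 acts
	 as the sign bit of the 16-bit value.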
*/ + if (!CONST_INT_P (x)) + output_operand_lossage ("invalid %%L value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, + (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000)); + break; + + case 'm': + /* Write mask for ZAP insn. */ + if (CONST_INT_P (x)) + { + HOST_WIDE_INT mask = 0, value = INTVAL (x); + + for (i = 0; i < 8; i++, value >>= 8) + if (value & 0xff) + mask |= (1 << i); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask); + } + else + output_operand_lossage ("invalid %%m value"); + break; + + case 'M': + /* 'b', 'w', 'l', or 'q' as the value of the constant. */ + if (!mode_width_operand (x, VOIDmode)) + output_operand_lossage ("invalid %%M value"); + + fprintf (file, "%s", + (INTVAL (x) == 8 + ? "b" + : INTVAL (x) == 16 ? "w" : INTVAL (x) == 32 ? "l" : "q")); + break; + + case 'U': + /* Similar, except do it from the mask. */ + if (CONST_INT_P (x)) + { + HOST_WIDE_INT value = INTVAL (x); + + if (value == 0xff) + { + fputc ('b', file); + break; + } + if (value == 0xffff) + { + fputc ('w', file); + break; + } + if (value == 0xffffffff) + { + fputc ('l', file); + break; + } + if (value == -1) + { + fputc ('q', file); + break; + } + } + /* Write "_a" for AUTO_INC_DEC access. */ + if (MEM_P (x) + && (GET_CODE (XEXP (x, 0)) == POST_INC + || GET_CODE (XEXP (x, 0)) == POST_DEC + || GET_CODE (XEXP (x, 0)) == POST_MODIFY)) + { + fprintf (file, "_a"); + break; + } + break; + + case 's': + /* Write the constant value divided by 8. */ + if (!CONST_INT_P (x) || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64 + || (INTVAL (x) & 7) != 0) + output_operand_lossage ("invalid %%s value"); + + fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8); + break; + + case 'C': + case 'D': + case 'c': + case 'd': + /* Write out comparison name. */ + { + enum rtx_code c = GET_CODE (x); + + if (!COMPARISON_P (x)) + output_operand_lossage ("invalid %%C value"); + + else if (code == 'D') + c = reverse_condition (c); + else if (code == 'c') + c = swap_condition (c); + else if (code == 'd') + c = swap_condition (reverse_condition (c)); + + if (c == LEU) + fprintf (file, "ule"); + else if (c == LTU) + fprintf (file, "ult"); + else if (c == UNORDERED) + fprintf (file, "un"); + else + fprintf (file, "%s", GET_RTX_NAME (c)); + } + break; + + case 'E': + /* Write the divide or modulus operator. */ + switch (GET_CODE (x)) + { + case DIV: + fprintf (file, "div%s", GET_MODE (x) == SImode ? "w" : "l"); + break; + case UDIV: + fprintf (file, "div%su", GET_MODE (x) == SImode ? "w" : "l"); + break; + case MOD: + fprintf (file, "rem%s", GET_MODE (x) == SImode ? "w" : "l"); + break; + case UMOD: + fprintf (file, "rem%su", GET_MODE (x) == SImode ? "w" : "l"); + break; + default: + output_operand_lossage ("invalid %%E value"); + break; + } + break; + + case 'A': + /* Write "_u" for unaligned access. 
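	 The unaligned-access memory patterns wrap the address in an AND,
	 which is what is checked for below.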
*/ + if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND) + fprintf (file, "_u"); + break; + + case 0: + if (REG_P (x)) + fprintf (file, "%s", reg_names[REGNO (x)]); + else if (MEM_P (x)) + { + if (GET_CODE (XEXP (x, 0)) == POST_INC) + fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)), + reg_names[REGNO (XEXP (XEXP (x, 0), 0))]); + else if (GET_CODE (XEXP (x, 0)) == POST_DEC) + fprintf (file, "%d(%s)", -GET_MODE_SIZE (GET_MODE (x)), + reg_names[REGNO (XEXP (XEXP (x, 0), 0))]); + else if (GET_CODE (XEXP (x, 0)) == POST_MODIFY) + output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1)); + else + output_address (GET_MODE (x), XEXP (x, 0)); + } + else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC) + { + switch (XINT (XEXP (x, 0), 1)) + { + case UNSPEC_DTPREL: + case UNSPEC_TPREL: + output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0)); + break; + default: + output_operand_lossage ("unknown relocation unspec"); + break; + } + } + else + output_addr_const (file, x); + break; + + default: + output_operand_lossage ("invalid %%xn code"); + } +} + +/* Implement TARGET_PRINT_OPERAND_ADDRESS. */ + +static void +sw_64_print_operand_address (FILE *file, machine_mode /*mode. */, rtx addr) +{ + int basereg = 31; + HOST_WIDE_INT offset = 0; + + if (GET_CODE (addr) == AND) + addr = XEXP (addr, 0); + + if (GET_CODE (addr) == PLUS && CONST_INT_P (XEXP (addr, 1))) + { + offset = INTVAL (XEXP (addr, 1)); + addr = XEXP (addr, 0); + } + + if (GET_CODE (addr) == LO_SUM) + { + const char *reloc16, *reloclo; + rtx op1 = XEXP (addr, 1); + + if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC) + { + op1 = XEXP (op1, 0); + switch (XINT (op1, 1)) + { + case UNSPEC_DTPREL: + reloc16 = NULL; + reloclo = (sw_64_tls_size == 16 ? "dtprel" : "dtprello"); + break; + case UNSPEC_TPREL: + reloc16 = NULL; + reloclo = (sw_64_tls_size == 16 ? "tprel" : "tprello"); + break; + default: + output_operand_lossage ("unknown relocation unspec"); + return; + } + + output_addr_const (file, XVECEXP (op1, 0, 0)); + } + else + { + reloc16 = "gprel"; + reloclo = "gprellow"; + output_addr_const (file, op1); + } + + if (offset) + fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset); + + addr = XEXP (addr, 0); + switch (GET_CODE (addr)) + { + case REG: + basereg = REGNO (addr); + break; + + case SUBREG: + basereg = subreg_regno (addr); + break; + + default: + gcc_unreachable (); + } + + fprintf (file, "($%d)\t\t!%s", basereg, + (basereg == 29 ? reloc16 : reloclo)); + return; + } + + switch (GET_CODE (addr)) + { + case REG: + basereg = REGNO (addr); + break; + + case SUBREG: + basereg = subreg_regno (addr); + break; + + case CONST_INT: + offset = INTVAL (addr); + break; + + case SYMBOL_REF: + gcc_assert (this_is_asm_operands); + fprintf (file, "%s", XSTR (addr, 0)); + return; + + case CONST: + gcc_assert (this_is_asm_operands); + gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS + && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF); + fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC, + XSTR (XEXP (XEXP (addr, 0), 0), 0), + INTVAL (XEXP (XEXP (addr, 0), 1))); + return; + + default: + output_operand_lossage ("invalid operand address"); + return; + } + + fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg); +} + +/* Emit RTL insns to initialize the variable parts of a trampoline at + M_TRAMP. FNDECL is target function's decl. CHAIN_VALUE is an rtx + for the static chain value for the function. 
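   The trampoline is 32 bytes: the two instruction words stored at offsets 0
   and 8, the target function address at offset 16, and the static chain
   value at offset 24.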
*/ + +static void +sw_64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) +{ + rtx fnaddr, mem, word1, word2; + + fnaddr = XEXP (DECL_RTL (fndecl), 0); + +#ifdef POINTERS_EXTEND_UNSIGNED + fnaddr = convert_memory_address (Pmode, fnaddr); + chain_value = convert_memory_address (Pmode, chain_value); +#endif + + /* These 4 instructions are: + ldq $1,24($27) + ldq $27,16($27) + jmp $31,($27),0 + nop + We don't bother setting the HINT field of the jump; the nop + is merely there for padding. */ + word1 = GEN_INT (HOST_WIDE_INT_C (0x8f7b00108c3b0018)); + word2 = GEN_INT (HOST_WIDE_INT_C (0x43ff075f0ffb0000)); + + /* Store the first two words, as computed above. */ + mem = adjust_address (m_tramp, DImode, 0); + emit_move_insn (mem, word1); + mem = adjust_address (m_tramp, DImode, 8); + emit_move_insn (mem, word2); + + /* Store function address and static chain value. */ + mem = adjust_address (m_tramp, Pmode, 16); + emit_move_insn (mem, fnaddr); + mem = adjust_address (m_tramp, Pmode, 24); + emit_move_insn (mem, chain_value); + + emit_insn (gen_imb ()); +#ifdef HAVE_ENABLE_EXECUTE_STACK + emit_library_call (init_one_libfunc ("__enable_execute_stack"), + LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); +#endif +} + +/* Determine where to put an argument to a function. + Value is zero to push the argument on the stack, + or a hard register in which to store the argument. + + CUM is a variable of type CUMULATIVE_ARGS which gives info about + the preceding args and about the function being called. + + ARG is a description of the argument. + On Sw_64 the first 6 words of args are normally in registers + and the rest are pushed. */ + +static rtx +sw_64_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + int basereg; + int num_args; + + /* Don't get confused and pass small structures in FP registers. */ + if (arg.aggregate_type_p ()) + basereg = 16; + else + { + /* With sw_64_split_complex_arg, we shouldn't see any raw complex + values here. */ + gcc_checking_assert (!COMPLEX_MODE_P (arg.mode)); + + /* Set up defaults for FP operands passed in FP registers, and + integral operands passed in integer registers. */ + if (TARGET_FPREGS && GET_MODE_CLASS (arg.mode) == MODE_FLOAT) + basereg = 32 + 16; + else + basereg = 16; + } + + /* ??? Irritatingly, the definition of CUMULATIVE_ARGS is different for + the two platforms, so we can't avoid conditional compilation. */ + { + if (*cum >= 6) + return NULL_RTX; + num_args = *cum; + + if (arg.end_marker_p ()) + basereg = 16; + else if (targetm.calls.must_pass_in_stack (arg)) + return NULL_RTX; + } + + return gen_rtx_REG (arg.mode, num_args + basereg); +} + +/* Update the data in CUM to advance over an argument ARG. */ + +static void +sw_64_function_arg_advance (cumulative_args_t cum_v, + const function_arg_info &arg) +{ + CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); + bool onstack = targetm.calls.must_pass_in_stack (arg); + int increment = onstack ? 6 : SW_64_ARG_SIZE (arg.mode, arg.type); + + *cum += increment; +} + +static int +sw_64_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg) +{ + int words = 0; + CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v); + + if (*cum < 6 && 6 < *cum + SW_64_ARG_SIZE (arg.mode, arg.type)) + words = 6 - *cum; + + return words * UNITS_PER_WORD; +} + +/* Return true if ARG must be returned in memory, instead of in registers. 
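   Aggregates and float vectors are always returned in memory; scalar,
   complex and integer-vector types are returned in memory only when they
   exceed one register (complex floats are judged by their element size).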
*/ + +static bool +sw_64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) +{ + machine_mode mode = VOIDmode; + int size; + + if (type) + { + mode = TYPE_MODE (type); + + /* All aggregates are returned in memory, except on OpenVMS where + records that fit 64 bits should be returned by immediate value + as required by section 3.8.7.1 of the OpenVMS Calling Standard. */ + if (AGGREGATE_TYPE_P (type)) + return true; + } + + size = GET_MODE_SIZE (mode); + switch (GET_MODE_CLASS (mode)) + { + case MODE_VECTOR_FLOAT: + /* Pass all float vectors in memory, like an aggregate. */ + return true; + + case MODE_COMPLEX_FLOAT: + /* We judge complex floats on the size of their element, + not the size of the whole type. */ + size = GET_MODE_UNIT_SIZE (mode); + break; + + case MODE_INT: + case MODE_FLOAT: + case MODE_COMPLEX_INT: + case MODE_VECTOR_INT: + break; + + default: + /* ??? We get called on all sorts of random stuff from + aggregate_value_p. We must return something, but it's not + clear what's safe to return. Pretend it's a struct I + guess. */ + return true; + } + + /* Otherwise types must fit in one register. */ + return size > UNITS_PER_WORD; +} + +/* Return true if TYPE should be passed by invisible reference. */ + +static bool +sw_64_pass_by_reference (cumulative_args_t, const function_arg_info &arg) +{ + /* Pass float and _Complex float variable arguments by reference. + This avoids 64-bit store from a FP register to a pretend args save area + and subsequent 32-bit load from the saved location to a FP register. + + Note that 32-bit loads and stores to/from a FP register on sw_64 reorder + bits to form a canonical 64-bit value in the FP register. This fact + invalidates compiler assumption that 32-bit FP value lives in the lower + 32-bits of the passed 64-bit FP value, so loading the 32-bit value from + the stored 64-bit location using 32-bit FP load is invalid on sw_64. + + This introduces sort of ABI incompatibility, but until _Float32 was + introduced, C-family languages promoted 32-bit float variable arg to + a 64-bit double, and it was not allowed to pass float as a varible + argument. Passing _Complex float as a variable argument never + worked on sw_64. Thus, we have no backward compatibility issues + to worry about, and passing unpromoted _Float32 and _Complex float + as a variable argument will actually work in the future. */ + + if (arg.mode == SFmode || arg.mode == SCmode) + return !arg.named; + + return arg.mode == TFmode || arg.mode == TCmode; +} + +/* Define how to find the value returned by a function. VALTYPE is the + data type of the value (as a tree). If the precise function being + called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0. + MODE is set instead of VALTYPE for libcalls. + + On Sw_64 the value is found in $0 for integer functions and + $f0 for floating-point functions. */ + +static rtx +sw_64_function_value_1 (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, + machine_mode mode) +{ + unsigned int regnum, dummy ATTRIBUTE_UNUSED; + enum mode_class mclass; + + gcc_assert (!valtype || !sw_64_return_in_memory (valtype, func)); + + if (valtype) + mode = TYPE_MODE (valtype); + + mclass = GET_MODE_CLASS (mode); + switch (mclass) + { + case MODE_INT: + /* Do the same thing as PROMOTE_MODE except for libcalls on VMS, + where we have them returning both SImode and DImode. 
*/ + PROMOTE_MODE (mode, dummy, valtype); + /* FALLTHRU */ + + case MODE_COMPLEX_INT: + case MODE_VECTOR_INT: + regnum = 0; + break; + + case MODE_FLOAT: + regnum = 32; + break; + + case MODE_COMPLEX_FLOAT: + { + machine_mode cmode = GET_MODE_INNER (mode); + + return gen_rtx_PARALLEL ( + VOIDmode, + gen_rtvec (2, + gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32), + const0_rtx), + gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33), + GEN_INT (GET_MODE_SIZE (cmode))))); + } + + case MODE_RANDOM: + default: + gcc_unreachable (); + } + + return gen_rtx_REG (mode, regnum); +} + +/* Implement TARGET_FUNCTION_VALUE. */ + +static rtx +sw_64_function_value (const_tree valtype, const_tree fn_decl_or_type, + bool /* outgoing */) +{ + return sw_64_function_value_1 (valtype, fn_decl_or_type, VOIDmode); +} + +/* Implement TARGET_LIBCALL_VALUE. */ + +static rtx +sw_64_libcall_value (machine_mode mode, const_rtx /* fun */) +{ + return sw_64_function_value_1 (NULL_TREE, NULL_TREE, mode); +} + +/* Implement TARGET_FUNCTION_VALUE_REGNO_P. + + On the Sw_64, $0 $1 and $f0 $f1 are the only register thus used. */ + +static bool +sw_64_function_value_regno_p (const unsigned int regno) +{ + return (regno == 0 || regno == 1 || regno == 32 || regno == 33); +} + +/* TCmode complex values are passed by invisible reference. We + should not split these values. */ + +static bool +sw_64_split_complex_arg (const_tree type) +{ + return TYPE_MODE (type) != TCmode; +} + +static tree +sw_64_build_builtin_va_list (void) +{ + tree base, ofs, space, record, type_decl; + + record = (*lang_hooks.types.make_type) (RECORD_TYPE); + type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL, + get_identifier ("__va_list_tag"), record); + TYPE_STUB_DECL (record) = type_decl; + TYPE_NAME (record) = type_decl; + + /* C++? SET_IS_AGGR_TYPE (record, 1); */ + + /* Dummy field to prevent alignment warnings. */ + space + = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE, integer_type_node); + DECL_FIELD_CONTEXT (space) = record; + DECL_ARTIFICIAL (space) = 1; + DECL_IGNORED_P (space) = 1; + + ofs = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("__offset"), + integer_type_node); + DECL_FIELD_CONTEXT (ofs) = record; + DECL_CHAIN (ofs) = space; + + base = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("__base"), + ptr_type_node); + DECL_FIELD_CONTEXT (base) = record; + DECL_CHAIN (base) = ofs; + + TYPE_FIELDS (record) = base; + layout_type (record); + + va_list_gpr_counter_field = ofs; + return record; +} + +/* Helper function for sw_64_stdarg_optimize_hook. Skip over casts + and constant additions. */ + +static gimple * +va_list_skip_additions (tree lhs) +{ + gimple *stmt; + + for (;;) + { + enum tree_code code; + + stmt = SSA_NAME_DEF_STMT (lhs); + + if (gimple_code (stmt) == GIMPLE_PHI) + return stmt; + + if (!is_gimple_assign (stmt) || gimple_assign_lhs (stmt) != lhs) + return NULL; + + if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME) + return stmt; + code = gimple_assign_rhs_code (stmt); + if (!CONVERT_EXPR_CODE_P (code) + && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR) + || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST + || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt)))) + return stmt; + + lhs = gimple_assign_rhs1 (stmt); + } +} + +/* Check if LHS = RHS statement is + LHS = *(ap.__base + ap.__offset + cst) + or + LHS = *(ap.__base + + ((ap.__offset + cst <= 47) + ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2). 
+ If the former, indicate that GPR registers are needed, + if the latter, indicate that FPR registers are needed. + + Also look for LHS = (*ptr).field, where ptr is one of the forms + listed above. + + On sw_64, cfun->va_list_gpr_size is used as size of the needed + regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR + registers are needed and bit 1 set if FPR registers are needed. + Return true if va_list references should not be scanned for the + current statement. */ + +static bool +sw_64_stdarg_optimize_hook (struct stdarg_info *si, const gimple *stmt) +{ + tree base, offset, rhs; + int offset_arg = 1; + gimple *base_stmt; + + if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) != GIMPLE_SINGLE_RHS) + return false; + + rhs = gimple_assign_rhs1 (stmt); + while (handled_component_p (rhs)) + rhs = TREE_OPERAND (rhs, 0); + if (TREE_CODE (rhs) != MEM_REF + || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME) + return false; + + stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0)); + if (stmt == NULL || !is_gimple_assign (stmt) + || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR) + return false; + + base = gimple_assign_rhs1 (stmt); + if (TREE_CODE (base) == SSA_NAME) + { + base_stmt = va_list_skip_additions (base); + if (base_stmt && is_gimple_assign (base_stmt) + && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) + base = gimple_assign_rhs1 (base_stmt); + } + + if (TREE_CODE (base) != COMPONENT_REF + || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) + { + base = gimple_assign_rhs2 (stmt); + if (TREE_CODE (base) == SSA_NAME) + { + base_stmt = va_list_skip_additions (base); + if (base_stmt && is_gimple_assign (base_stmt) + && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) + base = gimple_assign_rhs1 (base_stmt); + } + + if (TREE_CODE (base) != COMPONENT_REF + || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) + return false; + + offset_arg = 0; + } + + base = get_base_address (base); + if (TREE_CODE (base) != VAR_DECL + || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names)) + return false; + + offset = gimple_op (stmt, 1 + offset_arg); + if (TREE_CODE (offset) == SSA_NAME) + { + gimple *offset_stmt = va_list_skip_additions (offset); + + if (offset_stmt && gimple_code (offset_stmt) == GIMPLE_PHI) + { + HOST_WIDE_INT sub; + gimple *arg1_stmt, *arg2_stmt; + tree arg1, arg2; + enum tree_code code1, code2; + + if (gimple_phi_num_args (offset_stmt) != 2) + goto escapes; + + arg1_stmt + = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0)); + arg2_stmt + = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1)); + if (arg1_stmt == NULL || !is_gimple_assign (arg1_stmt) + || arg2_stmt == NULL || !is_gimple_assign (arg2_stmt)) + goto escapes; + + code1 = gimple_assign_rhs_code (arg1_stmt); + code2 = gimple_assign_rhs_code (arg2_stmt); + if (code1 == COMPONENT_REF + && (code2 == MINUS_EXPR || code2 == PLUS_EXPR)) + /* Do nothing. 
*/; + else if (code2 == COMPONENT_REF + && (code1 == MINUS_EXPR || code1 == PLUS_EXPR)) + { + std::swap (arg1_stmt, arg2_stmt); + code2 = code1; + } + else + goto escapes; + + if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt))) + goto escapes; + + sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt)); + if (code2 == MINUS_EXPR) + sub = -sub; + if (sub < -48 || sub > -32) + goto escapes; + + arg1 = gimple_assign_rhs1 (arg1_stmt); + arg2 = gimple_assign_rhs1 (arg2_stmt); + if (TREE_CODE (arg2) == SSA_NAME) + { + arg2_stmt = va_list_skip_additions (arg2); + if (arg2_stmt == NULL || !is_gimple_assign (arg2_stmt) + || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF) + goto escapes; + arg2 = gimple_assign_rhs1 (arg2_stmt); + } + if (arg1 != arg2) + goto escapes; + + if (TREE_CODE (arg1) != COMPONENT_REF + || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field + || get_base_address (arg1) != base) + goto escapes; + + /* Need floating point regs. */ + cfun->va_list_fpr_size |= 2; + return false; + } + if (offset_stmt && is_gimple_assign (offset_stmt) + && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF) + offset = gimple_assign_rhs1 (offset_stmt); + } + if (TREE_CODE (offset) != COMPONENT_REF + || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field + || get_base_address (offset) != base) + goto escapes; + else + /* Need general regs. */ + cfun->va_list_fpr_size |= 1; + return false; + +escapes: + si->va_list_escapes = true; + return false; +} + +/* Perform any needed actions needed for a function that is receiving a + variable number of arguments. */ + +static void +sw_64_setup_incoming_varargs (cumulative_args_t pcum, + const function_arg_info &arg, int *pretend_size, + int no_rtl) +{ + CUMULATIVE_ARGS cum = *get_cumulative_args (pcum); + + /* Skip the current argument. */ + targetm.calls.function_arg_advance (pack_cumulative_args (&cum), arg); + + /* On SYSV and friends, we allocate space for all 12 arg registers, but + only push those that are remaining. However, if NO registers need to + be saved, don't allocate any space. This is not only because we won't + need the space, but because AP includes the current_pretend_args_size + and we don't want to mess up any ap-relative addresses already made. + + If we are not to use the floating-point registers, save the integer + registers where we would put the floating-point registers. This is + not the most efficient way to implement varargs with just one register + class, but it isn't worth doing anything more efficient in this rare + case. */ + if (cum >= 6) + return; + + if (!no_rtl) + { + int count; + alias_set_type set = get_varargs_alias_set (); + rtx tmp; + + count = cfun->va_list_gpr_size / UNITS_PER_WORD; + if (count > 6 - cum) + count = 6 - cum; + + /* Detect whether integer registers or floating-point registers + are needed by the detected va_arg statements. See above for + how these values are computed. Note that the "escape" value + is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of + these bits set. 
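	 Bit 0 asks for the block of integer argument registers and bit 1 for
	 the floating-point block, matching the encoding set up by
	 sw_64_stdarg_optimize_hook above.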
*/ + gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3); + + if (cfun->va_list_fpr_size & 1) + { + tmp = gen_rtx_MEM (BLKmode, + plus_constant (Pmode, virtual_incoming_args_rtx, + (cum + 6) * UNITS_PER_WORD)); + MEM_NOTRAP_P (tmp) = 1; + set_mem_alias_set (tmp, set); + move_block_from_reg (16 + cum, tmp, count); + } + + if (cfun->va_list_fpr_size & 2) + { + tmp = gen_rtx_MEM (BLKmode, + plus_constant (Pmode, virtual_incoming_args_rtx, + cum * UNITS_PER_WORD)); + MEM_NOTRAP_P (tmp) = 1; + set_mem_alias_set (tmp, set); + move_block_from_reg (16 + cum + TARGET_FPREGS * 32, tmp, count); + } + } +#ifdef SW_64_ENABLE_FULL_ASAN + cfun->machine->frame.saved_varargs_size = 12 * UNITS_PER_WORD; +#else + *pretend_size = 12 * UNITS_PER_WORD; +#endif +} + +static void +sw_64_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT offset; + tree t, offset_field, base_field; + + if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK) + return; + + /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base + up by 48, storing fp arg registers in the first 48 bytes, and the + integer arg registers in the next 48 bytes. This is only done, + however, if any integer registers need to be stored. + + If no integer registers need be stored, then we must subtract 48 + in order to account for the integer arg registers which are counted + in argsize above, but which are not actually stored on the stack. + Must further be careful here about structures straddling the last + integer argument register; that futzes with pretend_args_size, + which changes the meaning of AP. */ + + if (NUM_ARGS < 6) + offset = 6 * UNITS_PER_WORD; + else +#ifdef SW_64_ENABLE_FULL_ASAN + offset = -6 * UNITS_PER_WORD + cfun->machine->frame.saved_varargs_size + + crtl->args.pretend_args_size; +#else + offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size; +#endif + + base_field = TYPE_FIELDS (TREE_TYPE (valist)); + offset_field = DECL_CHAIN (base_field); + + base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field), valist, + base_field, NULL_TREE); + offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field), valist, + offset_field, NULL_TREE); + + t = make_tree (ptr_type_node, virtual_incoming_args_rtx); + t = fold_build_pointer_plus_hwi (t, offset); + t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); + + t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD); + t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t); + TREE_SIDE_EFFECTS (t) = 1; + expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); +} + +static tree +sw_64_gimplify_va_arg_1 (tree type, tree base, tree offset, gimple_seq *pre_p) +{ + tree type_size, ptr_type, addend, t, addr; + gimple_seq internal_post; + + /* If the type could not be passed in registers, skip the block + reserved for the registers. */ + if (must_pass_va_arg_in_stack (type)) + { + t = build_int_cst (TREE_TYPE (offset), 6 * 8); + gimplify_assign (offset, build2 (MAX_EXPR, TREE_TYPE (offset), offset, t), + pre_p); + } + + addend = offset; + ptr_type = build_pointer_type_for_mode (type, ptr_mode, true); + + if (TREE_CODE (type) == COMPLEX_TYPE) + { + tree real_part, imag_part, real_temp; + + real_part + = sw_64_gimplify_va_arg_1 (TREE_TYPE (type), base, offset, pre_p); + + /* Copy the value into a new temporary, lest the formal temporary + be reused out from under us. 
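	 Gimplifying the imaginary part below advances OFFSET again, so the
	 real part has to be forced into its own temporary first.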
*/ + real_temp = get_initialized_tmp_var (real_part, pre_p, NULL); + + imag_part + = sw_64_gimplify_va_arg_1 (TREE_TYPE (type), base, offset, pre_p); + + return build2 (COMPLEX_EXPR, type, real_temp, imag_part); + } + else if (TREE_CODE (type) == REAL_TYPE) + { + tree fpaddend, cond, fourtyeight; + + fourtyeight = build_int_cst (TREE_TYPE (addend), 6 * 8); + fpaddend + = fold_build2 (MINUS_EXPR, TREE_TYPE (addend), addend, fourtyeight); + cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight); + addend + = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond, fpaddend, addend); + } + + /* Build the final address and force that value into a temporary. */ + addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend); + internal_post = NULL; + gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue); + gimple_seq_add_seq (pre_p, internal_post); + + /* Update the offset field. */ + type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type)); + if (type_size == NULL || TREE_OVERFLOW (type_size)) + t = size_zero_node; + else + { + t = size_binop (PLUS_EXPR, type_size, size_int (7)); + t = size_binop (TRUNC_DIV_EXPR, t, size_int (8)); + t = size_binop (MULT_EXPR, t, size_int (8)); + } + t = fold_convert (TREE_TYPE (offset), t); + gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t), + pre_p); + + return build_va_arg_indirect_ref (addr); +} + +static tree +sw_64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, + gimple_seq *post_p) +{ + tree offset_field, base_field, offset, base, t, r; + bool indirect; + + base_field = TYPE_FIELDS (va_list_type_node); + offset_field = DECL_CHAIN (base_field); + base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field), valist, + base_field, NULL_TREE); + offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field), valist, + offset_field, NULL_TREE); + + /* Pull the fields of the structure out into temporaries. Since we never + modify the base field, we can use a formal temporary. Sign-extend the + offset field so that it's the proper width for pointer arithmetic. */ + base = get_formal_tmp_var (base_field, pre_p); + + t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field); + offset = get_initialized_tmp_var (t, pre_p, NULL); + + indirect = pass_va_arg_by_reference (type); + + if (indirect) + { + if (TREE_CODE (type) == COMPLEX_TYPE + && targetm.calls.split_complex_arg (type)) + { + tree real_part, imag_part, real_temp; + + tree ptr_type + = build_pointer_type_for_mode (TREE_TYPE (type), ptr_mode, true); + + real_part = sw_64_gimplify_va_arg_1 (ptr_type, base, offset, pre_p); + real_part = build_va_arg_indirect_ref (real_part); + + /* Copy the value into a new temporary, lest the formal temporary + be reused out from under us. */ + real_temp = get_initialized_tmp_var (real_part, pre_p, NULL); + + imag_part = sw_64_gimplify_va_arg_1 (ptr_type, base, offset, pre_p); + imag_part = build_va_arg_indirect_ref (imag_part); + + r = build2 (COMPLEX_EXPR, type, real_temp, imag_part); + + /* Stuff the offset temporary back into its field. */ + gimplify_assign (unshare_expr (offset_field), + fold_convert (TREE_TYPE (offset_field), offset), + pre_p); + return r; + } + else + type = build_pointer_type_for_mode (type, ptr_mode, true); + } + + /* Find the value. Note that this will be a stable indirection, or + a composite of stable indirections in the case of complex. */ + r = sw_64_gimplify_va_arg_1 (type, base, offset, pre_p); + + /* Stuff the offset temporary back into its field. 
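     Subsequent va_arg expansions read __offset from the va_list, so the
     advanced value has to be written back before returning.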
*/ + gimplify_assign (unshare_expr (offset_field), + fold_convert (TREE_TYPE (offset_field), offset), pre_p); + + if (indirect) + r = build_va_arg_indirect_ref (r); + + return r; +} + +/* Builtins. */ + +enum sw_64_builtin +{ + SW_64_BUILTIN_CMPBGE, + SW_64_BUILTIN_EXTBL, + SW_64_BUILTIN_EXTWL, + SW_64_BUILTIN_EXTLL, + SW_64_BUILTIN_EXTQL, + SW_64_BUILTIN_EXTWH, + SW_64_BUILTIN_EXTLH, + SW_64_BUILTIN_EXTQH, + SW_64_BUILTIN_INSBL, + SW_64_BUILTIN_INSWL, + SW_64_BUILTIN_INSLL, + SW_64_BUILTIN_INSQL, + SW_64_BUILTIN_INSWH, + SW_64_BUILTIN_INSLH, + SW_64_BUILTIN_INSQH, + SW_64_BUILTIN_MSKBL, + SW_64_BUILTIN_MSKWL, + SW_64_BUILTIN_MSKLL, + SW_64_BUILTIN_MSKQL, + SW_64_BUILTIN_MSKWH, + SW_64_BUILTIN_MSKLH, + SW_64_BUILTIN_MSKQH, + SW_64_BUILTIN_UMULH, + SW_64_BUILTIN_ZAP, + SW_64_BUILTIN_ZAPNOT, + SW_64_BUILTIN_AMASK, + SW_64_BUILTIN_IMPLVER, + SW_64_BUILTIN_RPCC, + + /* TARGET_MAX. */ + SW_64_BUILTIN_MINUB8, + SW_64_BUILTIN_MINSB8, + SW_64_BUILTIN_MINUW4, + SW_64_BUILTIN_MINSW4, + SW_64_BUILTIN_MAXUB8, + SW_64_BUILTIN_MAXSB8, + SW_64_BUILTIN_MAXUW4, + SW_64_BUILTIN_MAXSW4, + SW_64_BUILTIN_PERR, + SW_64_BUILTIN_PKLB, + SW_64_BUILTIN_PKWB, + SW_64_BUILTIN_UNPKBL, + SW_64_BUILTIN_UNPKBW, + + /* TARGET_CIX. */ + SW_64_BUILTIN_CTTZ, + SW_64_BUILTIN_CTLZ, + SW_64_BUILTIN_CTPOP, + SW_64_BUILTIN_SBT, + SW_64_BUILTIN_CBT, + + SW_64_BUILTIN_max +}; + +static enum insn_code const code_for_builtin[SW_64_BUILTIN_max] + = {CODE_FOR_builtin_cmpbge, CODE_FOR_extbl, CODE_FOR_extwl, CODE_FOR_extll, + CODE_FOR_extql, CODE_FOR_extwh, CODE_FOR_extlh, CODE_FOR_extqh, + CODE_FOR_builtin_insbl, CODE_FOR_builtin_inswl, CODE_FOR_builtin_insll, + CODE_FOR_insql, CODE_FOR_inswh, CODE_FOR_inslh, CODE_FOR_insqh, + CODE_FOR_mskbl, CODE_FOR_mskwl, CODE_FOR_mskll, CODE_FOR_mskql, + CODE_FOR_mskwh, CODE_FOR_msklh, CODE_FOR_mskqh, CODE_FOR_umuldi3_highpart, + CODE_FOR_builtin_zap, CODE_FOR_builtin_zapnot, CODE_FOR_builtin_amask, + CODE_FOR_builtin_implver, CODE_FOR_builtin_rpcc, + + + /* TARGET_MAX */ + CODE_FOR_builtin_minub8, CODE_FOR_builtin_minsb8, CODE_FOR_builtin_minuw4, + CODE_FOR_builtin_minsw4, CODE_FOR_builtin_maxub8, CODE_FOR_builtin_maxsb8, + CODE_FOR_builtin_maxuw4, CODE_FOR_builtin_maxsw4, CODE_FOR_builtin_perr, + CODE_FOR_builtin_pklb, CODE_FOR_builtin_pkwb, CODE_FOR_builtin_unpkbl, + CODE_FOR_builtin_unpkbw, + + /* TARGET_CIX */ + CODE_FOR_ctzdi2, CODE_FOR_clzdi2, CODE_FOR_popcountdi2, + + CODE_FOR_builtin_sbt, CODE_FOR_builtin_cbt}; + +struct sw_64_builtin_def +{ + const char *name; + enum sw_64_builtin code; + unsigned int target_mask; + bool is_const; +}; + +static struct sw_64_builtin_def const zero_arg_builtins[] + = {{"__builtin_sw_64_implver", SW_64_BUILTIN_IMPLVER, 0, true}, + {"__builtin_sw_64_rpcc", SW_64_BUILTIN_RPCC, 0, false}}; + +static struct sw_64_builtin_def const one_arg_builtins[] + = {{"__builtin_sw_64_amask", SW_64_BUILTIN_AMASK, 0, true}, + {"__builtin_sw_64_pklb", SW_64_BUILTIN_PKLB, MASK_MAX, true}, + {"__builtin_sw_64_pkwb", SW_64_BUILTIN_PKWB, MASK_MAX, true}, + {"__builtin_sw_64_unpkbl", SW_64_BUILTIN_UNPKBL, MASK_MAX, true}, + {"__builtin_sw_64_unpkbw", SW_64_BUILTIN_UNPKBW, MASK_MAX, true}, + {"__builtin_sw_64_cttz", SW_64_BUILTIN_CTTZ, MASK_CIX, true}, + {"__builtin_sw_64_ctlz", SW_64_BUILTIN_CTLZ, MASK_CIX, true}, + {"__builtin_sw_64_ctpop", SW_64_BUILTIN_CTPOP, MASK_CIX, true}}; + +static struct sw_64_builtin_def const two_arg_builtins[] + = {{"__builtin_sw_64_cmpbge", SW_64_BUILTIN_CMPBGE, 0, true}, + {"__builtin_sw_64_extbl", SW_64_BUILTIN_EXTBL, 0, true}, + 
{"__builtin_sw_64_extwl", SW_64_BUILTIN_EXTWL, 0, true}, + {"__builtin_sw_64_extll", SW_64_BUILTIN_EXTLL, 0, true}, + {"__builtin_sw_64_extql", SW_64_BUILTIN_EXTQL, 0, true}, + {"__builtin_sw_64_extwh", SW_64_BUILTIN_EXTWH, 0, true}, + {"__builtin_sw_64_extlh", SW_64_BUILTIN_EXTLH, 0, true}, + {"__builtin_sw_64_extqh", SW_64_BUILTIN_EXTQH, 0, true}, + {"__builtin_sw_64_insbl", SW_64_BUILTIN_INSBL, 0, true}, + {"__builtin_sw_64_inswl", SW_64_BUILTIN_INSWL, 0, true}, + {"__builtin_sw_64_insll", SW_64_BUILTIN_INSLL, 0, true}, + {"__builtin_sw_64_insql", SW_64_BUILTIN_INSQL, 0, true}, + {"__builtin_sw_64_inswh", SW_64_BUILTIN_INSWH, 0, true}, + {"__builtin_sw_64_inslh", SW_64_BUILTIN_INSLH, 0, true}, + {"__builtin_sw_64_insqh", SW_64_BUILTIN_INSQH, 0, true}, + {"__builtin_sw_64_mskbl", SW_64_BUILTIN_MSKBL, 0, true}, + {"__builtin_sw_64_mskwl", SW_64_BUILTIN_MSKWL, 0, true}, + {"__builtin_sw_64_mskll", SW_64_BUILTIN_MSKLL, 0, true}, + {"__builtin_sw_64_mskql", SW_64_BUILTIN_MSKQL, 0, true}, + {"__builtin_sw_64_mskwh", SW_64_BUILTIN_MSKWH, 0, true}, + {"__builtin_sw_64_msklh", SW_64_BUILTIN_MSKLH, 0, true}, + {"__builtin_sw_64_mskqh", SW_64_BUILTIN_MSKQH, 0, true}, + {"__builtin_sw_64_umulh", SW_64_BUILTIN_UMULH, 0, true}, + {"__builtin_sw_64_zap", SW_64_BUILTIN_ZAP, 0, true}, + {"__builtin_sw_64_zapnot", SW_64_BUILTIN_ZAPNOT, 0, true}, + {"__builtin_sw_64_minub8", SW_64_BUILTIN_MINUB8, MASK_MAX, true}, + {"__builtin_sw_64_minsb8", SW_64_BUILTIN_MINSB8, MASK_MAX, true}, + {"__builtin_sw_64_minuw4", SW_64_BUILTIN_MINUW4, MASK_MAX, true}, + {"__builtin_sw_64_minsw4", SW_64_BUILTIN_MINSW4, MASK_MAX, true}, + {"__builtin_sw_64_maxub8", SW_64_BUILTIN_MAXUB8, MASK_MAX, true}, + {"__builtin_sw_64_maxsb8", SW_64_BUILTIN_MAXSB8, MASK_MAX, true}, + {"__builtin_sw_64_maxuw4", SW_64_BUILTIN_MAXUW4, MASK_MAX, true}, + {"__builtin_sw_64_maxsw4", SW_64_BUILTIN_MAXSW4, MASK_MAX, true}, + {"__builtin_sw_64_perr", SW_64_BUILTIN_PERR, MASK_MAX, true}, + {"__builtin_sw_64_sbt", SW_64_BUILTIN_SBT, MASK_SW8A, true}, + {"__builtin_sw_64_cbt", SW_64_BUILTIN_CBT, MASK_SW8A, true}}; + +static GTY (()) tree sw_64_dimode_u; +static GTY (()) tree sw_64_v8qi_u; +static GTY (()) tree sw_64_v8qi_s; +static GTY (()) tree sw_64_v4hi_u; +static GTY (()) tree sw_64_v4hi_s; + +static GTY (()) tree sw_64_builtins[(int) SW_64_BUILTIN_max]; + +/* Return the sw_64 builtin for CODE. */ + +static tree +sw_64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) +{ + if (code >= SW_64_BUILTIN_max) + return error_mark_node; + return sw_64_builtins[code]; +} + +/* Helper function of sw_64_init_builtins. Add the built-in specified + by NAME, TYPE, CODE, and ECF. */ + +static void +sw_64_builtin_function (const char *name, tree ftype, enum sw_64_builtin code, + unsigned ecf) +{ + tree decl = add_builtin_function (name, ftype, (int) code, BUILT_IN_MD, NULL, + NULL_TREE); + + if (ecf & ECF_CONST) + TREE_READONLY (decl) = 1; + if (ecf & ECF_NOTHROW) + TREE_NOTHROW (decl) = 1; + + sw_64_builtins[(int) code] = decl; +} + +/* Helper function of sw_64_init_builtins. Add the COUNT built-in + functions pointed to by P, with function type FTYPE. */ + +static void +sw_64_add_builtins (const struct sw_64_builtin_def *p, size_t count, tree ftype) +{ + size_t i; + + for (i = 0; i < count; ++i, ++p) + if ((target_flags & p->target_mask) == p->target_mask) + sw_64_builtin_function (p->name, ftype, p->code, + (p->is_const ? 
ECF_CONST : 0) | ECF_NOTHROW); +} + +static void +sw_64_init_builtins (void) +{ + tree ftype; + + sw_64_dimode_u = lang_hooks.types.type_for_mode (DImode, 1); + sw_64_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8); + sw_64_v8qi_s = build_vector_type (intQI_type_node, 8); + sw_64_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4); + sw_64_v4hi_s = build_vector_type (intHI_type_node, 4); + + ftype = build_function_type_list (sw_64_dimode_u, NULL_TREE); + sw_64_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype); + + ftype = build_function_type_list (sw_64_dimode_u, sw_64_dimode_u, NULL_TREE); + sw_64_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype); + + ftype = build_function_type_list (sw_64_dimode_u, sw_64_dimode_u, + sw_64_dimode_u, NULL_TREE); + sw_64_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype); +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. + IGNORE is nonzero if the value is to be ignored. */ + +static rtx +sw_64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, + machine_mode mode ATTRIBUTE_UNUSED, + int ignore ATTRIBUTE_UNUSED) +{ +#define MAX_ARGS 2 + + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); + tree arg; + call_expr_arg_iterator iter; + enum insn_code icode; + rtx op[MAX_ARGS], pat; + int arity; + bool nonvoid; + + if (fcode >= SW_64_BUILTIN_max) + internal_error ("bad builtin fcode"); + icode = code_for_builtin[fcode]; + if (icode == 0) + internal_error ("bad builtin fcode"); + + nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; + + arity = 0; + FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) + { + const struct insn_operand_data *insn_op; + + if (arg == error_mark_node) + return NULL_RTX; + if (arity > MAX_ARGS) + return NULL_RTX; + + insn_op = &insn_data[icode].operand[arity + nonvoid]; + + op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); + + if (!(*insn_op->predicate) (op[arity], insn_op->mode)) + op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]); + arity++; + } + + if (nonvoid) + { + machine_mode tmode = insn_data[icode].operand[0].mode; + if (!target || GET_MODE (target) != tmode + || !(*insn_data[icode].operand[0].predicate) (target, tmode)) + target = gen_reg_rtx (tmode); + } + + switch (arity) + { + case 0: + pat = GEN_FCN (icode) (target); + break; + case 1: + if (nonvoid) + pat = GEN_FCN (icode) (target, op[0]); + else + pat = GEN_FCN (icode) (op[0]); + break; + case 2: + pat = GEN_FCN (icode) (target, op[0], op[1]); + break; + default: + gcc_unreachable (); + } + if (!pat) + return NULL_RTX; + emit_insn (pat); + + if (nonvoid) + return target; + else + return const0_rtx; +} + +/* Fold the builtin for the CMPBGE instruction. This is a vector comparison + with an 8-bit output vector. OPINT contains the integer operands; bit N + of OP_CONST is set if OPINT[N] is valid. 
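   When both operands are constant the comparison is evaluated bytewise at
   compile time; a constant zero second operand folds to 0xff, since every
   unsigned byte compares >= 0.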
*/ + +static tree +sw_64_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const) +{ + if (op_const == 3) + { + int i, val; + for (i = 0, val = 0; i < 8; ++i) + { + unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff; + unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff; + if (c0 >= c1) + val |= 1 << i; + } + return build_int_cst (sw_64_dimode_u, val); + } + else if (op_const == 2 && opint[1] == 0) + return build_int_cst (sw_64_dimode_u, 0xff); + return NULL; +} + +/* Fold the builtin for the ZAPNOT instruction. This is essentially a + specialized form of an AND operation. Other byte manipulation instructions + are defined in terms of this instruction, so this is also used as a + subroutine for other builtins. + + OP contains the tree operands; OPINT contains the extracted integer values. + Bit N of OP_CONST it set if OPINT[N] is valid. OP may be null if only + OPINT may be considered. */ + +static tree +sw_64_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[], + long op_const) +{ + if (op_const & 2) + { + unsigned HOST_WIDE_INT mask = 0; + int i; + + for (i = 0; i < 8; ++i) + if ((opint[1] >> i) & 1) + mask |= (unsigned HOST_WIDE_INT) 0xff << (i * 8); + + if (op_const & 1) + return build_int_cst (sw_64_dimode_u, opint[0] & mask); + + if (op) + return fold_build2 (BIT_AND_EXPR, sw_64_dimode_u, op[0], + build_int_cst (sw_64_dimode_u, mask)); + } + else if ((op_const & 1) && opint[0] == 0) + return build_int_cst (sw_64_dimode_u, 0); + return NULL; +} + +/* Fold the builtins for the EXT family of instructions. */ + +static tree +sw_64_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[], + long op_const, unsigned HOST_WIDE_INT bytemask, + bool is_high) +{ + long zap_const = 2; + tree *zap_op = NULL; + + if (op_const & 2) + { + unsigned HOST_WIDE_INT loc; + + loc = opint[1] & 7; + loc *= BITS_PER_UNIT; + + if (loc != 0) + { + if (op_const & 1) + { + unsigned HOST_WIDE_INT temp = opint[0]; + if (is_high) + temp <<= loc; + else + temp >>= loc; + opint[0] = temp; + zap_const = 3; + } + } + else + zap_op = op; + } + + opint[1] = bytemask; + return sw_64_fold_builtin_zapnot (zap_op, opint, zap_const); +} + +/* Fold the builtins for the INS family of instructions. 
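   A constant zero first operand folds straight to zero; otherwise the value
   is shifted into position and the result is completed by the ZAPNOT folder
   above.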
*/ + +static tree +sw_64_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[], + long op_const, unsigned HOST_WIDE_INT bytemask, + bool is_high) +{ + if ((op_const & 1) && opint[0] == 0) + return build_int_cst (sw_64_dimode_u, 0); + + if (op_const & 2) + { + unsigned HOST_WIDE_INT temp, loc, byteloc; + tree *zap_op = NULL; + + loc = opint[1] & 7; + bytemask <<= loc; + + temp = opint[0]; + if (is_high) + { + byteloc = (64 - (loc * 8)) & 0x3f; + if (byteloc == 0) + zap_op = op; + else + temp >>= byteloc; + bytemask >>= 8; + } + else + { + byteloc = loc * 8; + if (byteloc == 0) + zap_op = op; + else + temp <<= byteloc; + } + + opint[0] = temp; + opint[1] = bytemask; + return sw_64_fold_builtin_zapnot (zap_op, opint, op_const); + } + + return NULL; +} + +static tree +sw_64_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[], + long op_const, unsigned HOST_WIDE_INT bytemask, + bool is_high) +{ + if (op_const & 2) + { + unsigned HOST_WIDE_INT loc; + + loc = opint[1] & 7; + bytemask <<= loc; + + if (is_high) + bytemask >>= 8; + + opint[1] = bytemask ^ 0xff; + } + + return sw_64_fold_builtin_zapnot (op, opint, op_const); +} + +static tree +sw_64_fold_vector_minmax (enum tree_code code, tree op[], tree vtype) +{ + tree op0 = fold_convert (vtype, op[0]); + tree op1 = fold_convert (vtype, op[1]); + tree val = fold_build2 (code, vtype, op0, op1); + return fold_build1 (VIEW_CONVERT_EXPR, sw_64_dimode_u, val); +} + +static tree +sw_64_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp = 0; + int i; + + if (op_const != 3) + return NULL; + + for (i = 0; i < 8; ++i) + { + unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff; + unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff; + if (a >= b) + temp += a - b; + else + temp += b - a; + } + + return build_int_cst (sw_64_dimode_u, temp); +} + +static tree +sw_64_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] >> 24) & 0xff00; + + return build_int_cst (sw_64_dimode_u, temp); +} + +static tree +sw_64_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] >> 8) & 0xff00; + temp |= (opint[0] >> 16) & 0xff0000; + temp |= (opint[0] >> 24) & 0xff000000; + + return build_int_cst (sw_64_dimode_u, temp); +} + +static tree +sw_64_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] & 0xff00) << 24; + + return build_int_cst (sw_64_dimode_u, temp); +} + +static tree +sw_64_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + temp = opint[0] & 0xff; + temp |= (opint[0] & 0x0000ff00) << 8; + temp |= (opint[0] & 0x00ff0000) << 16; + temp |= (opint[0] & 0xff000000) << 24; + + return build_int_cst (sw_64_dimode_u, temp); +} + +static tree +sw_64_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + if (opint[0] == 0) + temp = 64; + else + temp = exact_log2 (opint[0] & -opint[0]); + + return build_int_cst (sw_64_dimode_u, temp); +} + +static tree +sw_64_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned 
HOST_WIDE_INT temp; + + if (op_const == 0) + return NULL; + + if (opint[0] == 0) + temp = 64; + else + temp = 64 - floor_log2 (opint[0]) - 1; + + return build_int_cst (sw_64_dimode_u, temp); +} + +static tree +sw_64_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const) +{ + unsigned HOST_WIDE_INT temp, op; + + if (op_const == 0) + return NULL; + + op = opint[0]; + temp = 0; + while (op) + temp++, op &= op - 1; + + return build_int_cst (sw_64_dimode_u, temp); +} + +static tree +sw_64_builtin_sbt (int n_args, tree *op, unsigned HOST_WIDE_INT opint[], + long op_const) +{ + int i; + if (op_const == 0) + return NULL; + + if (TREE_CODE (op[0]) == INTEGER_CST) + { + error ("The first parameter cannot be a constant!"); + gcc_unreachable (); + } + + if ((opint[1] >> 63) & 0x1 & (warning_sbt_num == 1)) + warning (0, "The second parameter is negative [enabled by default]"); + + warning_sbt_num++; + return NULL; +} + +static tree +sw_64_builtin_cbt (int n_args, tree *op, unsigned HOST_WIDE_INT opint[], + long op_const) +{ + int i; + if (op_const == 0) + return NULL; + + if (TREE_CODE (op[0]) == INTEGER_CST) + { + error ("The first parameter cannot be a constant!"); + gcc_unreachable (); + } + + if ((opint[1] >> 63) & 0x1 & (warning_cbt_num == 1)) + warning (0, "The second parameter is negative [enabled by default]"); + + warning_cbt_num++; + return NULL; +} + +/* Fold one of our builtin functions. */ + +static tree +sw_64_fold_builtin (tree fndecl, int n_args, tree *op, + bool ignore ATTRIBUTE_UNUSED) +{ + unsigned HOST_WIDE_INT opint[MAX_ARGS]; + long op_const = 0; + int i; + + if (n_args > MAX_ARGS) + return NULL; + + for (i = 0; i < n_args; i++) + { + tree arg = op[i]; + if (arg == error_mark_node) + return NULL; + + opint[i] = 0; + if (TREE_CODE (arg) == INTEGER_CST) + { + op_const |= 1L << i; + opint[i] = int_cst_value (arg); + } + } + + switch (DECL_MD_FUNCTION_CODE (fndecl)) + { + case SW_64_BUILTIN_CMPBGE: + return sw_64_fold_builtin_cmpbge (opint, op_const); + + case SW_64_BUILTIN_EXTBL: + return sw_64_fold_builtin_extxx (op, opint, op_const, 0x01, false); + case SW_64_BUILTIN_EXTWL: + return sw_64_fold_builtin_extxx (op, opint, op_const, 0x03, false); + case SW_64_BUILTIN_EXTLL: + return sw_64_fold_builtin_extxx (op, opint, op_const, 0x0f, false); + case SW_64_BUILTIN_EXTQL: + return sw_64_fold_builtin_extxx (op, opint, op_const, 0xff, false); + case SW_64_BUILTIN_EXTWH: + return sw_64_fold_builtin_extxx (op, opint, op_const, 0x03, true); + case SW_64_BUILTIN_EXTLH: + return sw_64_fold_builtin_extxx (op, opint, op_const, 0x0f, true); + case SW_64_BUILTIN_EXTQH: + return sw_64_fold_builtin_extxx (op, opint, op_const, 0xff, true); + + case SW_64_BUILTIN_INSBL: + return sw_64_fold_builtin_insxx (op, opint, op_const, 0x01, false); + case SW_64_BUILTIN_INSWL: + return sw_64_fold_builtin_insxx (op, opint, op_const, 0x03, false); + case SW_64_BUILTIN_INSLL: + return sw_64_fold_builtin_insxx (op, opint, op_const, 0x0f, false); + case SW_64_BUILTIN_INSQL: + return sw_64_fold_builtin_insxx (op, opint, op_const, 0xff, false); + case SW_64_BUILTIN_INSWH: + return sw_64_fold_builtin_insxx (op, opint, op_const, 0x03, true); + case SW_64_BUILTIN_INSLH: + return sw_64_fold_builtin_insxx (op, opint, op_const, 0x0f, true); + case SW_64_BUILTIN_INSQH: + return sw_64_fold_builtin_insxx (op, opint, op_const, 0xff, true); + + case SW_64_BUILTIN_MSKBL: + return sw_64_fold_builtin_mskxx (op, opint, op_const, 0x01, false); + case SW_64_BUILTIN_MSKWL: + return sw_64_fold_builtin_mskxx (op, opint, 
op_const, 0x03, false); + case SW_64_BUILTIN_MSKLL: + return sw_64_fold_builtin_mskxx (op, opint, op_const, 0x0f, false); + case SW_64_BUILTIN_MSKQL: + return sw_64_fold_builtin_mskxx (op, opint, op_const, 0xff, false); + case SW_64_BUILTIN_MSKWH: + return sw_64_fold_builtin_mskxx (op, opint, op_const, 0x03, true); + case SW_64_BUILTIN_MSKLH: + return sw_64_fold_builtin_mskxx (op, opint, op_const, 0x0f, true); + case SW_64_BUILTIN_MSKQH: + return sw_64_fold_builtin_mskxx (op, opint, op_const, 0xff, true); + + case SW_64_BUILTIN_ZAP: + opint[1] ^= 0xff; + /* FALLTHRU */ + case SW_64_BUILTIN_ZAPNOT: + return sw_64_fold_builtin_zapnot (op, opint, op_const); + + case SW_64_BUILTIN_MINUB8: + return sw_64_fold_vector_minmax (MIN_EXPR, op, sw_64_v8qi_u); + case SW_64_BUILTIN_MINSB8: + return sw_64_fold_vector_minmax (MIN_EXPR, op, sw_64_v8qi_s); + case SW_64_BUILTIN_MINUW4: + return sw_64_fold_vector_minmax (MIN_EXPR, op, sw_64_v4hi_u); + case SW_64_BUILTIN_MINSW4: + return sw_64_fold_vector_minmax (MIN_EXPR, op, sw_64_v4hi_s); + case SW_64_BUILTIN_MAXUB8: + return sw_64_fold_vector_minmax (MAX_EXPR, op, sw_64_v8qi_u); + case SW_64_BUILTIN_MAXSB8: + return sw_64_fold_vector_minmax (MAX_EXPR, op, sw_64_v8qi_s); + case SW_64_BUILTIN_MAXUW4: + return sw_64_fold_vector_minmax (MAX_EXPR, op, sw_64_v4hi_u); + case SW_64_BUILTIN_MAXSW4: + return sw_64_fold_vector_minmax (MAX_EXPR, op, sw_64_v4hi_s); + + case SW_64_BUILTIN_PERR: + return sw_64_fold_builtin_perr (opint, op_const); + case SW_64_BUILTIN_PKLB: + return sw_64_fold_builtin_pklb (opint, op_const); + case SW_64_BUILTIN_PKWB: + return sw_64_fold_builtin_pkwb (opint, op_const); + case SW_64_BUILTIN_UNPKBL: + return sw_64_fold_builtin_unpkbl (opint, op_const); + case SW_64_BUILTIN_UNPKBW: + return sw_64_fold_builtin_unpkbw (opint, op_const); + + case SW_64_BUILTIN_CTTZ: + return sw_64_fold_builtin_cttz (opint, op_const); + case SW_64_BUILTIN_CTLZ: + return sw_64_fold_builtin_ctlz (opint, op_const); + case SW_64_BUILTIN_CTPOP: + return sw_64_fold_builtin_ctpop (opint, op_const); + case SW_64_BUILTIN_SBT: + return sw_64_builtin_sbt (n_args, op, opint, op_const); + case SW_64_BUILTIN_CBT: + return sw_64_builtin_cbt (n_args, op, opint, op_const); + case SW_64_BUILTIN_AMASK: + case SW_64_BUILTIN_IMPLVER: + case SW_64_BUILTIN_RPCC: + /* None of these are foldable at compile-time. */ + default: + return NULL; + } +} + +bool +sw_64_gimple_fold_builtin (gimple_stmt_iterator *gsi) +{ + bool changed = false; + gimple *stmt = gsi_stmt (*gsi); + tree call = gimple_call_fn (stmt); + gimple *new_stmt = NULL; + + if (call) + { + tree fndecl = gimple_call_fndecl (stmt); + + if (fndecl) + { + tree arg0, arg1; + + switch (DECL_MD_FUNCTION_CODE (fndecl)) + { + case SW_64_BUILTIN_UMULH: + arg0 = gimple_call_arg (stmt, 0); + arg1 = gimple_call_arg (stmt, 1); + + new_stmt = gimple_build_assign (gimple_call_lhs (stmt), + MULT_HIGHPART_EXPR, arg0, arg1); + break; + default: + break; + } + } + } + + if (new_stmt) + { + gsi_replace (gsi, new_stmt, true); + changed = true; + } + + return changed; +} + +/* This page contains routines that are used to determine what the function + prologue and epilogue code will do and write them out. */ + +/* Compute the size of the save area in the stack. */ + +/* These variables are used for communication between the following functions. + They indicate various things about the current function being compiled + that are used to tell what kind of prologue, epilogue and procedure + descriptor to generate. 
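   sw_64_compute_frame_layout below records the save mask, the register save
   area size and the rounded total frame size in cfun->machine.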
*/ + +/* Nonzero if we need a stack procedure. */ +enum sw_64_procedure_types +{ + PT_NULL = 0, + PT_REGISTER = 1, + PT_STACK = 2 +}; +static enum sw_64_procedure_types sw_64_procedure_type; + +/* Compute register masks for saved registers, register save area size, + and total frame size. */ +static void +sw_64_compute_frame_layout (void) +{ + unsigned HOST_WIDE_INT sa_mask = 0; + HOST_WIDE_INT frame_size; + int sa_size; + + /* When outputting a thunk, we don't have valid register life info, + but assemble_start_function wants to output .frame and .mask + directives. */ + if (!cfun->is_thunk) + { + /* One for every register we have to save. */ + for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (!call_used_or_fixed_reg_p (i) && df_regs_ever_live_p (i) + && i != REG_RA) + sa_mask |= HOST_WIDE_INT_1U << i; + + /* We need to restore these for the handler. */ + if (crtl->calls_eh_return) + { + for (unsigned i = 0;; ++i) + { + unsigned regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + sa_mask |= HOST_WIDE_INT_1U << regno; + } + } + /* If any register spilled, then spill the return address also. */ + /* ??? This is required by the Digital stack unwind specification + and isn't needed if we're doing Dwarf2 unwinding. */ + if (sa_mask || sw_64_ra_ever_killed ()) + sa_mask |= HOST_WIDE_INT_1U << REG_RA; + } + sa_size = popcount_hwi (sa_mask); + frame_size = get_frame_size (); + + /* Our size must be even (multiple of 16 bytes). */ + if (sa_size & 1) + sa_size++; + sa_size *= 8; + + frame_size = (SW_64_ROUND (crtl->outgoing_args_size) + sa_size + + SW_64_ROUND (frame_size + crtl->args.pretend_args_size)); + + cfun->machine->sa_mask = sa_mask; + cfun->machine->sa_size = sa_size; + cfun->machine->frame_size = frame_size; +} + +#undef TARGET_COMPUTE_FRAME_LAYOUT +#define TARGET_COMPUTE_FRAME_LAYOUT sw_64_layout_frame + +/* Return 1 if this function can directly return via $26. */ + +bool +direct_return (void) +{ + return (reload_completed && cfun->machine->frame_size == 0); +} + +bool +sw_64_find_lo_sum_using_gp (rtx insn) +{ + subrtx_iterator::array_type array; + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) + { + const_rtx x = *iter; + if (GET_CODE (x) == LO_SUM && XEXP (x, 0) == pic_offset_table_rtx) + return true; + } + return false; +} + +static int +sw_64_does_function_need_gp (void) +{ + rtx_insn *insn; + + /* We need the gp to load the address of __mcount. */ + if (TARGET_PROFILING_NEEDS_GP && crtl->profile) + return 1; + + /* The code emitted by sw_64_output_mi_thunk_sysv uses the gp. */ + if (cfun->is_thunk) + return 1; + + /* The nonlocal receiver pattern assumes that the gp is valid for + the nested function. Reasonable because it's almost always set + correctly already. For the cases where that's wrong, make sure + the nested function loads its gp on entry. */ + if (crtl->has_nonlocal_goto) + return 1; + + /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first. + Even if we are a static function, we still need to do this in case + our address is taken and passed to something like qsort. */ + + push_topmost_sequence (); + insn = get_insns (); + pop_topmost_sequence (); + + for (; insn; insn = NEXT_INSN (insn)) + if (NONDEBUG_INSN_P (insn) && GET_CODE (PATTERN (insn)) != USE + && GET_CODE (PATTERN (insn)) != CLOBBER && get_attr_usegp (insn)) + return 1; + + return 0; +} + +/* Helper function to set RTX_FRAME_RELATED_P on instructions, including + sequences. 
*/ + +static rtx_insn * +set_frame_related_p (void) +{ + rtx_insn *seq = get_insns (); + rtx_insn *insn; + + end_sequence (); + + if (!seq) + return NULL; + + if (INSN_P (seq)) + { + insn = seq; + while (insn != NULL_RTX) + { + RTX_FRAME_RELATED_P (insn) = 1; + insn = NEXT_INSN (insn); + } + seq = emit_insn (seq); + } + else + { + seq = emit_insn (seq); + RTX_FRAME_RELATED_P (seq) = 1; + } + return seq; +} + +#define FRP(exp) (start_sequence (), exp, set_frame_related_p ()) + +/* Generates a store with the proper unwind info attached. VALUE is + stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG + contains SP+FRAME_BIAS, and that is the unwind info that should be + generated. If FRAME_REG != VALUE, then VALUE is being stored on + behalf of FRAME_REG, and FRAME_REG should be present in the unwind. */ + +static void +emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias, + HOST_WIDE_INT base_ofs, rtx frame_reg) +{ + rtx addr, mem; + rtx_insn *insn; + + addr = plus_constant (Pmode, base_reg, base_ofs); + mem = gen_frame_mem (Pmode, addr); + + insn = emit_move_insn (mem, value); + RTX_FRAME_RELATED_P (insn) = 1; + + if (frame_bias || value != frame_reg) + { + if (frame_bias) + { + addr + = plus_constant (Pmode, stack_pointer_rtx, frame_bias + base_ofs); + mem = gen_rtx_MEM (Pmode, addr); + } + + add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_SET (mem, frame_reg)); + } +} + +static void +emit_frame_store (unsigned int regno, rtx base_reg, HOST_WIDE_INT frame_bias, + HOST_WIDE_INT base_ofs) +{ + rtx reg = gen_rtx_REG (DImode, regno); + emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg); +} + +static void +emit_frame_store_32 (unsigned int regno, rtx base_reg, HOST_WIDE_INT frame_bias, + HOST_WIDE_INT base_ofs) +{ + rtx reg = gen_rtx_REG (Pmode, regno); + emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg); +} + +/* Write function prologue. */ +static void +sw64_add_cfa_expression (rtx_insn *insn, unsigned int reg, rtx base, + poly_int64 offset) +{ + rtx mem = gen_frame_mem (DImode, plus_constant (Pmode, base, offset)); + add_reg_note (insn, REG_CFA_EXPRESSION, + gen_rtx_SET (mem, regno_reg_rtx[reg])); +} + +void +sw_64_expand_prologue (void) +{ + /* Registers to save. */ + unsigned HOST_WIDE_INT sa_mask = cfun->machine->frame.sa_mask; + /* Stack space needed for pushing registers clobbered by us. */ + HOST_WIDE_INT sa_size = cfun->machine->frame.saved_regs_size; + /* Complete stack size needed. */ + HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size; + /* Probed stack size; it additionally includes the size of + the "reserve region" if any. */ + HOST_WIDE_INT probed_size, sa_bias; + /* Offset from base reg to register save area. */ + HOST_WIDE_INT reg_offset; + rtx sa_reg; + bool fp_flag = false; + + if (flag_stack_usage_info) + current_function_static_stack_size = frame_size; + +#ifdef SW_64_ENABLE_FULL_ASAN + reg_offset = aligned_upper_bound (crtl->outgoing_args_size, + STACK_BOUNDARY / BITS_PER_UNIT); +#else + reg_offset = SW_64_ROUND (crtl->outgoing_args_size); +#endif + + /* Emit an insn to reload GP, if needed. 
*/ + sw_64_function_needs_gp = sw_64_does_function_need_gp (); + if (sw_64_function_needs_gp) + { + if (TARGET_SW_M32) + emit_insn (gen_prologue_ldgp_32 ()); + else + emit_insn (gen_prologue_ldgp ()); + } + + if (strcmp ("main", lang_hooks.decl_printable_name (cfun->decl, 1)) == 0 + && (TARGET_SW_32ALIGN || TARGET_SW_SIMD)) + { + rtx const16 = gen_rtx_REG (DImode, 7); + sw_64_emit_set_const (const16, DImode, 16, 3, false); + emit_insn (gen_anddi3 (const16, const16, stack_pointer_rtx)); + emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, const16)); + + emit_insn ( + gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-32))); + rtx mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (0)); + rtx tmp7 = gen_rtx_MEM (Pmode, mem_address); + emit_move_insn (tmp7, gen_rtx_REG (DImode, 7)); + } + /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert + the call to mcount ourselves, rather than having the linker do it + magically in response to -pg. Since _mcount has special linkage, + don't represent the call as a call. */ + if (TARGET_PROFILING_NEEDS_GP && crtl->profile) + emit_insn (gen_prologue_mcount ()); + + if (strcmp ("main", lang_hooks.decl_printable_name (cfun->decl, 1)) == 0 + && flag_sw_hardware_prefetch) + { + emit_insn ( + gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-256))); + rtx mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (8)); + rtx tmp16 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (16)); + rtx tmp17 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (24)); + rtx tmp18 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (32)); + rtx tmp19 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (40)); + rtx tmp26 = gen_rtx_MEM (Pmode, mem_address); + + emit_move_insn (tmp16, gen_rtx_REG (DImode, 16)); + emit_move_insn (tmp17, gen_rtx_REG (DImode, 17)); + emit_move_insn (tmp18, gen_rtx_REG (DImode, 18)); + emit_move_insn (tmp19, gen_rtx_REG (DImode, 19)); + emit_move_insn (tmp26, gen_rtx_REG (DImode, 26)); + + rtx tmp_clt = gen_rtx_REG (DImode, 7); + rtx tmp_cnt = gen_rtx_REG (DImode, 8); + rtx op = gen_rtx_REG (DImode, 17); + + unsigned long clt1, clt2, clt3; + unsigned long cnt1, cnt2, cnt3; + clt1 = flag_hardware_prefetch_clt % 2; + clt2 = (flag_hardware_prefetch_clt >> 1) % 2; + clt3 = (flag_hardware_prefetch_clt >> 2) % 2; + cnt1 = flag_hardware_prefetch_cnt_l1; + cnt2 = flag_hardware_prefetch_cnt_l2; + cnt3 = flag_hardware_prefetch_cnt_l3; + sw_64_emit_set_const (op, DImode, 0x10, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt1, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x11, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt2, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x12, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt3, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x1, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt1, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x4, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt2, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + 
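/* Final pair in this presumed prefetch-configuration sequence: selector 0x8 is
   issued with cnt3 (apparently the L3 prefetch count, judging by
   flag_hardware_prefetch_cnt_l3 above), mirroring the 0x1/cnt1 and 0x4/cnt2
   calls already emitted.  */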
sw_64_emit_set_const (op, DImode, 0x8, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt3, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + emit_move_insn (gen_rtx_REG (DImode, 16), tmp16); + emit_move_insn (gen_rtx_REG (DImode, 17), tmp17); + emit_move_insn (gen_rtx_REG (DImode, 18), tmp18); + emit_move_insn (gen_rtx_REG (DImode, 19), tmp19); + emit_move_insn (gen_rtx_REG (DImode, 26), tmp26); + emit_insn ( + gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (256))); + } + if (strcmp ("exit", lang_hooks.decl_printable_name (cfun->decl, 1)) == 0) + { + emit_insn ( + gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-256))); + rtx mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (8)); + rtx tmp16 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (16)); + rtx tmp17 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (24)); + rtx tmp18 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (32)); + rtx tmp19 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (40)); + rtx tmp26 = gen_rtx_MEM (Pmode, mem_address); + + emit_move_insn (tmp16, gen_rtx_REG (DImode, 16)); + emit_move_insn (tmp17, gen_rtx_REG (DImode, 17)); + emit_move_insn (tmp18, gen_rtx_REG (DImode, 18)); + emit_move_insn (tmp19, gen_rtx_REG (DImode, 19)); + emit_move_insn (tmp26, gen_rtx_REG (DImode, 26)); + + rtx tmp_clt = gen_rtx_REG (DImode, 7); + rtx tmp_cnt = gen_rtx_REG (DImode, 8); + rtx op = gen_rtx_REG (DImode, 17); + + unsigned long clt1, clt2, clt3; + unsigned long cnt1, cnt2, cnt3; + clt1 = 1; + clt2 = 0; + clt3 = 1; + cnt1 = 0; + cnt2 = 0; + cnt3 = 5; + sw_64_emit_set_const (op, DImode, 0x10, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt1, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x11, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt2, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x12, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt3, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x1, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt1, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x4, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt2, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x8, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt3, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + emit_move_insn (gen_rtx_REG (DImode, 16), tmp16); + emit_move_insn (gen_rtx_REG (DImode, 17), tmp17); + emit_move_insn (gen_rtx_REG (DImode, 18), tmp18); + emit_move_insn (gen_rtx_REG (DImode, 19), tmp19); + emit_move_insn (gen_rtx_REG (DImode, 26), tmp26); + emit_insn ( + gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (256))); + } + + /* Adjust the stack by the frame size. If the frame size is > 4096 + bytes, we need to be sure we probe somewhere in the first and last + 4096 bytes (we can probably get away without the latter test) and + every 8192 bytes in between. If the frame size is > 32768, we + do this in a loop. 
Otherwise, we generate the explicit probe + instructions. + + Note that we are only allowed to adjust sp once in the prologue. */ + + probed_size = frame_size; + if (flag_stack_check || flag_stack_clash_protection) + probed_size += get_stack_check_protect (); + + if (probed_size <= 32768) + { + if (probed_size > 4096) + { + int probed; + + for (probed = 4096; probed < probed_size; probed += 8192) + emit_insn (gen_stack_probe_internal (GEN_INT (-probed))); + + /* We only have to do this probe if we aren't saving registers or + if we are probing beyond the frame because of -fstack-check. */ + if ((sa_size == 0 && probed_size > probed - 4096) || flag_stack_check + || flag_stack_clash_protection) + emit_insn (gen_stack_probe_internal (GEN_INT (-probed_size))); + } + + if (frame_size != 0) + { + if (TARGET_SW_M32) + FRP (emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-frame_size)))); + else + FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-frame_size)))); + } + } + else + { + /* Here we generate code to set R22 to SP + 4096 and set R23 to the + number of 8192 byte blocks to probe. We then probe each block + in the loop and then set SP to the proper location. If the + amount remaining is > 4096, we have to do one more probe if we + are not saving any registers or if we are probing beyond the + frame because of -fstack-check. */ + + HOST_WIDE_INT blocks = (probed_size + 4096) / 8192; + HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192; + rtx ptr = gen_rtx_REG (DImode, 22); + rtx count = gen_rtx_REG (DImode, 23); + rtx seq; + + emit_move_insn (count, GEN_INT (blocks)); + emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096))); + + /* Because of the difficulty in emitting a new basic block this + late in the compilation, generate the loop as a single insn. */ + emit_insn (gen_prologue_stack_probe_loop (count, ptr)); + + if ((leftover > 4096 && sa_size == 0) || flag_stack_check + || flag_stack_clash_protection) + { + rtx last = gen_rtx_MEM (Pmode, plus_constant (Pmode, ptr, -leftover)); + MEM_VOLATILE_P (last) = 1; + emit_move_insn (last, const0_rtx); + } + + if (flag_stack_check || flag_stack_clash_protection) + { + /* If -fstack-check is specified we have to load the entire + constant into a register and subtract from the sp in one go, + because the probed stack size is not equal to the frame size. */ + HOST_WIDE_INT lo, hi; + lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; + hi = frame_size - lo; + + emit_move_insn (ptr, GEN_INT (hi)); + emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo))); + seq = emit_insn ( + gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, ptr)); + } + else + { + seq = emit_insn ( + gen_adddi3 (stack_pointer_rtx, ptr, GEN_INT (-leftover))); + } + + /* This alternative is special, because the DWARF code cannot + possibly intuit through the loop above. So we invent this + note it looks at instead. */ + RTX_FRAME_RELATED_P (seq) = 1; + add_reg_note (seq, REG_FRAME_RELATED_EXPR, + gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + -frame_size))); + } + + /* Cope with very large offsets to the register save area. 
*/ + sa_bias = 0; + sa_reg = stack_pointer_rtx; + if (reg_offset + sa_size > 0x8000) + { + int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; + rtx sa_bias_rtx; + + if (low + sa_size <= 0x8000) + sa_bias = reg_offset - low, reg_offset = low; + else + sa_bias = reg_offset, reg_offset = 0; + + sa_reg = gen_rtx_REG (DImode, 24); + sa_bias_rtx = GEN_INT (sa_bias); + + if (add_operand (sa_bias_rtx, DImode)) + emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx)); + else + { + emit_move_insn (sa_reg, sa_bias_rtx); + emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg)); + } + } + + /* Save register RA next, followed by any other registers + that need to be saved. */ + for (unsigned i = REG_RA; sa_mask != 0; i = ctz_hwi (sa_mask)) + { + /* if we need a frame pointer, set it from the stack pointer. */ + if (frame_pointer_needed && i != REG_RA && fp_flag == false) + { + if (TARGET_SW_M32) + { + emit_frame_store_32 (HARD_FRAME_POINTER_REGNUM, sa_reg, sa_bias, + reg_offset); + } + else + { + emit_frame_store (HARD_FRAME_POINTER_REGNUM, sa_reg, sa_bias, + reg_offset); + sa_mask &= ~(HOST_WIDE_INT_1U << HARD_FRAME_POINTER_REGNUM); + reg_offset += 8; + fp_flag = true; + } + } + else + { + if (TARGET_SW_M32) + { + emit_frame_store_32 (i, sa_reg, sa_bias, reg_offset); + } + else + { + emit_frame_store (i, sa_reg, sa_bias, reg_offset); + reg_offset += 8; + sa_mask &= ~(HOST_WIDE_INT_1U << i); + } + } + } + + /* If we need a frame pointer, set it from the stack pointer. */ + if (frame_pointer_needed) + { + if (TARGET_CAN_FAULT_IN_PROLOGUE) + { + unsigned reg2 = 15; // FP + unsigned reg1 = 26; // R26 + long adj_size = SW_64_ROUND (crtl->outgoing_args_size); + if (adj_size > 0x8000) + { + int low = ((adj_size & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT bias; + + if (low <= 0x8000) + bias = adj_size - low, adj_size = low; + else + bias = adj_size, adj_size = 0; + + rtx fp_move; + rtx sa_reg_exp + = plus_constant (Pmode, stack_pointer_rtx, bias); + emit_move_insn (hard_frame_pointer_rtx, sa_reg_exp); + if (adj_size != 0) + fp_move + = gen_adddi3 (hard_frame_pointer_rtx, + hard_frame_pointer_rtx, GEN_INT (adj_size)); + + if ((void *) fp_move == NULL) + printf ("unable gen add3"); + emit_insn (fp_move); + } + else + { + rtx fp_move + = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, + GEN_INT ( + SW_64_ROUND (crtl->outgoing_args_size))); + FRP (emit_insn (fp_move)); + } + rtx_insn *insn = get_last_insn (); + if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX)) + { + rtx src + = plus_constant (Pmode, stack_pointer_rtx, + SW_64_ROUND (crtl->outgoing_args_size)); + add_reg_note (insn, REG_CFA_ADJUST_CFA, + gen_rtx_SET (hard_frame_pointer_rtx, src)); + } + + emit_insn ( + gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); + } + else + /* This must always be the last instruction in the + prologue, thus we emit a special move + clobber. */ + FRP (emit_insn ( + gen_init_fp (hard_frame_pointer_rtx, stack_pointer_rtx, sa_reg))); + } + + /* The ABIs for VMS and OSF/1 say that while we can schedule insns into + the prologue, for exception handling reasons, we cannot do this for + any insn that might fault. We could prevent this for mems with a + (clobber:BLK (scratch)), but this doesn't work for fp insns. So we + have to prevent all such scheduling with a blockage. + + Linux, on the other hand, never bothered to implement OSF/1's + exception handling, and so doesn't care about such things. Anyone + planning to use dwarf2 frame-unwind info can also omit the blockage. 
*/ + + if (!TARGET_CAN_FAULT_IN_PROLOGUE) + emit_insn (gen_blockage ()); +} + +/* Count the number of .file directives, so that .loc is up to date. */ +int num_source_filenames = 0; + +/* Output the textual info surrounding the prologue. */ + +void +sw_64_start_function (FILE *file, const char *fnname, + tree decl ATTRIBUTE_UNUSED) +{ + unsigned long imask, fmask; + /* Complete stack size needed. */ + HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size; + /* The maximum debuggable frame size. */ + const HOST_WIDE_INT max_frame_size = HOST_WIDE_INT_1 << 31; + /* Offset from base reg to register save area. */ + HOST_WIDE_INT reg_offset; + char *entry_label = (char *) alloca (strlen (fnname) + 6); + char *tramp_label = (char *) alloca (strlen (fnname) + 6); + int i; + + sw_64_fnname = fnname; + const char *main = "main"; + if (flag_fpcr_set == 4 && strcmp (fnname, main) == 0) + stfp3_flag = 1; + else + stfp3_flag = 0; + + reg_offset = SW_64_ROUND (crtl->outgoing_args_size); + + imask = cfun->machine->frame.sa_mask & 0xffffffffu; + fmask = cfun->machine->frame.sa_mask >> 32; + /* Issue function start and label. */ + if (!flag_inhibit_size_directive) + { + fputs ("\t.ent ", file); + assemble_name (file, fnname); + putc ('\n', file); + + /* If the function needs GP, we'll write the "..ng" label there. + Otherwise, do it here. */ + if (!sw_64_function_needs_gp && !cfun->is_thunk) + { + putc ('$', file); + assemble_name (file, fnname); + fputs ("..ng:\n", file); + } + } + /* Nested functions on VMS that are potentially called via trampoline + get a special transfer entry point that loads the called functions + procedure descriptor and static chain. */ + strcpy (entry_label, fnname); + + ASM_OUTPUT_LABEL (file, entry_label); + inside_function = TRUE; + + if (TARGET_IEEE_CONFORMANT && !flag_inhibit_size_directive) + { + /* Set flags in procedure descriptor to request IEEE-conformant + math-library routines. The value we set it to is PDSC_EXC_IEEE + (/usr/include/pdsc.h). */ + fputs ("\t.eflag 48\n", file); + } + + /* Set up offsets to sw_64 virtual arg/local debugging pointer. */ + sw_64_auto_offset = -frame_size + cfun->machine->frame.saved_varargs_size + + crtl->args.pretend_args_size; + sw_64_arg_offset = -frame_size + 48; + + /* Describe our frame. If the frame size is larger than an integer, + print it as zero to avoid an assembler error. We won't be + properly describing such a frame, but that's the best we can do. */ + if (!flag_inhibit_size_directive) + fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n", + (frame_pointer_needed ? HARD_FRAME_POINTER_REGNUM + : STACK_POINTER_REGNUM), + frame_size >= max_frame_size ? 0 : frame_size, + crtl->args.pretend_args_size); + + /* Describe which registers were spilled. */ + if (!flag_inhibit_size_directive) + { + if (imask) + { + fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask, + frame_size >= max_frame_size ? 0 : reg_offset - frame_size); + + for (i = 0; i < 32; ++i) + if (imask & (1UL << i)) + reg_offset += 8; + } + + if (fmask) + fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask, + frame_size >= max_frame_size ? 0 : reg_offset - frame_size); + } +} + +/* Emit the .prologue note at the scheduled end of the prologue. */ + +static void +sw_64_output_function_end_prologue (FILE *file) +{ + if (!flag_inhibit_size_directive) + fprintf (file, "\t.prologue %d\n", + sw_64_function_needs_gp || cfun->is_thunk); +} + +/* Write function epilogue. 
*/ + +void +sw_64_expand_epilogue (void) +{ + /* Registers to save. */ + unsigned HOST_WIDE_INT sa_mask = cfun->machine->frame.sa_mask; + /* Stack space needed for pushing registers clobbered by us. */ + HOST_WIDE_INT sa_size = cfun->machine->frame.saved_regs_size; + /* Complete stack size needed. */ + HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size; + /* Offset from base reg to register save area. */ + HOST_WIDE_INT reg_offset; + int fp_is_frame_pointer, fp_offset; + rtx sa_reg, sa_reg_exp = NULL; + rtx sp_adj1, sp_adj2, mem, reg, insn; + rtx eh_ofs; + rtx cfa_restores = NULL_RTX; + bool fp_flag = false; + +#ifdef SW_64_ENABLE_FULL_ASAN + reg_offset = aligned_upper_bound (crtl->outgoing_args_size, + STACK_BOUNDARY / BITS_PER_UNIT); +#else + reg_offset = SW_64_ROUND (crtl->outgoing_args_size); +#endif + + if (strcmp ("main", lang_hooks.decl_printable_name (cfun->decl, 1)) == 0 + && flag_sw_hardware_prefetch) + { + emit_insn ( + gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-256))); + rtx mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (8)); + rtx tmp16 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (16)); + rtx tmp17 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (24)); + rtx tmp18 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (32)); + rtx tmp19 = gen_rtx_MEM (Pmode, mem_address); + mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (40)); + rtx tmp26 = gen_rtx_MEM (Pmode, mem_address); + + emit_move_insn (tmp16, gen_rtx_REG (DImode, 16)); + emit_move_insn (tmp17, gen_rtx_REG (DImode, 17)); + emit_move_insn (tmp18, gen_rtx_REG (DImode, 18)); + emit_move_insn (tmp19, gen_rtx_REG (DImode, 19)); + emit_move_insn (tmp26, gen_rtx_REG (DImode, 26)); + + rtx tmp_clt = gen_rtx_REG (DImode, 7); + rtx tmp_cnt = gen_rtx_REG (DImode, 8); + rtx op = gen_rtx_REG (DImode, 17); + + unsigned long clt1, clt2, clt3; + unsigned long cnt1, cnt2, cnt3; + clt1 = 1; + clt2 = 0; + clt3 = 1; + cnt1 = 0; + cnt2 = 0; + cnt3 = 5; + sw_64_emit_set_const (op, DImode, 0x10, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt1, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x11, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt2, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x12, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, clt3, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x1, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt1, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x4, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt2, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + sw_64_emit_set_const (op, DImode, 0x8, 3, false); + sw_64_emit_set_const (tmp_clt, DImode, cnt3, 3, false); + emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); + + emit_move_insn (gen_rtx_REG (DImode, 16), tmp16); + emit_move_insn (gen_rtx_REG (DImode, 17), tmp17); + emit_move_insn (gen_rtx_REG (DImode, 18), tmp18); + emit_move_insn (gen_rtx_REG (DImode, 19), tmp19); + emit_move_insn (gen_rtx_REG (DImode, 26), tmp26); + emit_insn ( + gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (256))); + } + + 
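/* A sketch of the restore path that follows, mirroring the prologue: when a
   frame pointer is in use, SP is first recovered from FP (biasing through a
   scratch register if the outgoing-args area exceeds the 16-bit displacement
   range); the save area is then walked to reload RA, FP and the remaining
   saved registers, attaching REG_CFA_RESTORE notes; finally the frame itself
   is popped, going through register $23 when the frame size does not fit an
   immediate operand.  */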
fp_is_frame_pointer = frame_pointer_needed; + fp_offset = 0; + sa_reg = stack_pointer_rtx; + + if (crtl->calls_eh_return) + eh_ofs = EH_RETURN_STACKADJ_RTX; + else + eh_ofs = NULL_RTX; + + if (sa_size) + { + /* If we have a frame pointer, restore SP from it. */ + if (frame_pointer_needed) + { + long adj_size = SW_64_ROUND (crtl->outgoing_args_size); + if (adj_size > 0x8000) + { + int low = ((adj_size & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT bias; + + if (low <= 0x8000) + bias = adj_size - low, adj_size = low; + else + bias = adj_size, adj_size = 0; + + rtx sa_reg = stack_pointer_rtx; + rtx sa_reg_exp + = plus_constant (Pmode, hard_frame_pointer_rtx, -bias); + emit_move_insn (sa_reg, sa_reg_exp); + if (adj_size != 0) + emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, + GEN_INT (-adj_size))); + } + else + { + emit_insn ( + gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); + rtx insn + = gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx, + GEN_INT ( + -SW_64_ROUND (crtl->outgoing_args_size))); + emit_insn (insn); + } + } + // emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); + + /* Cope with very large offsets to the register save area. */ + if (reg_offset + sa_size > 0x8000) + { + int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; + HOST_WIDE_INT bias; + + if (low + sa_size <= 0x8000) + bias = reg_offset - low, reg_offset = low; + else + bias = reg_offset, reg_offset = 0; + + sa_reg = gen_rtx_REG (Pmode, 22); + sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias); + + emit_move_insn (sa_reg, sa_reg_exp); + } + + /* Restore registers in order, excepting a true frame pointer. */ + for (unsigned i = REG_RA; sa_mask != 0; i = ctz_hwi (sa_mask)) + { + if (fp_is_frame_pointer && i != REG_RA && fp_flag == false) + { + emit_insn (gen_blockage ()); + mem = gen_frame_mem (DImode, + plus_constant (Pmode, sa_reg, reg_offset)); + emit_move_insn (hard_frame_pointer_rtx, mem); + cfa_restores + = alloc_reg_note (REG_CFA_RESTORE, hard_frame_pointer_rtx, + cfa_restores); + sa_mask &= ~(1UL << HARD_FRAME_POINTER_REGNUM); + reg_offset += 8; + fp_offset = reg_offset; + fp_flag = true; + } + else + { + mem = gen_frame_mem (Pmode, + plus_constant (Pmode, sa_reg, reg_offset)); + reg = gen_rtx_REG (Pmode, i); + emit_move_insn (reg, mem); + cfa_restores + = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); + reg_offset += 8; + sa_mask &= ~(HOST_WIDE_INT_1U << i); + } + } + } + + if (frame_size || eh_ofs) + { + sp_adj1 = stack_pointer_rtx; + + if (eh_ofs) + { + sp_adj1 = gen_rtx_REG (Pmode, 23); + emit_move_insn (sp_adj1, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs)); + } + + /* If the stack size is large, begin computation into a temporary + register so as not to interfere with a potential fp restore, + which must be consecutive with an SP restore. */ + if (frame_size < 32768 && !cfun->calls_alloca) + sp_adj2 = GEN_INT (frame_size); + else if (frame_size < 0x40007fffL) + { + int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; + + sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low); + if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2)) + sp_adj1 = sa_reg; + else + { + sp_adj1 = gen_rtx_REG (Pmode, 23); + emit_move_insn (sp_adj1, sp_adj2); + } + sp_adj2 = GEN_INT (low); + } + else + { + rtx tmp = gen_rtx_REG (Pmode, 23); + sp_adj2 = sw_64_emit_set_const (tmp, Pmode, frame_size, 3, false); + if (!sp_adj2) + { + /* We can't drop new things to memory this late, afaik, + so build it up by pieces. 
*/ + sp_adj2 = sw_64_emit_set_long_const (tmp, frame_size); + gcc_assert (sp_adj2); + } + } + + /* Restore the stack pointer. */ + emit_insn (gen_blockage ()); + if (sp_adj2 == const0_rtx) + insn = emit_move_insn (stack_pointer_rtx, sp_adj1); + else + insn = emit_move_insn (stack_pointer_rtx, + gen_rtx_PLUS (Pmode, sp_adj1, sp_adj2)); + REG_NOTES (insn) = cfa_restores; + add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx); + RTX_FRAME_RELATED_P (insn) = 1; + } + else + { + gcc_assert (cfa_restores == NULL); + } + if (strcmp ("main", lang_hooks.decl_printable_name (cfun->decl, 1)) == 0 + && (TARGET_SW_32ALIGN || TARGET_SW_SIMD)) + { + rtx mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (0)); + rtx tmp7 = gen_rtx_MEM (Pmode, mem_address); + emit_move_insn (gen_rtx_REG (DImode, 7), tmp7); + rtx const16 = gen_rtx_REG (DImode, 7); + emit_insn ( + gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (32))); + emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, const16)); + } +} + +/* Output the rest of the textual info surrounding the epilogue. */ + +void +sw_64_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED) +{ + rtx_insn *insn; + + /* We output a nop after noreturn calls at the very end of the function to + ensure that the return address always remains in the caller's code range, + as not doing so might confuse unwinding engines. */ + insn = get_last_insn (); + if (!INSN_P (insn)) + insn = prev_active_insn (insn); + if (insn && CALL_P (insn)) + output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL); + + /* End the function. */ + if (!flag_inhibit_size_directive) + { + fputs ("\t.end ", file); + assemble_name (file, fnname); + putc ('\n', file); + } + inside_function = FALSE; +} + +/* Emit a tail call to FUNCTION after adjusting THIS by DELTA. + + In order to avoid the hordes of differences between generated code + with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating + lots of code loading up large constants, generate rtl and emit it + instead of going straight to text. + + Not sure why this idea hasn't been explored before... */ + +static void +sw_64_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, + HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, + tree function) +{ + const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl)); + HOST_WIDE_INT hi, lo; + rtx this_rtx, funexp; + rtx_insn *insn; + + /* We always require a valid GP. */ + if (TARGET_SW_M32) + emit_insn (gen_prologue_ldgp_32 ()); + else + emit_insn (gen_prologue_ldgp ()); + emit_note (NOTE_INSN_PROLOGUE_END); + + /* Find the "this" pointer. If the function returns a structure, + the structure return pointer is in $16. */ + if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) + this_rtx = gen_rtx_REG (Pmode, 17); + else + this_rtx = gen_rtx_REG (Pmode, 16); + + /* Add DELTA. When possible we use ldih+ldi. Otherwise load the + entire constant for the add. */ + lo = ((delta & 0xffff) ^ 0x8000) - 0x8000; + hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; + if (hi + lo == delta) + { + if (hi) + emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi))); + if (lo) + emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo))); + } + else + { + rtx tmp = sw_64_emit_set_long_const (gen_rtx_REG (Pmode, 0), delta); + emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); + } + + /* Add a delta stored in the vtable at VCALL_OFFSET. 
*/ + if (vcall_offset) + { + rtx tmp, tmp2; + + tmp = gen_rtx_REG (Pmode, 0); + emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx)); + + lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000; + hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; + if (hi + lo == vcall_offset) + { + if (hi) + emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi))); + } + else + { + tmp2 + = sw_64_emit_set_long_const (gen_rtx_REG (Pmode, 1), vcall_offset); + emit_insn (gen_adddi3 (tmp, tmp, tmp2)); + lo = 0; + } + if (lo) + tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo)); + else + tmp2 = tmp; + emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2)); + + emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); + } + + /* Generate a tail call to the target function. */ + if (!TREE_USED (function)) + { + assemble_external (function); + TREE_USED (function) = 1; + } + funexp = XEXP (DECL_RTL (function), 0); + funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); + insn = emit_call_insn (gen_sibcall (funexp, const0_rtx)); + SIBLING_CALL_P (insn) = 1; + + /* Run just enough of rest_of_compilation to get the insns emitted. + There's not really enough bulk here to make other passes such as + instruction scheduling worth while. */ + insn = get_insns (); + shorten_branches (insn); + assemble_start_function (thunk_fndecl, fnname); + final_start_function (insn, file, 1); + final (insn, file, 1); + final_end_function (); + assemble_end_function (thunk_fndecl, fnname); +} + + +/* Debugging support. */ + +#include "gstab.h" + +/* Name of the file containing the current function. */ + +static const char *current_function_file = ""; + +/* Offsets to sw_64 virtual arg/local debugging pointers. */ + +long sw_64_arg_offset; +long sw_64_auto_offset; + +/* Emit a new filename to a stream. */ + +void +sw_64_output_filename (FILE *stream, const char *name) +{ + static int first_time = TRUE; + + if (first_time) + { + first_time = FALSE; + ++num_source_filenames; + current_function_file = name; + fprintf (stream, "\t.file\t "); + output_quoted_string (stream, name); + fprintf (stream, "\n"); + } + + else if (name != current_function_file + && strcmp (name, current_function_file) != 0) + { + ++num_source_filenames; + current_function_file = name; + fprintf (stream, "\t.file\t "); + + output_quoted_string (stream, name); + fprintf (stream, "\n"); + } +} + +/* Structure to show the current status of registers and memory. */ + +struct shadow_summary +{ + struct { + unsigned int i : 31; /* Mask of int regs. */ + unsigned int fp : 31; /* Mask of fp regs. */ + unsigned int mem : 1; /* mem == imem | fpmem. */ + } used, defd; +}; + +/* Summary the effects of expression X on the machine. Update SUM, a pointer + to the summary structure. SET is nonzero if the insn is setting the + object, otherwise zero. */ + +static void +summarize_insn (rtx x, struct shadow_summary *sum, int set) +{ + const char *format_ptr; + int i, j; + + if (x == 0) + return; + + switch (GET_CODE (x)) + { + /* ??? Note that this case would be incorrect if the Sw_64 had a + ZERO_EXTRACT in SET_DEST. 
*/ + case SET: + summarize_insn (SET_SRC (x), sum, 0); + summarize_insn (SET_DEST (x), sum, 1); + break; + + case CLOBBER: + summarize_insn (XEXP (x, 0), sum, 1); + break; + + case USE: + summarize_insn (XEXP (x, 0), sum, 0); + break; + + case ASM_OPERANDS: + for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--) + summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0); + break; + + case PARALLEL: + for (i = XVECLEN (x, 0) - 1; i >= 0; i--) + summarize_insn (XVECEXP (x, 0, i), sum, 0); + break; + + case SUBREG: + summarize_insn (SUBREG_REG (x), sum, 0); + break; + + case REG: + { + int regno = REGNO (x); + unsigned long mask = ((unsigned long) 1) << (regno % 32); + + if (regno == 31 || regno == 63) + break; + + if (set) + { + if (regno < 32) + sum->defd.i |= mask; + else + sum->defd.fp |= mask; + } + else + { + if (regno < 32) + sum->used.i |= mask; + else + sum->used.fp |= mask; + } + } + break; + + case MEM: + if (set) + sum->defd.mem = 1; + else + sum->used.mem = 1; + + /* Find the regs used in memory address computation: */ + summarize_insn (XEXP (x, 0), sum, 0); + break; + + case CONST_INT: + case CONST_WIDE_INT: + case CONST_DOUBLE: + case SYMBOL_REF: + case LABEL_REF: + case CONST: + case SCRATCH: + case ASM_INPUT: + break; + + /* Handle common unary and binary ops for efficiency. */ + case COMPARE: + case PLUS: + case MINUS: + case MULT: + case DIV: + case MOD: + case UDIV: + case UMOD: + case AND: + case IOR: + case XOR: + case ASHIFT: + case ROTATE: + case ASHIFTRT: + case LSHIFTRT: + case ROTATERT: + case SMIN: + case SMAX: + case UMIN: + case UMAX: + case NE: + case EQ: + case GE: + case GT: + case LE: + case LT: + case GEU: + case GTU: + case LEU: + case LTU: + summarize_insn (XEXP (x, 0), sum, 0); + summarize_insn (XEXP (x, 1), sum, 0); + break; + + case NEG: + case NOT: + case SIGN_EXTEND: + case ZERO_EXTEND: + case TRUNCATE: + case FLOAT_EXTEND: + case FLOAT_TRUNCATE: + case FLOAT: + case FIX: + case UNSIGNED_FLOAT: + case UNSIGNED_FIX: + case ABS: + case SQRT: + case FFS: + summarize_insn (XEXP (x, 0), sum, 0); + break; + + default: + format_ptr = GET_RTX_FORMAT (GET_CODE (x)); + for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) + switch (format_ptr[i]) + { + case 'e': + summarize_insn (XEXP (x, i), sum, 0); + break; + + case 'E': + for (j = XVECLEN (x, i) - 1; j >= 0; j--) + summarize_insn (XVECEXP (x, i, j), sum, 0); + break; + + case 'i': + break; + + default: + gcc_unreachable (); + } + } +} + +/* Ensure a sufficient number of `memb' insns are in the code when + the user requests code with a trap precision of functions or + instructions. + + In naive mode, when the user requests a trap-precision of + "instruction", a memb is needed after every instruction that may + generate a trap. This ensures that the code is resumption safe but + it is also slow. + + When optimizations are turned on, we delay issuing a memb as long + as possible. In this context, a trap shadow is the sequence of + instructions that starts with a (potentially) trap generating + instruction and extends to the next memb. We can delay (and + therefore sometimes omit) a memb subject to the following + conditions: + + (a) On entry to the trap shadow, if any Sw_64 register or memory + location contains a value that is used as an operand value by some + instruction in the trap shadow (live on entry), then no instruction + in the trap shadow may modify the register or memory location. 
+ + (b) Within the trap shadow, the computation of the base register + for a memory load or store instruction may not involve using the + result of an instruction that might generate an UNPREDICTABLE + result. + + (c) Within the trap shadow, no register may be used more than once + as a destination register. (This is to make life easier for the + trap-handler.) + + (d) The trap shadow may not include any branch instructions. */ + +static void +sw_64_handle_trap_shadows (void) +{ + struct shadow_summary shadow; + int trap_pending, exception_nesting; + rtx_insn *i, *n; + + trap_pending = 0; + exception_nesting = 0; + shadow.used.i = 0; + shadow.used.fp = 0; + shadow.used.mem = 0; + shadow.defd = shadow.used; + + for (i = get_insns (); i; i = NEXT_INSN (i)) + { + if (NOTE_P (i)) + { + switch (NOTE_KIND (i)) + { + case NOTE_INSN_EH_REGION_BEG: + exception_nesting++; + if (trap_pending) + goto close_shadow; + break; + + case NOTE_INSN_EH_REGION_END: + exception_nesting--; + if (trap_pending) + goto close_shadow; + break; + + case NOTE_INSN_EPILOGUE_BEG: + if (trap_pending && sw_64_tp >= SW_64_TP_FUNC) + goto close_shadow; + break; + } + } + else if (trap_pending) + { + if (sw_64_tp == SW_64_TP_FUNC) + { + if (JUMP_P (i) && GET_CODE (PATTERN (i)) == RETURN) + goto close_shadow; + } + else if (sw_64_tp == SW_64_TP_INSN) + { + if (optimize > 0) + { + struct shadow_summary sum; + + sum.used.i = 0; + sum.used.fp = 0; + sum.used.mem = 0; + sum.defd = sum.used; + + switch (GET_CODE (i)) + { + case INSN: + /* Annoyingly, get_attr_trap will die on these. */ + if (GET_CODE (PATTERN (i)) == USE + || GET_CODE (PATTERN (i)) == CLOBBER) + break; + + summarize_insn (PATTERN (i), &sum, 0); + + if ((sum.defd.i & shadow.defd.i) + || (sum.defd.fp & shadow.defd.fp)) + { + /* (c) would be violated. */ + goto close_shadow; + } + + /* Combine shadow with summary of current insn: */ + shadow.used.i |= sum.used.i; + shadow.used.fp |= sum.used.fp; + shadow.used.mem |= sum.used.mem; + shadow.defd.i |= sum.defd.i; + shadow.defd.fp |= sum.defd.fp; + shadow.defd.mem |= sum.defd.mem; + + if ((sum.defd.i & shadow.used.i) + || (sum.defd.fp & shadow.used.fp) + || (sum.defd.mem & shadow.used.mem)) + { + /* (a) would be violated (also takes care of (b)) */ + gcc_assert (get_attr_trap (i) != TRAP_YES + || (!(sum.defd.i & sum.used.i) + && !(sum.defd.fp & sum.used.fp))); + + goto close_shadow; + } + break; + + case BARRIER: + /* __builtin_unreachable can expand to no code at all, + leaving (barrier) RTXes in the instruction stream. */ + goto close_shadow_notrapb; + + case JUMP_INSN: + case CALL_INSN: + case CODE_LABEL: + goto close_shadow; + + case DEBUG_INSN: + break; + + default: + gcc_unreachable (); + } + } + else + { + close_shadow: + n = emit_insn_before (gen_trapb (), i); + PUT_MODE (n, TImode); + PUT_MODE (i, TImode); + close_shadow_notrapb: + trap_pending = 0; + shadow.used.i = 0; + shadow.used.fp = 0; + shadow.used.mem = 0; + shadow.defd = shadow.used; + } + } + } + + if ((exception_nesting > 0 || sw_64_tp >= SW_64_TP_FUNC) + && NONJUMP_INSN_P (i) && GET_CODE (PATTERN (i)) != USE + && GET_CODE (PATTERN (i)) != CLOBBER && get_attr_trap (i) == TRAP_YES) + { + if (optimize && !trap_pending) + summarize_insn (PATTERN (i), &shadow, 0); + trap_pending = 1; + } + } +} + +/* Sw_64 can only issue instruction groups simultaneously if they are + suitably aligned. This is very processor-specific. */ + +/* The instruction group alignment main loop. 
*/ + +static void +sw_64_align_insns_1 (unsigned int max_align, + rtx_insn *(*next_group) (rtx_insn *, int *, int *), + rtx (*next_nop) (int *)) +{ + /* ALIGN is the known alignment for the insn group. */ + unsigned int align; + /* OFS is the offset of the current insn in the insn group. */ + int ofs; + int prev_in_use, in_use, len, ldgp; + rtx_insn *i, *next; + + /* Let shorten branches care for assigning alignments to code labels. */ + shorten_branches (get_insns ()); + + unsigned int option_alignment = align_functions.levels[0].get_value (); + if (option_alignment < 4) + align = 4; + else if ((unsigned int) option_alignment < max_align) + align = option_alignment; + else + align = max_align; + + ofs = prev_in_use = 0; + i = get_insns (); + if (NOTE_P (i)) + i = next_nonnote_insn (i); + + ldgp = sw_64_function_needs_gp ? 8 : 0; + + while (i) + { + next = (*next_group) (i, &in_use, &len); + + /* When we see a label, resync alignment etc. */ + if (LABEL_P (i)) + { + unsigned int new_align + = label_to_alignment (i).levels[0].get_value (); + if (new_align >= align) + { + align = new_align < max_align ? new_align : max_align; + ofs = 0; + } + + else if (ofs & (new_align - 1)) + ofs = (ofs | (new_align - 1)) + 1; + gcc_assert (!len); + } + + /* Handle complex instructions special. */ + else if (in_use == 0) + { + /* Asms will have length < 0. This is a signal that we have + lost alignment knowledge. Assume, however, that the asm + will not mis-align instructions. */ + if (len < 0) + { + ofs = 0; + align = 4; + len = 0; + } + } + + /* If the known alignment is smaller than the recognized insn group, + realign the output. */ + else if ((int) align < len) + { + unsigned int new_log_align = len > 8 ? 4 : 3; + rtx_insn *prev, *where; + + where = prev = prev_nonnote_insn (i); + if (!where || !LABEL_P (where)) + where = i; + + /* Can't realign between a call and its gp reload. */ + if (!(TARGET_EXPLICIT_RELOCS && prev && CALL_P (prev))) + { + emit_insn_before (gen_realign (GEN_INT (new_log_align)), where); + align = 1 << new_log_align; + ofs = 0; + } + } + + /* We may not insert padding inside the initial ldgp sequence. */ + else if (ldgp > 0) + ldgp -= len; + + /* If the group won't fit in the same INT16 as the previous, + we need to add padding to keep the group together. Rather + than simply leaving the insn filling to the assembler, we + can make use of the knowledge of what sorts of instructions + were issued in the previous group to make sure that all of + the added nops are really free. */ + else if (ofs + len > (int) align) + { + int nop_count = (align - ofs) / 4; + rtx_insn *where; + + /* Insert nops before labels, branches, and calls to truly merge + the execution of the nops with the previous instruction group. */ + where = prev_nonnote_insn (i); + if (where) + { + if (LABEL_P (where)) + { + rtx_insn *where2 = prev_nonnote_insn (where); + if (where2 && JUMP_P (where2)) + where = where2; + } + else if (NONJUMP_INSN_P (where)) + where = i; + } + else + where = i; + + do + emit_insn_before ((*next_nop) (&prev_in_use), where); + while (--nop_count); + ofs = 0; + } + + ofs = (ofs + len) & (align - 1); + prev_in_use = in_use; + i = next; + } +} + +static void +sw_64_align_insns (void) +{ + gcc_unreachable (); +} + +/* Insert an unop between sibcall or noreturn function call and GP load. 
*/ + +static void +sw_64_pad_function_end (void) +{ + rtx_insn *insn, *next; + + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (!CALL_P (insn) + || !(SIBLING_CALL_P (insn) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))) + continue; + + next = next_active_insn (insn); + if (next) + { + rtx pat = PATTERN (next); + + if (GET_CODE (pat) == SET + && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE + && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1) + emit_insn_after (gen_unop (), insn); + } + } +} + +/* Machine dependent reorg pass. */ + +static void +sw_64_reorg (void) +{ + /* Workaround for a linker error that triggers when an exception + handler immediatelly follows a sibcall or a noreturn function. + +In the sibcall case: + + The instruction stream from an object file: + + 1d8: 00 00 fb 6b jmp (t12) + 1dc: 00 00 ba 27 ldih gp,0(ra) + 1e0: 00 00 bd 23 ldi gp,0(gp) + 1e4: 00 00 7d a7 ldl t12,0(gp) + 1e8: 00 40 5b 6b call ra,(t12),1ec <__funcZ+0x1ec> + + was converted in the final link pass to: + + 12003aa88: 67 fa ff c3 br 120039428 <...> + 12003aa8c: 00 00 fe 2f unop + 12003aa90: 00 00 fe 2f unop + 12003aa94: 48 83 7d a7 ldl t12,-31928(gp) + 12003aa98: 00 40 5b 6b call ra,(t12),12003aa9c <__func+0x1ec> + +And in the noreturn case: + + The instruction stream from an object file: + + 54: 00 40 5b 6b call ra,(t12),58 <__func+0x58> + 58: 00 00 ba 27 ldih gp,0(ra) + 5c: 00 00 bd 23 ldi gp,0(gp) + 60: 00 00 7d a7 ldl t12,0(gp) + 64: 00 40 5b 6b call ra,(t12),68 <__func+0x68> + + was converted in the final link pass to: + + fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8> + fdb28: 00 00 fe 2f unop + fdb2c: 00 00 fe 2f unop + fdb30: 30 82 7d a7 ldl t12,-32208(gp) + fdb34: 00 40 5b 6b call ra,(t12),fdb38 <__func+0x68> + + GP load instructions were wrongly cleared by the linker relaxation + pass. This workaround prevents removal of GP loads by inserting + an unop instruction between a sibcall or noreturn function call and + exception handler prologue. */ + + if (current_function_has_exception_handlers ()) + sw_64_pad_function_end (); +} + +static void +sw_64_file_start (void) +{ + default_file_start (); + + fputs ("\t.set noreorder\n", asm_out_file); + fputs ("\t.set volatile\n", asm_out_file); + fputs ("\t.set noat\n", asm_out_file); + if (TARGET_EXPLICIT_RELOCS) + fputs ("\t.set nomacro\n", asm_out_file); + if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX + | TARGET_SW6A | TARGET_SW6B | TARGET_SW8A) + { + const char *arch; + + if (sw_64_cpu == PROCESSOR_SW6 || PROCESSOR_SW8 || TARGET_FIX + || TARGET_CIX) + { + if (TARGET_SW6A) + arch = "sw6a"; + else if (TARGET_SW6B) + arch = "sw6b"; + else if (TARGET_SW8A) + arch = "sw8a"; + else + arch = "sw6b"; + } + else + arch = "sw6b"; + + fprintf (asm_out_file, "\t.arch %s\n", arch); + } +} + +/* Since we don't have a .dynbss section, we should not allow global + relocations in the .rodata section. */ + +static int +sw_64_elf_reloc_rw_mask (void) +{ + return flag_pic ? 3 : 2; +} + +/* Return a section for X. The only special thing we do here is to + honor small data. */ + +static section * +sw_64_elf_select_rtx_section (machine_mode mode, rtx x, + unsigned HOST_WIDE_INT align) +{ + if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value) + /* ??? Consider using mergeable sdata sections. 
*/ + return sdata_section; + else + return default_elf_select_rtx_section (mode, x, align); +} + +static unsigned int +sw_64_elf_section_type_flags (tree decl, const char *name, int reloc) +{ + unsigned int flags = 0; + + if (strcmp (name, ".sdata") == 0 || strncmp (name, ".sdata.", 7) == 0 + || strncmp (name, ".gnu.linkonce.s.", 16) == 0 + || strcmp (name, ".sbss") == 0 || strncmp (name, ".sbss.", 6) == 0 + || strncmp (name, ".gnu.linkonce.sb.", 17) == 0) + flags = SECTION_SMALL; + + flags |= default_section_type_flags (decl, name, reloc); + return flags; +} + +/* Structure to collect function names for final output in link section. */ +/* Note that items marked with GTY can't be ifdef'ed out. */ + +enum reloc_kind +{ + KIND_LINKAGE, + KIND_CODEADDR +}; + +struct GTY (()) sw_64_links +{ + rtx func; + rtx linkage; + enum reloc_kind rkind; +}; + +rtx +sw_64_use_linkage (rtx func ATTRIBUTE_UNUSED, bool lflag ATTRIBUTE_UNUSED, + bool rflag ATTRIBUTE_UNUSED) +{ + return NULL_RTX; +} + +static void +sw_64_init_libfuncs (void) +{ +#ifdef MEM_LIBFUNCS_INIT + MEM_LIBFUNCS_INIT; +#endif +} + +/* On the Sw_64, we use this to disable the floating-point registers + when they don't exist. */ + +static void +sw_64_conditional_register_usage (void) +{ + int i; + if (!TARGET_FPREGS) + for (i = 32; i < 63; i++) + fixed_regs[i] = call_used_regs[i] = 1; +} + +/* Canonicalize a comparison from one we don't have to one we do have. */ + +static void +sw_64_canonicalize_comparison (int *code, rtx *op0, rtx *op1, + bool op0_preserve_value) +{ + if (!op0_preserve_value + && (*code == GE || *code == GT || *code == GEU || *code == GTU) + && (REG_P (*op1) || *op1 == const0_rtx)) + { + std::swap (*op0, *op1); + *code = (int) swap_condition ((enum rtx_code) * code); + } + + if ((*code == LT || *code == LTU) && CONST_INT_P (*op1) + && INTVAL (*op1) == 256) + { + *code = *code == LT ? LE : LEU; + *op1 = GEN_INT (255); + } +} + +/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. 
*/ + +static void +sw_64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) +{ + const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17); + + tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv; + tree new_fenv_var, reload_fenv, restore_fnenv; + tree update_call, atomic_feraiseexcept, hold_fnclex; + + /* Generate the equivalent of : + unsigned long fenv_var; + fenv_var = __ieee_get_fp_control (); + + unsigned long masked_fenv; + masked_fenv = fenv_var & mask; + + __ieee_set_fp_control (masked_fenv); */ + + fenv_var = create_tmp_var_raw (long_unsigned_type_node); + get_fpscr + = build_fn_decl ("__ieee_get_fp_control", + build_function_type_list (long_unsigned_type_node, NULL)); + set_fpscr = build_fn_decl ("__ieee_set_fp_control", + build_function_type_list (void_type_node, NULL)); + mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK); + ld_fenv = build4 (TARGET_EXPR, long_unsigned_type_node, fenv_var, + build_call_expr (get_fpscr, 0), NULL_TREE, NULL_TREE); + masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask); + hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv); + *hold = build2 (COMPOUND_EXPR, void_type_node, + build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), + hold_fnclex); + + /* Store the value of masked_fenv to clear the exceptions: + __ieee_set_fp_control (masked_fenv); */ + + *clear = build_call_expr (set_fpscr, 1, masked_fenv); + + /* Generate the equivalent of : + unsigned long new_fenv_var; + new_fenv_var = __ieee_get_fp_control (); + + __ieee_set_fp_control (fenv_var); + + __atomic_feraiseexcept (new_fenv_var); */ + + new_fenv_var = create_tmp_var_raw (long_unsigned_type_node); + reload_fenv = build4 (TARGET_EXPR, long_unsigned_type_node, new_fenv_var, + build_call_expr (get_fpscr, 0), NULL_TREE, NULL_TREE); + restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var); + atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); + update_call + = build_call_expr (atomic_feraiseexcept, 1, + fold_convert (integer_type_node, new_fenv_var)); + *update = build2 (COMPOUND_EXPR, void_type_node, + build2 (COMPOUND_EXPR, void_type_node, reload_fenv, + restore_fnenv), + update_call); +} + +/* Implement TARGET_HARD_REGNO_MODE_OK. On Sw_64, the integer registers + can hold any mode. The floating-point registers can hold 64-bit + integers as well, but not smaller values. */ + +static bool +sw_64_hard_regno_mode_ok (unsigned int regno, machine_mode mode) +{ + if (IN_RANGE (regno, 32, 62)) + return (mode == SFmode || mode == DFmode || mode == DImode || mode == SCmode + || mode == DCmode); + return true; +} + +/* Implement TARGET_MODES_TIEABLE_P. This asymmetric test is true when + MODE1 could be put in an FP register but MODE2 could not. */ + +static bool +sw_64_modes_tieable_p (machine_mode mode1, machine_mode mode2) +{ + return (sw_64_hard_regno_mode_ok (32, mode1) + ? sw_64_hard_regno_mode_ok (32, mode2) + : true); +} + +/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ + +/************************************************* + * + * float fast_inverse_sqrt (float x) + * { + * float xhalf = 0.5f * x; + * int i = *(int *)&x ; + * i = 0x5f3759df - (i >> 1); + * x = *(float *)&i; + * x = x *(1.5f - xhalf * x *x); + * x = x *(1.5f - xhalf * x *x); // SPEC2006 435 need this + * return x; + * } + * + ***************************************************/ + +/* Load up a constant. all of the vector elements. 
*/ +static rtx +sw_64_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst) +{ + rtx reg; + + if (mode == SFmode || mode == DFmode) + { + rtx d = const_double_from_real_value (dconst, mode); + reg = force_reg (mode, d); + } + else + gcc_unreachable (); + + return reg; +} + +void +sw_64_emit_rsqrt (rtx dst, rtx x, bool note_p) +{ + machine_mode mode = GET_MODE (dst); + rtx one, xhalf, mhalf, i, magical, x0, x1, x2; + + enum insn_code code = optab_handler (smul_optab, mode); + insn_gen_fn gen_mul = GEN_FCN (code); + gcc_assert (code != CODE_FOR_nothing); + + enum insn_code code1 = optab_handler (sub_optab, SImode); + insn_gen_fn gen_sub = GEN_FCN (code1); + gcc_assert (code1 != CODE_FOR_nothing); + + enum insn_code code2 = optab_handler (fnma_optab, mode); + insn_gen_fn gen_fnma = GEN_FCN (code2); + gcc_assert (code2 != CODE_FOR_nothing); + + enum insn_code code3 = optab_handler (add_optab, mode); + insn_gen_fn gen_add = GEN_FCN (code3); + gcc_assert (code3 != CODE_FOR_nothing); + + one = sw_64_load_constant_and_splat (mode, dconst1); + mhalf = sw_64_load_constant_and_splat (mode, dconsthalf); + + /* xhalf = 0.5f * x. */ + xhalf = gen_reg_rtx (mode); + emit_insn (gen_mul (xhalf, mhalf, x)); + + if (x == CONST0_RTX (mode)) + gcc_unreachable (); + + /* int i = *(int *)&x. */ + rtx vreg = gen_rtx_REG (SFmode, 28); + + emit_insn ( + gen_rtx_SET (vreg, gen_rtx_UNSPEC (mode, gen_rtvec (1, x), UNSPEC_FIMOVS))); + + /* i = i >> 1. */ + i = gen_reg_rtx (DImode); + rtx subreg = gen_rtx_SUBREG (SImode, vreg, 0); + emit_insn (gen_extendsidi2 (i, subreg)); + emit_insn (gen_ashrdi3 (i, i, const1_rtx)); + + /* magical number: 0x5f3759df. */ + magical = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (magical, GEN_INT (0x5f370000))); + emit_insn ( + gen_rtx_SET (magical, gen_rtx_PLUS (SImode, magical, GEN_INT (0x59df)))); + + /* x0 = 0x5f3759df - i. */ + subreg = gen_rtx_SUBREG (SImode, i, 0); + x0 = gen_reg_rtx (SImode); + emit_insn (gen_sub (x0, magical, subreg)); + + /* x = *(float *)&x0. */ + x = gen_rtx_REG (mode, 60); + x0 = gen_rtx_SUBREG (SFmode, x0, 0); + emit_insn (gen_rtx_SET (x, x0)); + + /* x= x *(1.5f - xhalf * x *x) */ + rtx number = gen_reg_rtx (mode); + emit_insn (gen_add (number, one, mhalf)); + + x1 = gen_reg_rtx (mode); + emit_insn (gen_mul (x1, x, x)); + emit_insn (gen_fnma (x1, x1, xhalf, number)); + emit_insn (gen_mul (x1, x1, x)); + + /* second iteration, SPEC2006 435 need this. */ + x2 = gen_reg_rtx (mode); + emit_insn (gen_mul (x2, x1, x1)); + emit_insn (gen_fnma (x2, x2, xhalf, number)); + emit_insn (gen_mul (dst, x2, x1)); +} + +rtx +gen_move_reg (rtx x) +{ + rtx temp = gen_reg_rtx (GET_MODE (x)); + emit_move_insn (temp, x); + return temp; +} + +/* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P, + * * add a reg_note saying that this was a division. Support both scalar + * and + * * vector divide. Assumes no trapping math and finite arguments. */ +void +sw_64_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p) +{ + machine_mode mode = GET_MODE (dst); + rtx one, x0, e0, x1, x2, xprev, eprev, xnext, enext, u, v; + int i; + + int passes = flag_sw_recip_precision ? 
2 : 1; + if (mode == DFmode) + passes += 2; + + enum insn_code code = optab_handler (smul_optab, mode); + insn_gen_fn gen_mul = GEN_FCN (code); + gcc_assert (code != CODE_FOR_nothing); + + enum insn_code code1 = optab_handler (fma_optab, mode); + insn_gen_fn gen_fma = GEN_FCN (code1); + gcc_assert (code1 != CODE_FOR_nothing); + + enum insn_code code2 = optab_handler (fnma_optab, mode); + insn_gen_fn gen_fnma = GEN_FCN (code2); + gcc_assert (code2 != CODE_FOR_nothing); + + one = sw_64_load_constant_and_splat (mode, dconst1); + + /* x0 = 1./d estimate */ + + x0 = gen_reg_rtx (mode); + emit_insn ( + gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d), UNSPEC_FRECX))); + + /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */ + if (passes > 1) + { + /* e0 = 1. - d * x0 */ + e0 = gen_reg_rtx (mode); + emit_insn (gen_fnma (e0, d, x0, one)); + + /* x1 = x0 + e0 * x0 */ + x1 = gen_reg_rtx (mode); + emit_insn (gen_fma (x1, x0, e0, x0)); + + for (i = 0, xprev = x1, eprev = e0; i < passes - 2; + ++i, xprev = xnext, eprev = enext) + { + /* enext = eprev * eprev */ + enext = gen_reg_rtx (mode); + emit_insn (gen_mul (enext, eprev, eprev)); + + /* xnext = xprev + enext * xprev */ + xnext = gen_reg_rtx (mode); + emit_insn (gen_fma (xnext, xprev, enext, xprev)); + } + } + else + xprev = x0; + + /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */ + /* u = n * xprev */ + u = gen_reg_rtx (mode); + emit_insn (gen_mul (u, n, xprev)); + + /* v = n - (d * u) */ + v = gen_reg_rtx (mode); + emit_insn (gen_fnma (v, d, u, n)); + + /* dst = (v * xprev) + u */ + emit_insn (gen_fma (dst, v, xprev, u)); +} + +int +enable_asan_check_stack () +{ + return asan_sanitize_stack_p (); +} + +static bool +sw_64_can_change_mode_class (machine_mode from, machine_mode to, + reg_class_t rclass) +{ + return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to) + || !reg_classes_intersect_p (FLOAT_REGS, rclass)); +} +bool +sw_64_slow_unaligned_access (machine_mode mode, unsigned int align) +{ + return (flag_sw_unalign_byte != 1 || TARGET_SW8A == 0); +} + +static bool +sw_64_macro_fusion_p () +{ + return (flag_sw_branch_fusion == 1); +} + +static bool +sw_64_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp) +{ + rtx src, dest; + enum rtx_code ccode; + rtx compare_set = NULL_RTX, test_if, cond; + rtx alu_set = NULL_RTX, addr = NULL_RTX; + if (get_attr_type (condjmp) != TYPE_IBR) + return false; + if (get_attr_type (condgen) != TYPE_ICMP) + return false; + compare_set = single_set (condgen); + if (compare_set == NULL_RTX) + { + int i; + rtx pat = PATTERN (condgen); + for (i = 0; i < XVECLEN (pat, 0); i++) + if (GET_CODE (XVECEXP (pat, 0, i)) == SET) + { + rtx set_src = SET_SRC (XVECEXP (pat, 0, i)); + alu_set = XVECEXP (pat, 0, i); + } + } + if (compare_set == NULL_RTX) + return false; + src = SET_SRC (compare_set); + if (GET_CODE (src) == UNSPEC) + return false; + test_if = SET_SRC (pc_set (condjmp)); + cond = XEXP (test_if, 0); + ccode = GET_CODE (cond); + return true; +} + +/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */ +static unsigned HOST_WIDE_INT +sw_64_asan_shadow_offset (void) +{ + return (HOST_WIDE_INT_1 << 49); +} + +static void +sw_64_sa_mask (unsigned long *imaskP, unsigned long *fmaskP) +{ + unsigned long imask = 0; + unsigned long fmask = 0; + unsigned int i; + + /* When outputting a thunk, we don't have valid register life info, + but assemble_start_function wants to output .frame and .mask + directives. 
*/ + if (cfun->is_thunk) + { + *imaskP = 0; + *fmaskP = 0; + return; + } + +#ifdef SW_64_ENABLE_FULL_ASAN + if (frame_pointer_needed) + imask |= (1UL << HARD_FRAME_POINTER_REGNUM); +#endif + + /* One for every register we have to save. */ + for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (!fixed_regs[i] && !call_used_regs[i] && df_regs_ever_live_p (i) + && i != REG_RA) + { + if (i < 32) + imask |= (1UL << i); + else + fmask |= (1UL << (i - 32)); + } + + /* We need to restore these for the handler. */ + if (crtl->calls_eh_return) + { + for (i = 0;; ++i) + { + unsigned regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + imask |= 1UL << regno; + } + } + + /* If any register spilled, then spill the return address also. */ + /* ??? This is required by the Digital stack unwind specification + and isn't needed if we're doing Dwarf2 unwinding. */ + if (imask || fmask || sw_64_ra_ever_killed ()) + imask |= (1UL << REG_RA); + + *imaskP = imask; + *fmaskP = fmask; +} + +int +sw_64_sa_size (void) +{ + unsigned long mask[2]; + int sa_size = 0; + int i, j; + + sw_64_sa_mask (&mask[0], &mask[1]); + + for (j = 0; j < 2; ++j) + for (i = 0; i < 32; ++i) + if ((mask[j] >> i) & 1) + sa_size++; + + /* Our size must be even (multiple of 16 bytes). */ + if (sa_size & 1) + sa_size++; + return sa_size * 8; +} + +#if 1 +/* Sw64 stack frames generated by this compiler look like: + + +-------------------------------+ + | | + | incoming stack arguments | + | | + +-------------------------------+ + | | <-- incoming stack pointer (aligned) + | callee-allocated save area | + | for register varargs | + | | + +-------------------------------+ + | local variables | <-- frame_pointer_rtx + | | + +-------------------------------+ + | padding | + +-------------------------------+ + | callee-saved registers | frame.saved_regs_size + +-------------------------------+ + | FP' | + +-------------------------------+ + | RA' | + +-------------------------------+ <- hard_frame_pointer_rtx (aligned) + | padding | + +-------------------------------+ + | outgoing stack arguments | <-- arg_pointer + | | + +-------------------------------+ + | | <-- stack_pointer_rtx (aligned) + + The following registers are reserved during frame layout and should not be + used for any other purpose: + + TODO: add other register purpose + - r26(RA), r15(FP): Used by standard frame layout. + + These registers must be avoided in frame layout related code unless the + explicit intention is to interact with one of the features listed above. */ + +static void +sw_64_layout_frame (void) +{ + poly_int64 offset = 0; + + cfun->machine->frame.emit_frame_pointer + = frame_pointer_needed || crtl->calls_eh_return; + + unsigned HOST_WIDE_INT sa_mask = 0; + int sa_size; + + /* When outputting a thunk, we don't have valid register life info, + but assemble_start_function wants to output .frame and .mask + directives. */ + if (!cfun->is_thunk) + { + /* One for every register we have to save. */ + for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++) + if (!call_used_or_fixed_reg_p (i) && df_regs_ever_live_p (i) + && i != REG_RA) + sa_mask |= HOST_WIDE_INT_1U << i; + + /* We need to restore these for the handler. */ + if (crtl->calls_eh_return) + { + for (unsigned i = 0;; ++i) + { + unsigned regno = EH_RETURN_DATA_REGNO (i); + if (regno == INVALID_REGNUM) + break; + sa_mask |= HOST_WIDE_INT_1U << regno; + } + } + /* If any register spilled, then spill the return address also. */ + /* ??? 
This is required by the Digital stack unwind specification + and isn't needed if we're doing Dwarf2 unwinding. */ + if (sa_mask || sw_64_ra_ever_killed ()) + sa_mask |= HOST_WIDE_INT_1U << REG_RA; + } + sa_size = popcount_hwi (sa_mask); + poly_int64 frame_size = get_frame_size (); + + /* Our size must be even (multiple of 16 bytes). */ + if (sa_size & 1) + sa_size++; + sa_size *= 8; + + poly_int64 varargs_and_saved_regs_size + = sa_size + cfun->machine->frame.saved_varargs_size + + crtl->args.pretend_args_size; + + poly_int64 varargs_size + = cfun->machine->frame.saved_varargs_size + crtl->args.pretend_args_size; + + HOST_WIDE_INT extra_alignment + = SW_64_ROUND (frame_size + cfun->machine->frame.saved_varargs_size) + - cfun->machine->frame.saved_varargs_size; + + poly_int64 outgoing_args = SW_64_ROUND (crtl->outgoing_args_size); + + cfun->machine->frame.local_offset + = cfun->machine->frame.saved_varargs_size + crtl->args.pretend_args_size; + + poly_int64 total_size + = aligned_upper_bound (varargs_and_saved_regs_size + frame_size, + STACK_BOUNDARY / BITS_PER_UNIT) + + outgoing_args; + + cfun->machine->frame.hard_frame_pointer_offset + = aligned_upper_bound (varargs_and_saved_regs_size + frame_size, + STACK_BOUNDARY / BITS_PER_UNIT); + + // TODO: does sw64 need this feild? + cfun->machine->frame.callee_offset + = cfun->machine->frame.hard_frame_pointer_offset; + + cfun->machine->frame.arg_pointer_offset = total_size - varargs_size; + + cfun->machine->frame.sa_mask = sa_mask; + cfun->machine->frame.saved_regs_size = sa_size; + cfun->machine->frame.frame_size = total_size; +} +#endif + +/* Define the offset between two registers, one to be eliminated, + and the other its replacement, at the start of a routine. */ + +HOST_WIDE_INT +sw_64_initial_elimination_offset (unsigned int from, + unsigned int to ATTRIBUTE_UNUSED) +{ + HOST_WIDE_INT ret; +#ifdef SW_64_ENABLE_FULL_ASAN + if (to == HARD_FRAME_POINTER_REGNUM) + { + if (from == ARG_POINTER_REGNUM) + { + // TODO: in sw64 variable arguments processing, all regs + // and pretending arguments offset a passive, so we have + // to minus varargs size. May be fix it is a better way? + return cfun->machine->frame.hard_frame_pointer_offset + - cfun->machine->frame.local_offset; + } + + if (from == FRAME_POINTER_REGNUM) + { + return cfun->machine->frame.hard_frame_pointer_offset + - cfun->machine->frame.local_offset; + } + } + + if (to == STACK_POINTER_REGNUM) + { + if (from == ARG_POINTER_REGNUM) + { + // TODO: same as HARD_FRAME_POINTER_REGNUM; + return cfun->machine->frame.arg_pointer_offset; + } + if (from == FRAME_POINTER_REGNUM) + { + return cfun->machine->frame.arg_pointer_offset; + } + } + + return cfun->machine->frame.frame_size; +#else + ret = sw_64_sa_size (); + if (!frame_pointer_needed) + ret += SW_64_ROUND (crtl->outgoing_args_size); + + switch (from) + { + case FRAME_POINTER_REGNUM: + break; + + case ARG_POINTER_REGNUM: + ret += (SW_64_ROUND (get_frame_size () + crtl->args.pretend_args_size) + - crtl->args.pretend_args_size); + break; + + default: + gcc_unreachable (); + } + + return ret; +#endif +} + +/* Compute the frame size. SIZE is the size of the "naked" frame + and SA_SIZE is the size of the register save area. 
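+
+ For instance, in the non-ASAN case with the default 16-byte SW_64_ROUND,
+ no pretend arguments, 40 bytes of locals, two saved registers
+ (SA_SIZE = 16) and 24 bytes of outgoing arguments, the result is
+ SW_64_ROUND (24) + 16 + SW_64_ROUND (40) = 32 + 16 + 48 = 96 bytes.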
*/ + +static HOST_WIDE_INT +compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size) +{ +#ifdef SW_64_ENABLE_FULL_ASAN + // sw_64_layout_frame (); + return cfun->machine->frame.frame_size; +#else + return SW_64_ROUND (crtl->outgoing_args_size) + sa_size + + SW_64_ROUND (size + crtl->args.pretend_args_size); +#endif +} + +/* Initialize the GCC target structure. */ +#undef TARGET_IN_SMALL_DATA_P +#define TARGET_IN_SMALL_DATA_P sw_64_in_small_data_p + +#undef TARGET_ASM_ALIGNED_HI_OP +#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t" +#undef TARGET_ASM_ALIGNED_DI_OP +#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" + +/* Default unaligned ops are provided for ELF systems. To get unaligned + data for non-ELF systems, we have to turn off auto alignment. */ +#undef TARGET_ASM_RELOC_RW_MASK +#define TARGET_ASM_RELOC_RW_MASK sw_64_elf_reloc_rw_mask +#undef TARGET_ASM_SELECT_RTX_SECTION +#define TARGET_ASM_SELECT_RTX_SECTION sw_64_elf_select_rtx_section +#undef TARGET_SECTION_TYPE_FLAGS +#define TARGET_SECTION_TYPE_FLAGS sw_64_elf_section_type_flags + +#undef TARGET_ASM_FUNCTION_END_PROLOGUE +#define TARGET_ASM_FUNCTION_END_PROLOGUE sw_64_output_function_end_prologue + +#undef TARGET_INIT_LIBFUNCS +#define TARGET_INIT_LIBFUNCS sw_64_init_libfuncs + +#undef TARGET_LEGITIMIZE_ADDRESS +#define TARGET_LEGITIMIZE_ADDRESS sw_64_legitimize_address +#undef TARGET_MODE_DEPENDENT_ADDRESS_P +#define TARGET_MODE_DEPENDENT_ADDRESS_P sw_64_mode_dependent_address_p + +#undef TARGET_ASM_FILE_START +#define TARGET_ASM_FILE_START sw_64_file_start + +#undef TARGET_SCHED_ADJUST_COST +#define TARGET_SCHED_ADJUST_COST sw_64_adjust_cost +#undef TARGET_SCHED_ISSUE_RATE +#define TARGET_SCHED_ISSUE_RATE sw_64_issue_rate +#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD +#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ + sw_64_multipass_dfa_lookahead + +#undef TARGET_HAVE_TLS +#define TARGET_HAVE_TLS HAVE_AS_TLS + +#undef TARGET_BUILTIN_DECL +#define TARGET_BUILTIN_DECL sw_64_builtin_decl +#undef TARGET_INIT_BUILTINS +#define TARGET_INIT_BUILTINS sw_64_init_builtins +#undef TARGET_EXPAND_BUILTIN +#define TARGET_EXPAND_BUILTIN sw_64_expand_builtin +#undef TARGET_FOLD_BUILTIN +#define TARGET_FOLD_BUILTIN sw_64_fold_builtin +#undef TARGET_GIMPLE_FOLD_BUILTIN +#define TARGET_GIMPLE_FOLD_BUILTIN sw_64_gimple_fold_builtin + +#undef TARGET_FUNCTION_OK_FOR_SIBCALL +#define TARGET_FUNCTION_OK_FOR_SIBCALL sw_64_function_ok_for_sibcall +#undef TARGET_CANNOT_COPY_INSN_P +#define TARGET_CANNOT_COPY_INSN_P sw_64_cannot_copy_insn_p +#undef TARGET_LEGITIMATE_CONSTANT_P +#define TARGET_LEGITIMATE_CONSTANT_P sw_64_legitimate_constant_p +#undef TARGET_CANNOT_FORCE_CONST_MEM +#define TARGET_CANNOT_FORCE_CONST_MEM sw_64_cannot_force_const_mem + +#undef TARGET_ASM_OUTPUT_MI_THUNK +#define TARGET_ASM_OUTPUT_MI_THUNK sw_64_output_mi_thunk_osf +#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK +#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \ + hook_bool_const_tree_hwi_hwi_const_tree_true +#undef TARGET_STDARG_OPTIMIZE_HOOK +#define TARGET_STDARG_OPTIMIZE_HOOK sw_64_stdarg_optimize_hook + +#undef TARGET_PRINT_OPERAND +#define TARGET_PRINT_OPERAND sw_64_print_operand +#undef TARGET_PRINT_OPERAND_ADDRESS +#define TARGET_PRINT_OPERAND_ADDRESS sw_64_print_operand_address +#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P +#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sw_64_print_operand_punct_valid_p + +/* Use 16-bits anchor. 
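+ Objects within [anchor - 0x8000, anchor + 0x7fff] can be addressed
+ relative to a section anchor, which keeps the offset within the signed
+ 16-bit displacement used for memory references on this target;
+ CONSTANT_ADDRESS_P below accepts the same range.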
*/ +#undef TARGET_MIN_ANCHOR_OFFSET +#define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1 +#undef TARGET_MAX_ANCHOR_OFFSET +#define TARGET_MAX_ANCHOR_OFFSET 0x7fff +#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P +#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true + +#undef TARGET_REGISTER_MOVE_COST +#define TARGET_REGISTER_MOVE_COST sw_64_register_move_cost +#undef TARGET_MEMORY_MOVE_COST +#define TARGET_MEMORY_MOVE_COST sw_64_memory_move_cost +#undef TARGET_RTX_COSTS +#define TARGET_RTX_COSTS sw_64_rtx_costs +#undef TARGET_ADDRESS_COST +#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 + +#undef TARGET_MACHINE_DEPENDENT_REORG +#define TARGET_MACHINE_DEPENDENT_REORG sw_64_reorg + +#undef TARGET_PROMOTE_FUNCTION_MODE +#define TARGET_PROMOTE_FUNCTION_MODE \ + default_promote_function_mode_always_promote +#undef TARGET_PROMOTE_PROTOTYPES +#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false + +#undef TARGET_FUNCTION_VALUE +#define TARGET_FUNCTION_VALUE sw_64_function_value +#undef TARGET_LIBCALL_VALUE +#define TARGET_LIBCALL_VALUE sw_64_libcall_value +#undef TARGET_FUNCTION_VALUE_REGNO_P +#define TARGET_FUNCTION_VALUE_REGNO_P sw_64_function_value_regno_p +#undef TARGET_RETURN_IN_MEMORY +#define TARGET_RETURN_IN_MEMORY sw_64_return_in_memory +#undef TARGET_PASS_BY_REFERENCE +#define TARGET_PASS_BY_REFERENCE sw_64_pass_by_reference +#undef TARGET_SETUP_INCOMING_VARARGS +#define TARGET_SETUP_INCOMING_VARARGS sw_64_setup_incoming_varargs +#undef TARGET_STRICT_ARGUMENT_NAMING +#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true +#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED +#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true +#undef TARGET_SPLIT_COMPLEX_ARG +#define TARGET_SPLIT_COMPLEX_ARG sw_64_split_complex_arg +#undef TARGET_GIMPLIFY_VA_ARG_EXPR +#define TARGET_GIMPLIFY_VA_ARG_EXPR sw_64_gimplify_va_arg +#undef TARGET_ARG_PARTIAL_BYTES +#define TARGET_ARG_PARTIAL_BYTES sw_64_arg_partial_bytes +#undef TARGET_FUNCTION_ARG +#define TARGET_FUNCTION_ARG sw_64_function_arg +#undef TARGET_FUNCTION_ARG_ADVANCE +#define TARGET_FUNCTION_ARG_ADVANCE sw_64_function_arg_advance +#undef TARGET_TRAMPOLINE_INIT +#define TARGET_TRAMPOLINE_INIT sw_64_trampoline_init + +#undef TARGET_INSTANTIATE_DECLS +#define TARGET_INSTANTIATE_DECLS sw_64_instantiate_decls + +#undef TARGET_SECONDARY_RELOAD +#define TARGET_SECONDARY_RELOAD sw_64_secondary_reload +#undef TARGET_SECONDARY_MEMORY_NEEDED +#define TARGET_SECONDARY_MEMORY_NEEDED sw_64_secondary_memory_needed +#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE +#define TARGET_SECONDARY_MEMORY_NEEDED_MODE sw_64_secondary_memory_needed_mode + +#undef TARGET_SCALAR_MODE_SUPPORTED_P +#define TARGET_SCALAR_MODE_SUPPORTED_P sw_64_scalar_mode_supported_p +#undef TARGET_VECTOR_MODE_SUPPORTED_P +#define TARGET_VECTOR_MODE_SUPPORTED_P sw_64_vector_mode_supported_p + +#undef TARGET_BUILD_BUILTIN_VA_LIST +#define TARGET_BUILD_BUILTIN_VA_LIST sw_64_build_builtin_va_list + +#undef TARGET_EXPAND_BUILTIN_VA_START +#define TARGET_EXPAND_BUILTIN_VA_START sw_64_va_start + +#undef TARGET_OPTION_OVERRIDE +#define TARGET_OPTION_OVERRIDE sw_64_option_override + +#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE +#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE sw_64_override_options_after_change + +#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING +#undef TARGET_MANGLE_TYPE +#define TARGET_MANGLE_TYPE sw_64_mangle_type +#endif + +#undef TARGET_LRA_P +#define TARGET_LRA_P hook_bool_void_false + +#undef TARGET_LEGITIMATE_ADDRESS_P +#define 
TARGET_LEGITIMATE_ADDRESS_P sw_64_legitimate_address_p + +#undef TARGET_CONDITIONAL_REGISTER_USAGE +#define TARGET_CONDITIONAL_REGISTER_USAGE sw_64_conditional_register_usage + +#undef TARGET_CANONICALIZE_COMPARISON +#define TARGET_CANONICALIZE_COMPARISON sw_64_canonicalize_comparison + +#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV +#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sw_64_atomic_assign_expand_fenv + +#undef TARGET_HARD_REGNO_MODE_OK +#define TARGET_HARD_REGNO_MODE_OK sw_64_hard_regno_mode_ok +#undef TARGET_SLOW_UNALIGNED_ACCESS +#define TARGET_SLOW_UNALIGNED_ACCESS sw_64_slow_unaligned_access +#undef TARGET_MODES_TIEABLE_P +#define TARGET_MODES_TIEABLE_P sw_64_modes_tieable_p + +#undef TARGET_CAN_CHANGE_MODE_CLASS +#define TARGET_CAN_CHANGE_MODE_CLASS sw_64_can_change_mode_class + +#undef TARGET_SCHED_MACRO_FUSION_P +#define TARGET_SCHED_MACRO_FUSION_P sw_64_macro_fusion_p + +#undef TARGET_SCHED_MACRO_FUSION_PAIR_P +#define TARGET_SCHED_MACRO_FUSION_PAIR_P sw_64_macro_fusion_pair_p +#undef TARGET_ASAN_SHADOW_OFFSET +#define TARGET_ASAN_SHADOW_OFFSET sw_64_asan_shadow_offset + +struct gcc_target targetm = TARGET_INITIALIZER; + +#include "gt-sw-64.h" diff --git a/gcc/config/sw_64/sw_64.h b/gcc/config/sw_64/sw_64.h new file mode 100644 index 0000000000000000000000000000000000000000..8e3bb0241dae498109f15ca007a9e0eeb4240f06 --- /dev/null +++ b/gcc/config/sw_64/sw_64.h @@ -0,0 +1,999 @@ +/* Definitions of target machine for GNU compiler, for Sw_64. + Copyright (C) 1992-2020 Free Software Foundation, Inc. + Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with GCC; see the file COPYING3. If not see +. */ + +/* Target CPU builtins. */ +#define TARGET_CPU_CPP_BUILTINS() \ + do \ + { \ + builtin_define ("__sw_64"); \ + builtin_define ("__sw_64__"); \ + builtin_assert ("cpu=sw_64"); \ + builtin_assert ("machine=sw_64"); \ + if (TARGET_CIX) \ + { \ + builtin_define ("__sw_64_cix__"); \ + builtin_assert ("cpu=cix"); \ + } \ + if (TARGET_FIX) \ + { \ + builtin_define ("__sw_64_fix__"); \ + builtin_assert ("cpu=fix"); \ + } \ + if (TARGET_BWX) \ + { \ + builtin_define ("__sw_64_bwx__"); \ + builtin_assert ("cpu=bwx"); \ + } \ + if (TARGET_MAX) \ + { \ + builtin_define ("__sw_64_max__"); \ + builtin_assert ("cpu=max"); \ + } \ + if (sw_64_cpu_string) \ + { \ + if (strcmp (sw_64_cpu_string, "sw6a") == 0) \ + { \ + builtin_define ("__sw_64_sw6a__"); \ + builtin_assert ("cpu=sw6a"); \ + } \ + else if (strcmp (sw_64_cpu_string, "sw6b") == 0) \ + { \ + builtin_define ("__sw_64_sw6b__"); \ + builtin_assert ("cpu=sw6b"); \ + } \ + else if (strcmp (sw_64_cpu_string, "sw8a") == 0) \ + { \ + builtin_define ("__sw_64_sw8a__"); \ + builtin_assert ("cpu=sw8a"); \ + } \ + } \ + else /* Presumably sw6b. 
*/ \ + { \ + builtin_define ("__sw_64_sw6b__"); \ + builtin_assert ("cpu=sw6b"); \ + } \ + if (TARGET_IEEE || TARGET_IEEE_WITH_INEXACT) \ + builtin_define ("_IEEE_FP"); \ + if (TARGET_IEEE_WITH_INEXACT) \ + builtin_define ("_IEEE_FP_INEXACT"); \ + if (TARGET_LONG_DOUBLE_128) \ + builtin_define ("__LONG_DOUBLE_128__"); \ + \ + /* Macros dependent on the C dialect. */ \ + SUBTARGET_LANGUAGE_CPP_BUILTINS (); \ + } \ + while (0) + +#ifndef SUBTARGET_LANGUAGE_CPP_BUILTINS +#define SUBTARGET_LANGUAGE_CPP_BUILTINS() \ + do \ + { \ + if (preprocessing_asm_p ()) \ + builtin_define_std ("LANGUAGE_ASSEMBLY"); \ + else if (c_dialect_cxx ()) \ + { \ + builtin_define ("__LANGUAGE_C_PLUS_PLUS"); \ + builtin_define ("__LANGUAGE_C_PLUS_PLUS__"); \ + } \ + else \ + builtin_define_std ("LANGUAGE_C"); \ + if (c_dialect_objc ()) \ + { \ + builtin_define ("__LANGUAGE_OBJECTIVE_C"); \ + builtin_define ("__LANGUAGE_OBJECTIVE_C__"); \ + } \ + } \ + while (0) +#endif + +/* Run-time compilation parameters selecting different hardware subsets. */ + +/* Which processor to schedule for. The cpu attribute defines a list that + mirrors this list, so changes to sw_64.md must be made at the same time. */ + +enum processor_type +{ + PROCESSOR_SW6, /* SW6 */ + PROCESSOR_SW8, /* SW8 */ + PROCESSOR_MAX +}; + +extern enum processor_type sw_64_cpu; +extern enum processor_type sw_64_tune; + +enum sw_64_trap_precision +{ + SW_64_TP_PROG, /* No precision (default). */ + SW_64_TP_FUNC, /* Trap contained within originating function. */ + SW_64_TP_INSN /* Instruction accuracy and code is resumption safe. */ +}; + +enum sw_64_fp_rounding_mode +{ + SW_64_FPRM_NORM, /* Normal rounding mode. */ + SW_64_FPRM_MINF, /* Round towards minus-infinity. */ + SW_64_FPRM_CHOP, /* Chopped rounding mode (towards 0). */ + SW_64_FPRM_DYN /* Dynamic rounding mode. */ +}; + +enum sw_64_fp_trap_mode +{ + SW_64_FPTM_N, /* Normal trap mode. */ + SW_64_FPTM_U, /* Underflow traps enabled. */ + SW_64_FPTM_SU, /* Software completion, w/underflow traps. */ + SW_64_FPTM_SUI /* Software completion, w/underflow & inexact traps. */ +}; + +extern enum sw_64_trap_precision sw_64_tp; +extern enum sw_64_fp_rounding_mode sw_64_fprm; +extern enum sw_64_fp_trap_mode sw_64_fptm; + +/* Invert the easy way to make options work. */ +#define TARGET_FP (!TARGET_SOFT_FP) + +/* Macros to silence warnings about numbers being signed in traditional + * C and unsigned in ISO C when compiled on 32-bit hosts. */ + +#define BITMASK_HIGH (((unsigned long) 1) << 31) /* 0x80000000. */ + +/* These are for target os support and cannot be changed at runtime. */ +#define TARGET_ABI_OPEN_VMS 0 +#define TARGET_ABI_OSF 1 + +#ifndef TARGET_CAN_FAULT_IN_PROLOGUE +#define TARGET_CAN_FAULT_IN_PROLOGUE 0 +#endif +#ifndef TARGET_HAS_XFLOATING_LIBS +#define TARGET_HAS_XFLOATING_LIBS TARGET_LONG_DOUBLE_128 +#endif +#ifndef TARGET_PROFILING_NEEDS_GP +#define TARGET_PROFILING_NEEDS_GP 0 +#endif +#ifndef HAVE_AS_TLS +#define HAVE_AS_TLS 0 +#endif + +#define TARGET_DEFAULT MASK_FPREGS + +#ifndef TARGET_CPU_DEFAULT +#define TARGET_CPU_DEFAULT 0 +#endif + +#ifndef TARGET_DEFAULT_EXPLICIT_RELOCS +#ifdef HAVE_AS_EXPLICIT_RELOCS +#define TARGET_DEFAULT_EXPLICIT_RELOCS MASK_EXPLICIT_RELOCS +#define TARGET_SUPPORT_ARCH 1 +#else +#define TARGET_DEFAULT_EXPLICIT_RELOCS 0 +#endif +#endif + +#ifndef TARGET_SUPPORT_ARCH +#define TARGET_SUPPORT_ARCH 0 +#endif + +/* Support for a compile-time default CPU, et cetera. The rules are: + --with-cpu is ignored if -mcpu is specified. 
+ --with-tune is ignored if -mtune is specified. */ +#define OPTION_DEFAULT_SPECS \ + {"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}"}, \ + { \ + "tune", "%{!mtune=*:-mtune=%(VALUE)}" \ + } + +/* target machine storage layout */ + + +/* Define the size of `int'. The default is the same as the word size. */ +#define INT_TYPE_SIZE 32 + +#define LONG_TYPE_SIZE (TARGET_SW_M32 ? 32 : 64) + +/* Define the size of `long long'. The default is the twice the word size. */ +#define LONG_LONG_TYPE_SIZE 64 + +/* The two floating-point formats we support are S-floating, which is + 4 bytes, and T-floating, which is 8 bytes. `float' is S and `double' + and `long double' are T. */ + +#define FLOAT_TYPE_SIZE 32 +#define DOUBLE_TYPE_SIZE 64 +#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64) + +/* Work around target_flags dependency in ada/targtyps.c. */ +#define WIDEST_HARDWARE_FP_SIZE 64 + +#define WCHAR_TYPE "unsigned int" +#define WCHAR_TYPE_SIZE 32 + +/* Define this macro if it is advisable to hold scalars in registers + in a wider mode than that declared by the program. In such cases, + the value is constrained to be within the bounds of the declared + type, but kept valid in the wider mode. The signedness of the + extension may differ from that of the type. + + For Sw_64, we always store objects in a full register. 32-bit integers + are always sign-extended, but smaller objects retain their signedness. + + Note that small vector types can get mapped onto integer modes at the + whim of not appearing in sw_64-modes.def. We never promoted these + values before; don't do so now that we've trimmed the set of modes to + those actually implemented in the backend. */ + +#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ + if (!TARGET_SW_M32 \ + && (GET_MODE_CLASS (MODE) == MODE_INT \ + && (TYPE == NULL || TREE_CODE (TYPE) != VECTOR_TYPE) \ + && GET_MODE_SIZE (MODE) < UNITS_PER_WORD)) \ + { \ + if ((MODE) == SImode) \ + (UNSIGNEDP) = 0; \ + (MODE) = DImode; \ + } + +/* Define this if most significant bit is lowest numbered + in instructions that operate on numbered bit-fields. + + There are no such instructions on the Sw_64, but the documentation + is little endian. */ +#define BITS_BIG_ENDIAN 0 + +/* Define this if most significant byte of a word is the lowest numbered. + This is false on the Sw_64. */ +#define BYTES_BIG_ENDIAN 0 + +/* Define this if most significant word of a multiword number is lowest + numbered. + + For Sw_64 we can decide arbitrarily since there are no machine instructions + for them. Might as well be consistent with bytes. */ +#define WORDS_BIG_ENDIAN 0 + +/* Width of a word, in units (bytes). */ +#define UNITS_PER_WORD 8 + +/* Width in bits of a pointer. + See also the macro `Pmode' defined below. */ +#define POINTER_SIZE (TARGET_SW_M32 ? 32 : 64) + +/* Allocation boundary (in *bits*) for storing arguments in argument list. */ +#define PARM_BOUNDARY 64 + +/* Boundary (in *bits*) on which stack pointer should be aligned. */ +#define STACK_BOUNDARY ((TARGET_SW_SIMD || TARGET_SW_32ALIGN) ? 256 : 128) + +/* Allocation boundary (in *bits*) for the code of a function. */ +#define FUNCTION_BOUNDARY 32 + +/* Alignment of field after `int : 0' in a structure. */ +#define EMPTY_FIELD_BOUNDARY 64 + +/* Every structure's size must be a multiple of this. */ +#define STRUCTURE_SIZE_BOUNDARY 8 + +/* A bit-field declared as `int' forces `int' alignment for the struct. */ +#undef PCC_BITFILED_TYPE_MATTERS +#define PCC_BITFIELD_TYPE_MATTERS 1 + +/* No data type wants to be aligned rounder than this. 
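+ (256 bits when TARGET_SW_SIMD or TARGET_SW_32ALIGN is in effect,
+ otherwise 128 bits; the same condition governs STACK_BOUNDARY above and
+ the 32-byte SW_64_ROUND used for frame layout.)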
*/ +#define BIGGEST_ALIGNMENT ((TARGET_SW_SIMD || TARGET_SW_32ALIGN) ? 256 : 128) +/* For atomic access to objects, must have at least 32-bit alignment + unless the machine has byte operations. */ +#define MINIMUM_ATOMIC_ALIGNMENT ((unsigned int) (TARGET_BWX ? 8 : 32)) + +/* Align all constants and variables to at least a word boundary so + we can pick up pieces of them faster. */ +/* ??? Only if block-move stuff knows about different source/destination + alignment. */ +#if 0 +#define DATA_ALIGNMENT(EXP, ALIGN) MAX ((ALIGN), BITS_PER_WORD) +#endif + +/* Set this nonzero if move instructions will actually fail to work + when given unaligned data. + + Since we get an error message when we do one, call them invalid. */ + +#define STRICT_ALIGNMENT 1 + +#define SW64_EXPAND_ALIGNMENT(COND, EXP, ALIGN) \ + (((COND) && ((ALIGN) < BITS_PER_WORD) \ + && (TREE_CODE (EXP) == ARRAY_TYPE || TREE_CODE (EXP) == UNION_TYPE \ + || TREE_CODE (EXP) == RECORD_TYPE)) \ + ? BITS_PER_WORD \ + : (ALIGN)) + +/* Similarly, make sure that objects on the stack are sensibly aligned. */ +#define LOCAL_ALIGNMENT(EXP, ALIGN) \ + SW64_EXPAND_ALIGNMENT (!flag_conserve_stack, EXP, ALIGN) + +/* Standard register usage. */ + +/* Number of actual hardware registers. + The hardware registers are assigned numbers for the compiler + from 0 to just below FIRST_PSEUDO_REGISTER. + All registers that the compiler knows about must be given numbers, + even those that are not normally considered general registers. + + We define all 32 integer registers, even though $31 is always zero, + and all 32 floating-point registers, even though $f31 is also + always zero. We do not bother defining the FP status register and + there are no other registers. + + Since $31 is always zero, we will use register number 31 as the + argument pointer. It will never appear in the generated code + because we will always be eliminating it in favor of the stack + pointer or hardware frame pointer. + + Likewise, we use $f31 for the frame pointer, which will always + be eliminated in favor of the hardware frame pointer or the + stack pointer. */ + +#define FIRST_PSEUDO_REGISTER 64 + +/* 1 for registers that have pervasive standard uses + and are not available for the register allocator. */ + +#define FIXED_REGISTERS \ + { \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 \ + } + +/* 1 for registers not available across function calls. + These must include the FIXED_REGISTERS and also any + registers that can be used without being saved. + The latter must include the registers where values are returned + and the register where structure-value addresses are passed. + Aside from that, you can include as many other registers as you like. */ +#define CALL_USED_REGISTERS \ + { \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ + 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, \ + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 \ + } + +/* List the order in which to allocate registers. Each register must be + listed once, even those in FIXED_REGISTERS. 
*/ + +#define REG_ALLOC_ORDER \ + { \ + 1, 2, 3, 4, 5, 6, 7, 8, /* nonsaved integer registers */ \ + 22, 23, 24, 25, 28, /* likewise */ \ + 0, /* likewise, but return value */ \ + 21, 20, 19, 18, 17, 16, /* likewise, but input args */ \ + 27, /* likewise, but SYSV procedure value */ \ + \ + 42, 43, 44, 45, 46, 47, /* nonsaved floating-point registers */ \ + 54, 55, 56, 57, 58, 59, /* likewise */ \ + 60, 61, 62, /* likewise */ \ + 32, 33, /* likewise, but return values */ \ + 53, 52, 51, 50, 49, 48, /* likewise, but input args */ \ + \ + 9, 10, 11, 12, 13, 14, /* saved integer registers */ \ + 26, /* return address */ \ + 15, /* hard frame pointer */ \ + \ + 34, 35, 36, 37, 38, 39, /* saved floating-point registers */ \ + 40, 41, /* likewise */ \ + \ + 29, 30, 31, 63 /* gp, sp, ap, sfp */ \ + } + +/* Specify the registers used for certain standard purposes. + The values of these macros are register numbers. */ + +/* Sw_64 pc isn't overloaded on a register that the compiler knows about. */ +/* #define PC_REGNUM */ + +/* Register to use for pushing function arguments. */ +#define STACK_POINTER_REGNUM 30 + +/* Base register for access to local variables of the function. */ +#define HARD_FRAME_POINTER_REGNUM 15 + +/* Base register for access to arguments of the function. */ +#define ARG_POINTER_REGNUM 31 + +/* Base register for access to local variables of function. */ +#define FRAME_POINTER_REGNUM 63 + +/* Register in which static-chain is passed to a function. + + For the Sw_64, this is based on an example; the calling sequence + doesn't seem to specify this. */ +#define STATIC_CHAIN_REGNUM 1 + +/* The register number of the register used to address a table of + static data addresses in memory. */ +#define PIC_OFFSET_TABLE_REGNUM 29 + +/* Define this macro if the register defined by `PIC_OFFSET_TABLE_REGNUM' + is clobbered by calls. */ +/* ??? It is and it isn't. It's required to be valid for a given + function when the function returns. It isn't clobbered by + current_file functions. Moreover, we do not expose the ldgp + until after reload, so we're probably safe. */ +/* #define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED */ + +/* Define the classes of registers for register constraints in the + machine description. Also define ranges of constants. + + One of the classes must always be named ALL_REGS and include all hard regs. + If there is more than one class, another class must be named NO_REGS + and contain no registers. + + The name GENERAL_REGS must be the name of a class (or an alias for + another name such as ALL_REGS). This is the class of registers + that is allowed by "g" or "r" in a register constraint. + Also, registers outside this class are allocated only when + instructions express preferences for them. + + The classes must be numbered in nondecreasing order; that is, + a larger-numbered class must never be contained completely + in a smaller-numbered class. + + For any two classes, it is very desirable that there be another + class that represents their union. */ + +enum reg_class +{ + NO_REGS, + R0_REG, + R24_REG, + R25_REG, + R27_REG, + GENERAL_REGS, + FLOAT_REGS, + ALL_REGS, + LIM_REG_CLASSES +}; + +#define N_REG_CLASSES (int) LIM_REG_CLASSES + +/* Give names of register classes as strings for dump file. */ + +#define REG_CLASS_NAMES \ + { \ + "NO_REGS", "R0_REG", "R24_REG", "R25_REG", "R27_REG", "GENERAL_REGS", \ + "FLOAT_REGS", "ALL_REGS" \ + } + +/* Define which registers fit in which classes. + This is an initializer for a vector of HARD_REG_SET + of length N_REG_CLASSES. 
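+
+ Each initializer is two 32-bit words: bit N of the first word is hard
+ register N (the integer registers), bit N of the second word is hard
+ register 32 + N (the floating registers and the soft frame pointer).
+ For example, R27_REG is {0x08000000, 0x00000000} because only bit 27 is
+ set; FLOAT_REGS is {0x00000000, 0x7fffffff}, i.e. registers 32-62
+ ($f0-$f30); GENERAL_REGS additionally sets bit 31 of the second word so
+ that register 63 (FRAME_POINTER_REGNUM) is treated as a general register.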
*/ + +#define REG_CLASS_CONTENTS \ + { \ + {0x00000000, 0x00000000}, /* NO_REGS */ \ + {0x00000001, 0x00000000}, /* R0_REG */ \ + {0x01000000, 0x00000000}, /* R24_REG */ \ + {0x02000000, 0x00000000}, /* R25_REG */ \ + {0x08000000, 0x00000000}, /* R27_REG */ \ + {0xffffffff, 0x80000000}, /* GENERAL_REGS */ \ + {0x00000000, 0x7fffffff}, /* FLOAT_REGS */ \ + { \ + 0xffffffff, 0xffffffff \ + } \ + } + +/* The same information, inverted: + Return the class number of the smallest class containing + reg number REGNO. This could be a conditional expression + or could index an array. */ + +#define REGNO_REG_CLASS(REGNO) \ + ((REGNO) == 0 \ + ? R0_REG \ + : (REGNO) == 24 \ + ? R24_REG \ + : (REGNO) == 25 \ + ? R25_REG \ + : (REGNO) == 27 \ + ? R27_REG \ + : IN_RANGE ((REGNO), 32, 62) ? FLOAT_REGS : GENERAL_REGS) + +/* The class value for index registers, and the one for base regs. */ +#define INDEX_REG_CLASS NO_REGS +#define BASE_REG_CLASS GENERAL_REGS + +/* Given an rtx X being reloaded into a reg required to be + in class CLASS, return the class of reg to actually use. + In general this is just CLASS; but on some machines + in some cases it is preferable to use a more restrictive class. */ + +#define PREFERRED_RELOAD_CLASS sw_64_preferred_reload_class + +/* Provide the cost of a branch. Exact meaning under development. */ +#define BRANCH_COST(speed_p, predictable_p) 5 + +/* Stack layout; function entry, exit and calling. */ + +/* Define this if pushing a word on the stack + makes the stack pointer a smaller address. */ +#define STACK_GROWS_DOWNWARD 1 + +/* Define this to nonzero if the nominal address of the stack frame + is at the high-address end of the local variables; + that is, each additional local variable allocated + goes at a more negative offset in the frame. */ +//#define FRAME_GROWS_DOWNWARD SW_64_ENABLE_ASAN +#define FRAME_GROWS_DOWNWARD 1 + +/* If we generate an insn to push BYTES bytes, + this says how many the stack pointer really advances by. + On Sw_64, don't define this because there are no push insns. */ +/* #define PUSH_ROUNDING(BYTES) */ + +/* Define this to be nonzero if stack checking is built into the ABI. */ +#define STACK_CHECK_BUILTIN 1 + +/* Define this if the maximum size of all the outgoing args is to be + accumulated and pushed during the prologue. The amount can be + found in the variable crtl->outgoing_args_size. */ +#define ACCUMULATE_OUTGOING_ARGS 1 + +/* Offset of first parameter from the argument pointer register value. */ + +#define FIRST_PARM_OFFSET(FNDECL) 0 + +/* Definitions for register eliminations. + + We have two registers that can be eliminated on the Sw_64. First, the + frame pointer register can often be eliminated in favor of the stack + pointer register. Secondly, the argument pointer register can always be + eliminated; it is replaced with either the stack or frame pointer. */ + +/* This is an array of structures. Each structure initializes one pair + of eliminable registers. The "from" register number is given first, + followed by "to". Eliminations of the same "from" register are listed + in order of preference. */ + +#define ELIMINABLE_REGS \ + { \ + {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + {ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ + {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ + { \ + FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM \ + } \ + } + +/* Round up to a multiple of 16 bytes. */ +#define SW_64_ROUND(X) \ + ((TARGET_SW_32ALIGN || TARGET_SW_SIMD) ? 
ROUND_UP ((X), 32) \ + : ROUND_UP ((X), 16)) + +/* Define the offset between two registers, one to be eliminated, and the other + its replacement, at the start of a routine. */ +#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ + ((OFFSET) = sw_64_initial_elimination_offset (FROM, TO)) + +/* Define this if stack space is still allocated for a parameter passed + in a register. */ +/* #define REG_PARM_STACK_SPACE */ + +/* 1 if N is a possible register number for function argument passing. + On Sw_64, these are $16-$21 and $f16-$f21. */ + +#define FUNCTION_ARG_REGNO_P(N) \ + (IN_RANGE ((N), 16, 21) || ((N) >= 16 + 32 && (N) <= 21 + 32)) + +/* Define a data type for recording info about an argument list + during the scan of that argument list. This data type should + hold all necessary information about the function itself + and about the args processed so far, enough to enable macros + such as FUNCTION_ARG to determine where the next arg should go. + + On Sw_64, this is a single integer, which is a number of words + of arguments scanned so far. + Thus 6 or more means all following args should go on the stack. */ + +#define CUMULATIVE_ARGS int + +/* Initialize a variable CUM of type CUMULATIVE_ARGS + for a call to a function whose data type is FNTYPE. + For a library call, FNTYPE is 0. */ + +#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ + (CUM) = 0 + +/* Define intermediate macro to compute + the size (in registers) of an argument. */ + +#define SW_64_ARG_SIZE(MODE, TYPE) \ + ((MODE) == TFmode || (MODE) == TCmode \ + ? 1 \ + : CEIL (((MODE) == BLKmode ? int_size_in_bytes (TYPE) \ + : GET_MODE_SIZE (MODE)), \ + UNITS_PER_WORD)) + +/* Make (or fake) .linkage entry for function call. + IS_LOCAL is 0 if name is used in call, 1 if name is used in definition. */ + +/* This macro defines the start of an assembly comment. */ + +#define ASM_COMMENT_START " #" + +/* This macro produces the initial definition of a function. */ + +#undef ASM_DECLARE_FUNCTION_NAME +#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ + sw_64_start_function (FILE, NAME, DECL); + +/* This macro closes up a function definition for the assembler. */ + +#undef ASM_DECLARE_FUNCTION_SIZE +#define ASM_DECLARE_FUNCTION_SIZE(FILE, NAME, DECL) \ + sw_64_end_function (FILE, NAME, DECL) + +/* Output any profiling code before the prologue. */ + +#define PROFILE_BEFORE_PROLOGUE 1 + +/* Never use profile counters. */ + +#define NO_PROFILE_COUNTERS 1 + +/* Output assembler code to FILE to increment profiler label # LABELNO + for profiling a function entry. Under SYSV, profiling is enabled + by simply passing -pg to the assembler and linker. */ + +#define FUNCTION_PROFILER(FILE, LABELNO) + +/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, + the stack pointer does not matter. The value is tested only in + functions that have frame pointers. + No definition is equivalent to always zero. */ + +#define EXIT_IGNORE_STACK 1 + +/* Define registers used by the epilogue and return instruction. */ + +#define EPILOGUE_USES(REGNO) ((REGNO) == 26) + +/* Length in units of the trampoline for entering a nested function. */ + +#define TRAMPOLINE_SIZE 32 + +/* The alignment of a trampoline, in bits. */ + +#define TRAMPOLINE_ALIGNMENT 64 + +/* A C expression whose value is RTL representing the value of the return + address for the frame COUNT steps up from the current frame. 
+ FRAMEADDR is the frame pointer of the COUNT frame, or the frame pointer of + the COUNT-1 frame if RETURN_ADDR_IN_PREVIOUS_FRAME is defined. */ + +#define RETURN_ADDR_RTX sw_64_return_addr + +/* Provide a definition of DWARF_FRAME_REGNUM here so that fallback unwinders + can use DWARF_ALT_FRAME_RETURN_COLUMN defined below. This is just the same + as the default definition in dwarf2out.c. */ +#undef DWARF_FRAME_REGNUM +#define DWARF_FRAME_REGNUM(REG) DBX_REGISTER_NUMBER (REG) + +/* Before the prologue, RA lives in $26. */ +#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 26) +#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (26) +#define DWARF_ALT_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (64) +#define DWARF_ZERO_REG 31 + +/* Describe how we implement __builtin_eh_return. */ +#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 16 : INVALID_REGNUM) +#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 28) +#define EH_RETURN_HANDLER_RTX \ + gen_rtx_MEM (Pmode, plus_constant (Pmode, stack_pointer_rtx, \ + crtl->outgoing_args_size)) + +/* Addressing modes, and classification of registers for them. */ + +/* Macros to check register numbers against specific register classes. */ + +/* These assume that REGNO is a hard or pseudo reg number. + They give nonzero only if REGNO is a hard reg of the suitable class + or a pseudo reg currently allocated to a suitable hard reg. + Since they use reg_renumber, they are safe only once reg_renumber + has been allocated, which happens in reginfo.c during register + allocation. */ + +#define REGNO_OK_FOR_INDEX_P(REGNO) 0 +#define REGNO_OK_FOR_BASE_P(REGNO) \ + ((REGNO) < 32 || (unsigned) reg_renumber[REGNO] < 32 || (REGNO) == 63 \ + || reg_renumber[REGNO] == 63) + +/* Maximum number of registers that can appear in a valid memory address. */ +#define MAX_REGS_PER_ADDRESS 1 + +/* Recognize any constant value that is a valid address. For the Sw_64, + there are only constants none since we want to use LDI to load any + symbolic addresses into registers. */ + +#define CONSTANT_ADDRESS_P(X) \ + (CONST_INT_P (X) && ((UINTVAL (X) + 0x8000) < 0x10000)) + +/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx + and check its validity for a certain class. + We have two alternate definitions for each of them. + The usual definition accepts all pseudo regs; the other rejects + them unless they have been allocated suitable hard regs. + The symbol REG_OK_STRICT causes the latter definition to be used. + + Most source files want to accept pseudo regs in the hope that + they will get allocated to the class that the insn wants them to be in. + Source files for reload pass need to be strict. + After reload, it makes no difference, since pseudo regs have + been eliminated by then. */ + +/* Nonzero if X is a hard reg that can be used as an index + or if it is a pseudo reg. */ +#define REG_OK_FOR_INDEX_P(X) 0 + +/* Nonzero if X is a hard reg that can be used as a base reg + or if it is a pseudo reg. */ +#define NONSTRICT_REG_OK_FOR_BASE_P(X) \ + (REGNO (X) < 32 || REGNO (X) == 63 || REGNO (X) >= FIRST_PSEUDO_REGISTER) + +/* ??? Nonzero if X is the frame pointer, or some virtual register + that may eliminate to the frame pointer. These will be allowed to + have offsets greater than 32K. This is done because register + elimination offsets will change the hi/lo split, and if we split + before reload, we will require additional instructions. 
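+ For example, a slot at offset 40000 from one of these registers no longer
+ fits in a signed 16-bit displacement once the real elimination offset is
+ added in, so the address must be rebuilt from a high part and a low part;
+ splitting before reload would commit to the wrong split and cost extra
+ instructions.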
*/ +#define NONSTRICT_REG_OK_FP_BASE_P(X) \ + (REGNO (X) == 31 || REGNO (X) == 63 \ + || (REGNO (X) >= FIRST_PSEUDO_REGISTER \ + && REGNO (X) < LAST_VIRTUAL_POINTER_REGISTER)) + +/* Nonzero if X is a hard reg that can be used as a base reg. */ +#define STRICT_REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X)) + +#ifdef REG_OK_STRICT +#define REG_OK_FOR_BASE_P(X) STRICT_REG_OK_FOR_BASE_P (X) +#else +#define REG_OK_FOR_BASE_P(X) NONSTRICT_REG_OK_FOR_BASE_P (X) +#endif + +/* Try a machine-dependent way of reloading an illegitimate address + operand. If we find one, push the reload and jump to WIN. This + macro is used in only one place: `find_reloads_address' in reload.c. */ + +#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_L, WIN) \ + do \ + { \ + rtx new_x \ + = sw_64_legitimize_reload_address (X, MODE, OPNUM, TYPE, IND_L); \ + if (new_x) \ + { \ + X = new_x; \ + goto WIN; \ + } \ + } \ + while (0) + +/* Specify the machine mode that this machine uses + for the index in the tablejump instruction. */ +#define CASE_VECTOR_MODE SImode + +/* Define as C expression which evaluates to nonzero if the tablejump + instruction expects the table to contain offsets from the address of the + table. + + Do not define this if the table should contain absolute addresses. + On the Sw_64, the table is really GP-relative, not relative to the PC + of the table, but we pretend that it is PC-relative; this should be OK, + but we should try to find some better way sometime. */ +#define CASE_VECTOR_PC_RELATIVE 1 + +/* Define this as 1 if `char' should by default be signed; else as 0. */ +#define DEFAULT_SIGNED_CHAR 1 + +/* Max number of bytes we can move to or from memory + in one reasonably fast instruction. */ + +#define MOVE_MAX 8 + +/* If a memory-to-memory move would take MOVE_RATIO or more simple + move-instruction pairs, we will do a movmem or libcall instead. + + Without byte/word accesses, we want no more than four instructions; + with, several single byte accesses are better. */ + +#define MOVE_RATIO(speed) (TARGET_BWX ? 7 : 2) + +/* Largest number of bytes of an object that can be placed in a register. + On the Sw_64 we have plenty of registers, so use TImode. */ +#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TImode) + +/* Nonzero if access to memory by bytes is no faster than for words. + Also nonzero if doing byte operations (specifically shifts) in registers + is undesirable. + + On the Sw_64, we want to not use the byte operation and instead use + masking operations to access fields; these will save instructions. */ + +#define SLOW_BYTE_ACCESS 1 + +/* Define if operations between registers always perform the operation + on the full register even if a narrower mode is specified. */ +#define WORD_REGISTER_OPERATIONS 1 + +/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD + will either zero-extend or sign-extend. The value of this macro should + be the code that says which one of the two operations is implicitly + done, UNKNOWN if none. */ +#define LOAD_EXTEND_OP(MODE) ((MODE) == SImode ? SIGN_EXTEND : ZERO_EXTEND) + +/* Define if loading short immediate values into registers sign extends. */ +#define SHORT_IMMEDIATES_SIGN_EXTEND 1 + +/* The CIX ctlz and cttz instructions return 64 for zero. */ +#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = 64, TARGET_CIX ? 1 : 0) +#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ + ((VALUE) = 64, TARGET_CIX ? 1 : 0) + +/* Define the value returned by a floating-point comparison instruction. 
*/ + +#define FLOAT_STORE_FLAG_VALUE(MODE) \ + REAL_VALUE_ATOF ((TARGET_FLOAT_VAX ? "0.5" : "2.0"), (MODE)) + +/* Specify the machine mode that pointers have. + After generation of rtl, the compiler makes no further distinction + between pointers and any other objects of this machine mode. */ +#define Pmode (TARGET_SW_M32 ? SImode : DImode) + +/* Mode of a function address in a call instruction (for indexing purposes). */ + +#define FUNCTION_MODE Pmode + +/* Define this if addresses of constant functions + shouldn't be put through pseudo regs where they can be cse'd. + Desirable on machines where ordinary constants are expensive + but a CALL with constant address is cheap. + + We define this on the Sw_64 so that gen_call and gen_call_value + get to see the SYMBOL_REF (for the hint field of the jsr). It will + then copy it into a register, thus actually letting the address be + cse'ed. */ + +#define NO_FUNCTION_CSE 1 + +/* Define this to be nonzero if shift instructions ignore all but the low-order + few bits. */ +#define SHIFT_COUNT_TRUNCATED 1 + +/* Control the assembler format that we output. */ + +/* Output to assembler file text saying following lines + may contain character constants, extra white space, comments, etc. */ +#define ASM_APP_ON (TARGET_EXPLICIT_RELOCS ? "\t.set\tmacro\n" : "") + +/* Output to assembler file text saying following lines + no longer contain unusual constructs. */ +#define ASM_APP_OFF (TARGET_EXPLICIT_RELOCS ? "\t.set\tnomacro\n" : "") + +#define TEXT_SECTION_ASM_OP "\t.text" + +/* Output before writable data. */ + +#define DATA_SECTION_ASM_OP "\t.data" + +/* How to refer to registers in assembler output. + This sequence is indexed by compiler's hard-register-number (see above). */ + +#define REGISTER_NAMES \ + { \ + "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", "$10", "$11", \ + "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", "$20", "$21", \ + "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29", "$30", "AP", \ + "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", \ + "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", \ + "$f19", "$f20", "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27", \ + "$f28", "$f29", "$f30", "FP" \ + } + +/* Strip name encoding when emitting labels. */ + +#define ASM_OUTPUT_LABELREF(STREAM, NAME) \ + do \ + { \ + const char *name_ = NAME; \ + if (*name_ == '@' || *name_ == '%') \ + name_ += 2; \ + if (*name_ == '*') \ + name_++; \ + else \ + fputs (user_label_prefix, STREAM); \ + fputs (name_, STREAM); \ + } \ + while (0) + +/* Globalizing directive for a label. */ +#define GLOBAL_ASM_OP "\t.globl " + +/* Use dollar signs rather than periods in special g++ assembler names. */ + +#undef NO_DOLLAR_IN_LABEL + +/* This is how to store into the string LABEL + the symbol_ref name of an internal numbered label where + PREFIX is the class of label and NUM is the number within the class. + This is suitable for output with `assemble_name'. */ + +#undef ASM_GENERATE_INTERNAL_LABEL +#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ + sprintf ((LABEL), "*$%s%ld", (PREFIX), (long) (NUM)) + +/* This is how to output an element of a case-vector that is relative. */ + +#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ + fprintf (FILE, "\t.gprel32 $L%d\n", (VALUE)) + +/* If we use NM, pass -g to it so it only lists globals. */ +#define NM_FLAGS "-pg" + +/* Definitions for debugging. */ + +/* Correct the offset of automatic variables and arguments. 
Note that + the Sw_64 debug format wants all automatic variables and arguments + to be in terms of two different offsets from the virtual frame pointer, + which is the stack pointer before any adjustment in the function. + The offset for the argument pointer is fixed for the native compiler, + it is either zero (for the no arguments case) or large enough to hold + all argument registers. + The offset for the auto pointer is the fourth argument to the .frame + directive (local_offset). + To stay compatible with the native tools we use the same offsets + from the virtual frame pointer and adjust the debugger arg/auto offsets + accordingly. These debugger offsets are set up in output_prolog. */ + +extern long sw_64_arg_offset; +extern long sw_64_auto_offset; +#define DEBUGGER_AUTO_OFFSET(X) \ + ((GET_CODE (X) == PLUS ? INTVAL (XEXP (X, 1)) : 0) + sw_64_auto_offset) +#define DEBUGGER_ARG_OFFSET(OFFSET, X) (OFFSET + sw_64_arg_offset) + +#define ASM_OUTPUT_SOURCE_FILENAME(STREAM, NAME) \ + sw_64_output_filename (STREAM, NAME) + +/* By default, turn on GDB extensions. */ +#define DEFAULT_GDB_EXTENSIONS 1 + +/* This version don't define SYSTEM_IMPLICIT_EXTERN_C Replace + * NO_IMPLICIT_EXTERN_C with SYSTEM_IMPLICIT_EXTERN_C. */ +/* The system headers under Sw_64 systems are generally C++-aware. */ +/*#define NO_IMPLICIT_EXTERN_C*/ + +#define TARGET_SUPPORTS_WIDE_INT 1 +#define SW64_TARGET_SUPPORT_FPCR 1 + +#define HAVE_POST_INCREMENT (TARGET_SW8A ? 1 : 0) +#define HAVE_POST_DECREMENT (TARGET_SW8A ? 1 : 0) +#define HAVE_POST_MODIFY_DISP (TARGET_SW8A ? 1 : 0) +int +enable_asan_check_stack (); +#ifndef SW_64_ENABLE_ASAN +#define SW_64_ENABLE_FULL_ASAN 1 +#else +#undef SW_64_ENABLE_FULL_ASAN +#define SW_64_ENABLE_ASAN 0 +#endif +#define TARGET_CRC32 0 diff --git a/gcc/config/sw_64/sw_64.md b/gcc/config/sw_64/sw_64.md new file mode 100644 index 0000000000000000000000000000000000000000..8cf036b1007a428ca68411e7124bd7f3ab9eae91 --- /dev/null +++ b/gcc/config/sw_64/sw_64.md @@ -0,0 +1,7865 @@ +;; Machine description for Sw_64 for GNU C compiler +;; Copyright (C) 1992-2020 Free Software Foundation, Inc. +;; Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) +;; +;; This file is part of GCC. +;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. 
+ +;; Uses of UNSPEC in this file: + +(define_c_enum "unspec" [ + UNSPEC_XFLT_COMPARE + UNSPEC_ARG_HOME + UNSPEC_LDGP1 + UNSPEC_INSXH + UNSPEC_MSKXH + UNSPEC_CVTQL + UNSPEC_CVTLQ + UNSPEC_LDGP2 + UNSPEC_LITERAL + UNSPEC_LITUSE + UNSPEC_SIBCALL + UNSPEC_SYMBOL + UNSPEC_FRINTZ + UNSPEC_FRINTP + UNSPEC_FRINTG + UNSPEC_FRINTN + UNSPEC_FRINTI + UNSPEC_FRECX + + + ;; TLS Support + UNSPEC_TLSGD_CALL + UNSPEC_TLSLDM_CALL + UNSPEC_TLSGD + UNSPEC_TLSLDM + UNSPEC_DTPREL + UNSPEC_TPREL + UNSPEC_TP + UNSPEC_TLSRELGOT + UNSPEC_GOTDTPREL + + ;; Builtins + UNSPEC_CMPBGE + UNSPEC_ZAP + UNSPEC_AMASK + UNSPEC_IMPLVER + UNSPEC_PERR + UNSPEC_COPYSIGN + UNSPEC_PFSC + UNSPEC_PFTC + UNSPEC_SBT + UNSPEC_CBT + UNSPEC_FIMOVS ; SHENJQ20230404_RSQRT + + ;; Atomic operations + UNSPEC_MB + UNSPEC_ATOMIC + UNSPEC_CMPXCHG + UNSPEC_XCHG + UNSPECV_LDGP2 + UNSPECV_HARDWARE_PREFETCH_CNT + +]) + +;; UNSPEC_VOLATILE: + +(define_c_enum "unspecv" [ + UNSPECV_IMB + UNSPECV_BLOCKAGE + UNSPECV_SPECULATION_BARRIER + UNSPECV_SETJMPR ; builtin_setjmp_receiver + UNSPECV_LONGJMP ; builtin_longjmp + UNSPECV_TRAPB + UNSPECV_PSPL ; prologue_stack_probe_loop + UNSPECV_REALIGN + UNSPECV_EHR ; exception_receiver + UNSPECV_MCOUNT + UNSPECV_FORCE_MOV + UNSPECV_LDGP1 + UNSPECV_PLDGP2 ; prologue ldgp + UNSPECV_SET_TP + UNSPECV_RPCC + UNSPECV_SETJMPR_ER ; builtin_setjmp_receiver fragment + UNSPECV_LL ; load-locked + UNSPECV_SC ; store-conditional + UNSPECV_CMPXCHG + + UNSPEC_TIE ;; TIE +]) + +;; CQImode must be handled the similarly to HImode +;; when generating reloads. +(define_mode_iterator RELOAD12 [QI HI CQI]) +(define_mode_attr reloadmode [(QI "qi") (HI "hi") (CQI "hi")]) + +;; Other mode iterators +(define_mode_iterator IMODE [QI HI SI DI]) +(define_mode_iterator I12MODE [QI HI]) +(define_mode_iterator I124MODE [QI HI SI]) +(define_mode_iterator I24MODE [HI SI]) +(define_mode_iterator I248MODE [HI SI DI]) +(define_mode_iterator I48MODE [SI DI]) + +(define_mode_attr DWI [(SI "DI") (DI "TI")]) +(define_mode_attr modesuffix [(QI "b") (HI "h") (SI "w") (DI "l") + (V8QI "b8") (V4HI "w4") + (SF "%,") (DF "%-")]) +(define_mode_attr vecmodesuffix [(QI "b8") (HI "w4")]) + +(define_code_iterator any_maxmin [smax smin umax umin]) + +(define_code_attr maxmin [(smax "maxs") (smin "mins") + (umax "maxu") (umin "minu")]) + +(define_mode_iterator SFDF [SF DF]) +(define_mode_attr SD [(SF "s") (DF "d")]) +(define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTN + UNSPEC_FRINTG UNSPEC_FRINTI]) + +;; Standard pattern names for floating-point rounding instructions. +(define_int_attr frint_pattern [(UNSPEC_FRINTZ "btrunc") + (UNSPEC_FRINTP "ceil") + (UNSPEC_FRINTN "floor") + (UNSPEC_FRINTI "nearbyint") + (UNSPEC_FRINTG "round")]) + +;; frint suffix for floating-point rounding instructions. +(define_int_attr frint_suffix [(UNSPEC_FRINTZ "_z") + (UNSPEC_FRINTP "_p") + (UNSPEC_FRINTN "_n") + (UNSPEC_FRINTG "_g") + (UNSPEC_FRINTI "")]) +;; endif + +;; Where necessary, the suffixes _le and _be are used to distinguish between +;; little-endian and big-endian patterns. +;; +;; Note that the Unicos/Mk assembler does not support the following +;; opcodes: mov, fmov, nop, fnop, unop. + +;; Processor type -- this attribute must exactly match the processor_type +;; enumeration in sw_64.h. + +(define_attr "tune" "sw6,sw8" + (const (symbol_ref "((enum attr_tune) sw_64_tune)"))) + +;; Define an insn type attribute. This is used in function unit delay +;; computations, among other purposes. 
For the most part, we use the names +;; defined in the documentation, but add a few that we have to know about +;; separately. + +(define_attr "type" + "ild,fld,ldsym,ist,fst,ibr,callpal,fbr,call,iadd,ilog,shift,icmov,fcmov, + icmp,imul,fadd,fmul,fmadd,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,mb,ld_l,st_c, + multi,vld,vst,ctpop,none,jsr,vcmp,frint,fp,fminmax,vsum,vinv,vsel,crc32,crc32c" + (const_string "iadd")) + +;; Describe a user's asm statement. +(define_asm_attributes + [(set_attr "type" "multi")]) + +;; Define the operand size an insn operates on. Used primarily by mul +;; and div operations that have size dependent timings. + +(define_attr "opsize" "si,di,udi" + (const_string "di")) + +;; The TRAP attribute marks instructions that may generate traps +;; (which are imprecise and may need a trapb if software completion +;; is desired). + +(define_attr "trap" "no,yes" + (const_string "no")) + +;; The ROUND_SUFFIX attribute marks which instructions require a +;; rounding-mode suffix. The value NONE indicates no suffix, +;; the value NORMAL indicates a suffix controlled by sw_64_fprm. + +(define_attr "round_suffix" "none,normal,c" + (const_string "none")) + +;; The TRAP_SUFFIX attribute marks instructions requiring a trap-mode suffix: +;; NONE no suffix +;; SU accepts only /su (cmpt et al) +;; SUI accepts only /sui (cvtqt and cvtqs) +;; V_SV accepts /v and /sv (cvtql only) +;; V_SV_SVI accepts /v, /sv and /svi (cvttq only) +;; U_SU_SUI accepts /u, /su and /sui (most fp instructions) +;; +;; The actual suffix emitted is controlled by sw_64_fptm. + +(define_attr "trap_suffix" "none,su,sui,v_sv,v_sv_svi,u_su_sui" + (const_string "none")) + +;; The length of an instruction sequence in bytes. + +(define_attr "length" "" + (const_int 4)) + +;; The USEGP attribute marks instructions that have relocations that use +;; the GP. + +(define_attr "usegp" "no,yes" + (cond [(eq_attr "type" "ldsym,call") + (const_string "yes") + (eq_attr "type" "ild,fld,ist,fst") + (symbol_ref "((enum attr_usegp) sw_64_find_lo_sum_using_gp (insn))") + ] + (const_string "no"))) + +;; The CANNOT_COPY attribute marks instructions with relocations that +;; cannot easily be duplicated. This includes insns with gpdisp relocs +;; since they have to stay in 1-1 correspondence with one another. This +;; also includes call insns, since they must stay in correspondence with +;; the immediately following gpdisp instructions. + +(define_attr "cannot_copy" "false,true" + (const_string "false")) + +;; Used to control the "enabled" attribute on a per-instruction basis. +;; For convenience, conflate ABI issues re loading of addresses with +;; an "isa". +(define_attr "isa" "base,bwx,max,fix,cix,vms,ner,er,sw6a,sw6b,sw8a" + (const_string "base")) + +(define_attr "enabled" "" + (cond [(eq_attr "isa" "bwx") (symbol_ref "TARGET_BWX") + (eq_attr "isa" "max") (symbol_ref "TARGET_MAX") + (eq_attr "isa" "fix") (symbol_ref "TARGET_FIX") + (eq_attr "isa" "cix") (symbol_ref "TARGET_CIX") + (eq_attr "isa" "vms") (symbol_ref "!TARGET_ABI_OSF") + (eq_attr "isa" "ner") (symbol_ref "!TARGET_EXPLICIT_RELOCS") + (eq_attr "isa" "er") (symbol_ref "TARGET_EXPLICIT_RELOCS") + (eq_attr "isa" "sw6a") (symbol_ref "TARGET_SW6A") + (eq_attr "isa" "sw6b") (symbol_ref "TARGET_SW6B") + (eq_attr "isa" "sw8a") (symbol_ref "TARGET_SW8A") + ] + (const_int 1))) + +;; Include scheduling descriptions. 
+ +(include "sw6.md") +(include "sw8.md") + + +;; Operand and operator predicates and constraints + +(include "predicates.md") +(include "constraints.md") + + +;; First define the arithmetic insns. Note that the 32-bit forms also +;; sign-extend. + +;; Handle 32-64 bit extension from memory to a floating point register +;; specially, since this occurs frequently in int->double conversions. +;; +;; Note that while we must retain the =f case in the insn for reload's +;; benefit, it should be eliminated after reload, so we should never emit +;; code for that case. But we don't reject the possibility. + +(define_expand "extendsidi2" + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]) + +(define_insn "*cvtlq" + [(set (match_operand:DI 0 "register_operand" "=f") + (unspec:DI [(match_operand:SF 1 "reg_or_0_operand" "fG")] + UNSPEC_CVTLQ))] + "" + "fcvtwl %1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "*extendsidi2_1" + [(set (match_operand:DI 0 "register_operand" "=r,r,!*f") + (sign_extend:DI + (match_operand:SI 1 "nonimmediate_operand" "r,m,m")))] + "" + "@ + addw $31,%1,%0 + ldw%U1 %0,%1 + flds %0,%1\;fcvtwl %0,%0" + [(set_attr "type" "iadd,ild,fld") + (set_attr "length" "*,*,8")]) + +(define_split + [(set (match_operand:DI 0 "hard_fp_register_operand") + (sign_extend:DI (match_operand:SI 1 "memory_operand")))] + "reload_completed" + [(set (match_dup 2) (match_dup 1)) + (set (match_dup 0) (unspec:DI [(match_dup 2)] UNSPEC_CVTLQ))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); + operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0])); +}) + +;; Optimize sign-extension of SImode loads. This shows up in the wake of +;; reload when converting fp->int. + +(define_peephole2 + [(set (match_operand:SI 0 "hard_int_register_operand") + (match_operand:SI 1 "memory_operand")) + (set (match_operand:DI 2 "hard_int_register_operand") + (sign_extend:DI (match_dup 0)))] + "true_regnum (operands[0]) == true_regnum (operands[2]) + || peep2_reg_dead_p (2, operands[0])" + [(set (match_dup 2) + (sign_extend:DI (match_dup 1)))]) + +(define_peephole2 +[ +(set (match_operand:DF 0 "register_operand") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(match_operand:DF 2 "register_operand") + (match_operand:DF 3 "const0_operand")])) +(set (match_operand:DF 4 "register_operand") + (match_operator:DF 5 "sw_64_fp_comparison_operator" + [(match_operand:DF 6 "reg_or_0_operand") + (match_operand:DF 7 "reg_or_0_operand")])) +(set (match_operand:SFDF 8 "register_operand") + (if_then_else:SFDF + (match_operand 9 "comparison_operator") + (match_operand:SFDF 10 "reg_or_8bit_operand") + (match_operand:SFDF 11 "reg_or_8bit_operand"))) +] +"(GET_CODE (operands[1])==LE || GET_CODE (operands[1])==LT) + && GET_CODE (operands[5])==EQ && GET_CODE (operands[9])==NE && flag_sw_fselect +" + +[ +(set (match_operand:SFDF 8 "reg_or_0_operand") + (if_then_else:SFDF + (match_operator 1 "sw_64_fp_comparison_operator" + [(match_operand:SFDF 2 "reg_or_0_operand") + (match_operand:SFDF 3 "const0_operand")]) + (match_operand:SFDF 11 "reg_or_0_operand") + (match_operand:SFDF 10 "reg_or_0_operand"))) +] +) +(define_peephole2 +[ +(set (match_operand:DF 0 "register_operand") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(match_operand:DF 2 "const0_operand") + (match_operand:DF 3 "reg_or_0_operand")])) +(set (match_operand:DF 4 "register_operand") + (match_operator:DF 5 "sw_64_fp_comparison_operator" + [(match_operand:DF 6 "reg_or_0_operand") + 
(match_operand:DF 7 "reg_or_0_operand")])) +(set (match_operand:SFDF 8 "register_operand") + (if_then_else:SFDF + (match_operand 9 "comparison_operator") + (match_operand:SFDF 10 "reg_or_8bit_operand") + (match_operand:SFDF 11 "reg_or_8bit_operand"))) +] +"(GET_CODE (operands[1])==LE || GET_CODE (operands[1])==LT) + && GET_CODE (operands[5])==EQ && GET_CODE (operands[9])==NE && flag_sw_fselect +" + +[ +(set (match_operand:SFDF 8 "reg_or_0_operand") + (if_then_else:SFDF + (match_operator 1 "sw_64_fp_comparison_operator" + [(match_operand:SFDF 3 "reg_or_0_operand") + (match_operand:SFDF 2 "const0_operand")]) + (match_operand:SFDF 10 "reg_or_0_operand") + (match_operand:SFDF 11 "reg_or_0_operand"))) +] +) + +(define_peephole2 +[ +(set (match_operand:DF 0 "register_operand") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(match_operand:DF 2 "register_operand") + (match_operand:DF 3 "const0_operand")])) +(set (match_operand:DF 4 "register_operand") + (match_operator:DF 5 "sw_64_fp_comparison_operator" + [(match_operand:DF 6 "register_operand") + (match_operand:DF 7 "const0_operand")])) +(set (match_operand:SFDF 8 "register_operand") + (if_then_else:SFDF + (match_operand 9 "comparison_operator") + (match_operand:SFDF 10 "reg_or_8bit_operand") + (match_operand:SFDF 11 "reg_or_8bit_operand"))) +] +"GET_CODE (operands[1])==EQ && GET_CODE (operands[5])==EQ && + (GET_CODE (operands[9])==NE || GET_CODE (operands[9])==EQ)&& + (operands[0] == operands[6]) && flag_sw_fselect" +[ +(set (match_operand:SFDF 8 "reg_or_0_operand") + (if_then_else:SFDF + (match_operator 9 "sw_64_fp_comparison_operator" + [(match_operand:SFDF 2 "reg_or_0_operand") + (match_operand:SFDF 3 "const0_operand")]) + (match_operand:SFDF 10 "reg_or_0_operand") + (match_operand:SFDF 11 "reg_or_0_operand"))) +] +) + + + +(define_insn "addsi3" + [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") + (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ,rJ,rJ") + (match_operand:SI 2 "add_operand" "rI,O,K,L")))] + "" + "@ + addw %r1,%2,%0 + subw %r1,%n2,%0 + ldi %0,%2(%r1) + ldih %0,%h2(%r1)") + +(define_split + [(set (match_operand:SI 0 "register_operand") + (plus:SI (match_operand:SI 1 "register_operand") + (match_operand:SI 2 "const_int_operand")))] + "! add_operand (operands[2], SImode)" + [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3))) + (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4)))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]); + HOST_WIDE_INT low = (val & 0xffff) - 2 * (val & 0x8000); + HOST_WIDE_INT rest = val - low; + + operands[3] = GEN_INT (rest); + operands[4] = GEN_INT (low); +}) + +(define_insn "*addsi_se" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:SI 2 "sext_add_operand" "rI,O"))))] + "" + "@ + addw %r1,%2,%0 + subw %r1,%n2,%0") + +(define_insn "*addsi_se2" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (subreg:SI (plus:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:DI 2 "sext_add_operand" "rI,O")) + 0)))] + "" + "@ + addw %r1,%2,%0 + subw %r1,%n2,%0") + +;; (plus:SI (ashift:SI (match_dup 3)-> (plus:SI (mult:SI (match_dup 3) +(define_split + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI + (plus:SI (match_operand:SI 1 "reg_not_elim_operand") + (match_operand:SI 2 "const_int_operand")))) + (clobber (match_operand:SI 3 "reg_not_elim_operand"))] + "! 
sext_add_operand (operands[2], SImode) && INTVAL (operands[2]) > 0 + && INTVAL (operands[2]) % 4 == 0" + [(set (match_dup 3) (match_dup 4)) + (set (match_dup 0) (sign_extend:DI (plus:SI (mult:SI (match_dup 3) + (match_dup 5)) + (match_dup 1))))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]) / 4; + int mult = 4; + + if (val % 2 == 0) + val /= 2, mult = 8; + + operands[4] = GEN_INT (val); + operands[5] = GEN_INT (mult); +}) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI + (plus:SI (match_operator:SI 1 "comparison_operator" + [(match_operand 2) + (match_operand 3)]) + (match_operand:SI 4 "add_operand")))) + (clobber (match_operand:DI 5 "register_operand"))] + "" + [(set (match_dup 5) (match_dup 6)) + (set (match_dup 0) (sign_extend:DI (plus:SI (match_dup 7) (match_dup 4))))] +{ + operands[6] = gen_rtx_fmt_ee (GET_CODE (operands[1]), DImode, + operands[2], operands[3]); + operands[7] = gen_lowpart (SImode, operands[5]); +}) + +(define_expand "adddi3" + [(set (match_operand:DI 0 "register_operand") + (plus:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "add_operand")))]) + +(define_insn "*adddi_er_lo16_dtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "dtp16_symbolic_operand")))] + "HAVE_AS_TLS" + "ldi %0,%2(%1)\t\t!dtprel") + +(define_insn "*adddi_er_hi32_dtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (high:DI (match_operand:DI 2 "dtp32_symbolic_operand"))))] + "HAVE_AS_TLS" + "ldih %0,%2(%1)\t\t!dtprelhi") + +(define_insn "*adddi_er_lo32_dtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "dtp32_symbolic_operand")))] + "HAVE_AS_TLS" + "ldi %0,%2(%1)\t\t!dtprello") + +(define_insn "*adddi_er_lo16_tp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "tp16_symbolic_operand")))] + "HAVE_AS_TLS" + "ldi %0,%2(%1)\t\t!tprel") + +(define_insn "*adddi_er_hi32_tp" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (high:DI (match_operand:DI 2 "tp32_symbolic_operand"))))] + "HAVE_AS_TLS" + "ldih %0,%2(%1)\t\t!tprelhi") + +(define_insn "*adddi_er_lo32_tp" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "tp32_symbolic_operand")))] + "HAVE_AS_TLS" + "ldi %0,%2(%1)\t\t!tprello") + +(define_insn "*adddi_er_high_l" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (match_operand:DI 1 "register_operand" "r") + (high:DI (match_operand:DI 2 "local_symbolic_operand"))))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + "ldih %0,%2(%1)\t\t!gprelhigh" + [(set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (high:DI (match_operand:DI 1 "local_symbolic_operand")))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (plus:DI (match_dup 2) (high:DI (match_dup 1))))] + "operands[2] = pic_offset_table_rtx;") + +;; We used to expend quite a lot of effort choosing addl/subl/ldi. +;; With complications like +;; +;; The NT stack unwind code can't handle a subl to adjust the stack +;; (that's a bug, but not one we can do anything about). As of NT4.0 SP3, +;; the exception handling code will loop if a subl is used and an +;; exception occurs. 
+;; +;; The 19980616 change to emit prologues as RTL also confused some +;; versions of GDB, which also interprets prologues. This has been +;; fixed as of GDB 4.18, but it does not harm to unconditionally +;; use ldi here. +;; +;; and the fact that the three insns schedule exactly the same, it's +;; just not worth the effort. + +(define_insn "*adddi_internal" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (plus:DI (match_operand:DI 1 "register_operand" "%r,r,r") + (match_operand:DI 2 "add_operand" "r,K,L")))] + "" + "@ + addl %1,%2,%0 + ldi %0,%2(%1) + ldih %0,%h2(%1)") + +;; ??? Allow large constants when basing off the frame pointer or some +;; virtual register that may eliminate to the frame pointer. This is +;; done because register elimination offsets will change the hi/lo split, +;; and if we split before reload, we will require additional instructions. + +(define_insn "*adddi_fp_hack" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (plus:DI (match_operand:DI 1 "reg_no_subreg_operand" "r,r,r") + (match_operand:DI 2 "const_int_operand" "K,L,n")))] + "NONSTRICT_REG_OK_FP_BASE_P (operands[1]) + && INTVAL (operands[2]) >= 0 + /* This is the largest constant an ldi+ldih pair can add, minus + an upper bound on the displacement between SP and AP during + register elimination. See INITIAL_ELIMINATION_OFFSET. */ + && INTVAL (operands[2]) + < (0x7fff8000 + - FIRST_PSEUDO_REGISTER * UNITS_PER_WORD + - SW_64_ROUND (crtl->outgoing_args_size) + - (SW_64_ROUND (get_frame_size () + + max_reg_num () * UNITS_PER_WORD + + crtl->args.pretend_args_size) + - crtl->args.pretend_args_size))" + "@ + ldi %0,%2(%1) + ldih %0,%h2(%1) + #") + +;; Don't do this if we are adjusting SP since we don't want to do it +;; in two steps. Don't split FP sources for the reason listed above. +(define_split + [(set (match_operand:DI 0 "register_operand") + (plus:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "const_int_operand")))] + "! 
add_operand (operands[2], DImode) + && operands[0] != stack_pointer_rtx + && operands[1] != frame_pointer_rtx + && operands[1] != arg_pointer_rtx" + [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 3))) + (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))] +{ + HOST_WIDE_INT val = INTVAL (operands[2]); + HOST_WIDE_INT low = (val & 0xffff) - 2 * (val & 0x8000); + HOST_WIDE_INT rest = val - low; + rtx rest_rtx = GEN_INT (rest); + + operands[4] = GEN_INT (low); + if (satisfies_constraint_L (rest_rtx)) + operands[3] = rest_rtx; + else if (can_create_pseudo_p ()) + { + operands[3] = gen_reg_rtx (DImode); + emit_move_insn (operands[3], operands[2]); + emit_insn (gen_adddi3 (operands[0], operands[1], operands[3])); + DONE; + } + else + FAIL; +}) + +; *sadd->*saddl/*saddq +(define_insn "*saddl" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI + (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r,r") + (match_operand:SI 2 "const48_operand" "I,I")) + (match_operand:SI 3 "sext_add_operand" "rI,O")))] + "" + "@ + s%2addw %1,%3,%0 + s%2subw %1,%n3,%0") + +(define_insn "*saddq" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI + (mult:DI (match_operand:DI 1 "reg_not_elim_operand" "r,r") + (match_operand:DI 2 "const48_operand" "I,I")) + (match_operand:DI 3 "sext_add_operand" "rI,O")))] + "" + "@ + s%2addl %1,%3,%0 + s%2subl %1,%n3,%0") + +(define_insn "*saddl_se" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (sign_extend:DI + (plus:SI + (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r,r") + (match_operand:SI 2 "const48_operand" "I,I")) + (match_operand:SI 3 "sext_add_operand" "rI,O"))))] + "" + "@ + s%2addw %1,%3,%0 + s%2subw %1,%n3,%0") + +(define_insn "*sxaddw" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (plus:SI + (subreg:SI + (ashift:DI + (subreg:DI (match_operand:SI 1 "reg_not_elim_operand" "r,r") 0) + (match_operand:DI 2 "const_int_operand" "I,I")) + 0) + (match_operand:SI 3 "sext_add_operand" "rI,O")))] + "flag_sw_sxaddl==1 && (INTVAL (operands[2])==3 || INTVAL (operands[2])==2)" + { + switch (which_alternative) + { + case 0: + if (INTVAL (operands[2]) == 3) + return "s8addw %1,%3,%0"; + if (INTVAL (operands[2]) == 2) + return "s4addw %1,%3,%0"; + case 1: + if (INTVAL (operands[2]) == 3) + return "s8subw %1,%n3,%0"; + if (INTVAL (operands[2]) == 2) + return "s4subw %1,%n3,%0"; + default: + gcc_unreachable (); + } + }) + +(define_insn "*sxsubw" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (minus:SI + (subreg:SI + (ashift:DI + (subreg:DI (match_operand:SI 1 "reg_not_elim_operand" "r,r") 0) + (match_operand:DI 2 "const_int_operand" "I,I")) + 0) + (match_operand:SI 3 "sext_add_operand" "rI,O")))] + "flag_sw_sxaddl==1 && (INTVAL (operands[2])==3 || INTVAL (operands[2])==2)" + { + switch (which_alternative) + { + case 0: + if (INTVAL (operands[2]) == 3) + return "s8subw %1,%3,%0"; + if (INTVAL (operands[2]) == 2) + return "s4subw %1,%3,%0"; + case 1: + if (INTVAL (operands[2]) == 3) + return "s8addw %1,%n3,%0"; + if (INTVAL (operands[2]) == 2) + return "s4addw %1,%n3,%0"; + default: + gcc_unreachable (); + } + }) + +(define_insn "*sxaddl" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (plus:DI + (ashift:DI (match_operand:DI 1 "reg_not_elim_operand" "r,r") + (match_operand:DI 2 "const_int_operand" "I,I")) + (match_operand:DI 3 "sext_add_operand" "rI,O")))] + "flag_sw_sxaddl==1 && (INTVAL (operands[2])==3 || INTVAL (operands[2])==2)" + { + switch (which_alternative) + { + case 0: + if (INTVAL (operands[2]) 
== 3) + return "s8addl %1,%3,%0"; + if (INTVAL (operands[2]) == 2) + return "s4addl %1,%3,%0"; + case 1: + if (INTVAL (operands[2]) == 3) + return "s8subl %1,%n3,%0"; + if (INTVAL (operands[2]) == 2) + return "s4subl %1,%n3,%0"; + default: + gcc_unreachable (); + } + }) + +(define_insn "*sxsubl" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (minus:DI + (ashift:DI (match_operand:DI 1 "reg_not_elim_operand" "r,r") + (match_operand:DI 2 "const_int_operand" "I,I")) + (match_operand:DI 3 "sext_add_operand" "rI,O")))] + "flag_sw_sxaddl==1 && (INTVAL (operands[2])==3 || INTVAL (operands[2])==2)" + { + switch (which_alternative) + { + case 0: + if (INTVAL (operands[2]) == 3) + return "s8subl %1,%3,%0"; + if (INTVAL (operands[2]) == 2) + return "s4subl %1,%3,%0"; + case 1: + if (INTVAL (operands[2]) == 3) + return "s8addl %1,%n3,%0"; + if (INTVAL (operands[2]) == 2) + return "s4addl %1,%n3,%0"; + default: + gcc_unreachable (); + } + }) + + +;; plus:SI (ashift:SI -> plus:SI (mult:SI +(define_split + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI + (plus:SI (mult:SI (match_operator:SI 1 "comparison_operator" + [(match_operand 2) + (match_operand 3)]) + (match_operand:SI 4 "const48_operand")) + (match_operand:SI 5 "sext_add_operand")))) + (clobber (match_operand:DI 6 "reg_not_elim_operand"))] + "" + [(set (match_dup 6) (match_dup 7)) + (set (match_dup 0) + (sign_extend:DI (plus:SI (mult:SI (match_dup 8) (match_dup 4)) + (match_dup 5))))] +{ + operands[7] = gen_rtx_fmt_ee (GET_CODE (operands[1]), DImode, + operands[2], operands[3]); + operands[8] = gen_lowpart (SImode, operands[6]); +}) + + +(define_insn "neg2" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (neg:I48MODE (match_operand:I48MODE 1 "reg_or_8bit_operand" "rI")))] + "" + "sub $31,%1,%0") + +(define_insn "*negsi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (neg:SI + (match_operand:SI 1 "reg_or_8bit_operand" "rI"))))] + "" + "subw $31,%1,%0") + +(define_insn "sub3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (minus:I48MODE (match_operand:I48MODE 1 "reg_or_0_operand" "rJ") + (match_operand:I48MODE 2 "reg_or_8bit_operand" "rI")))] + "" + "sub %r1,%2,%0") + +(define_insn "*subsi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "reg_or_8bit_operand" "rI"))))] + "" + "subw %r1,%2,%0") + +(define_insn "*subsi_se2" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (subreg:SI (minus:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rI")) + 0)))] + "" + "subw %r1,%2,%0") + +(define_insn "*ssubl" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI + (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r") + (match_operand:SI 2 "const48_operand" "I")) + (match_operand:SI 3 "reg_or_8bit_operand" "rI")))] + "" + "s%2subw %1,%3,%0") + +(define_insn "*ssubq" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI + (mult:DI (match_operand:DI 1 "reg_not_elim_operand" "r") + (match_operand:DI 2 "const48_operand" "I")) + (match_operand:DI 3 "reg_or_8bit_operand" "rI")))] + "" + "s%2subl %1,%3,%0") + +;;"s%P2subw %1,%3,%0" +(define_insn "*ssubl_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI + (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r") + (match_operand:SI 2 "const48_operand" "I")) + (match_operand:SI 3 "reg_or_8bit_operand" "rI"))))] + 
"" + "s%2subw %1,%3,%0") + + +(define_insn "mul3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (mult:I48MODE (match_operand:I48MODE 1 "reg_or_0_operand" "%rJ") + (match_operand:I48MODE 2 "reg_or_8bit_operand" "rI")))] + "" + "mul %r1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "")]) + +(define_insn "*mulsi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (mult:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ") + (match_operand:SI 2 "reg_or_8bit_operand" "rI"))))] + "" + "mulw %r1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "si")]) + +(define_expand "umuldi3_highpart" + [(set (match_operand:DI 0 "register_operand") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "register_operand")) + (match_operand:DI 2 "reg_or_8bit_operand")) + (const_int 64))))] + "" +{ + if (REG_P (operands[2])) + operands[2] = gen_rtx_ZERO_EXTEND (TImode, operands[2]); +}) + +(define_insn "*umuldi3_highpart_reg" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI + (match_operand:DI 1 "register_operand" "r")) + (zero_extend:TI + (match_operand:DI 2 "register_operand" "r"))) + (const_int 64))))] + "" + "umulh %1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "udi")]) + +(define_insn "*umuldi3_highpart_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (truncate:DI + (lshiftrt:TI + (mult:TI (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) + (match_operand:TI 2 "cint8_operand" "I")) + (const_int 64))))] + "" + "umulh %1,%2,%0" + [(set_attr "type" "imul") + (set_attr "opsize" "udi")]) + +(define_expand "umulditi3" + [(set (match_operand:TI 0 "register_operand") + (mult:TI + (zero_extend:TI (match_operand:DI 1 "reg_no_subreg_operand")) + (zero_extend:TI (match_operand:DI 2 "reg_no_subreg_operand"))))] + "" +{ + rtx l = gen_reg_rtx (DImode), h = gen_reg_rtx (DImode); + emit_insn (gen_muldi3 (l, operands[1], operands[2])); + emit_insn (gen_umuldi3_highpart (h, operands[1], operands[2])); + emit_move_insn (gen_lowpart (DImode, operands[0]), l); + emit_move_insn (gen_highpart (DImode, operands[0]), h); + DONE; +}) + +;; The divide and remainder operations take their inputs from r24 and +;; r25, put their output in r27, and clobber r23 and r28 on all systems. +;; +;; ??? Force sign-extension here because some versions of SYSV and +;; Interix/NT don't do the right thing if the inputs are not properly +;; sign-extended. But Linux, for instance, does not have this +;; problem. Is it worth the complication here to eliminate the sign +;; extension? 
+ +(define_code_iterator any_divmod [div mod udiv umod]) + +(define_expand "si3" + [(set (match_dup 3) + (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand"))) + (set (match_dup 4) + (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand"))) + (parallel [(set (match_dup 5) + (sign_extend:DI + (any_divmod:SI (match_dup 3) (match_dup 4)))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))]) + (set (match_operand:SI 0 "nonimmediate_operand") + (subreg:SI (match_dup 5) 0))] + "" +{ + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); + operands[5] = gen_reg_rtx (DImode); +}) + +(define_expand "di3" + [(parallel [(set (match_operand:DI 0 "register_operand") + (any_divmod:DI + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "register_operand"))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] + "") + +(define_insn "int_div_use_float_si" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (clobber (reg:DF 55)) + (clobber (reg:DF 56)) + (clobber (reg:DF 60))] + "flag_sw_int_div_opt == 1 + &&(GET_CODE (operands[3])==DIV)" + "ifmovd %1,$f23 + fcvtld $f23,$f28 + fcpys $f28,$f28,$f23 + ifmovd %2,$f24 + fcvtld $f24,$f28 + fdivd $f23,$f28,$f24 + fcvtdl_z $f24,$f23 + fimovd $f23,%0" + [(set_attr "type" "fdiv")]) + +(define_insn "int_divu_use_float_si" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (clobber (reg:DF 55)) + (clobber (reg:DF 56)) + (clobber (reg:DF 60))] + "flag_sw_int_div_opt == 1 + &&(GET_CODE (operands[3])==UDIV)" + "zap %1,240,%1 + zap %2,240,%2 + ifmovd %1,$f23 + fcvtld $f23,$f28 + fcpys $f28,$f28,$f23 + ifmovd %2,$f24 + fcvtld $f24,$f28 + fdivd $f23,$f28,$f24 + fcvtdl_z $f24,$f23 + fimovd $f23,%0" + [(set_attr "type" "fdiv")]) + +(define_insn "int_rem_use_float_si" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (clobber (reg:DF 54)) + (clobber (reg:DF 55)) + (clobber (reg:DF 56)) + (clobber (reg:DF 60))] + "flag_sw_int_div_opt == 1 + &&(GET_CODE (operands[3])==MOD)" + "ifmovd %1,$f24 + fcvtld $f24,$f28 + fcpys $f28,$f28,$f24 + ifmovd %2,$f23 + fcvtld $f23,$f28 + fdivd $f24,$f28,$f22 + fcvtdl_z $f22,$f23 + fcvtld $f23,$f22 + fnmad $f22,$f28,$f24,$f23 + fcvtdl_z $f23,$f22 + fimovd $f22,%0" + [(set_attr "type" "fdiv")]) + +(define_insn "int_remu_use_float_si" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (clobber (reg:DF 54)) + (clobber (reg:DF 55)) + (clobber (reg:DF 56)) + (clobber (reg:DF 60))] + "flag_sw_int_div_opt == 1 + &&(GET_CODE (operands[3])==UMOD)" + "zap %1,240,%1 + zap %2,240,%2 + ifmovd %1,$f22 + fcvtld $f22,$f24 + ifmovd %2,$f22 + fcvtld $f22,$f28 + fdivd $f24,$f28,$f23 + fcvtdl_z $f23,$f22 + fcvtld $f22,$f23 + fnmad $f23,$f28,$f24,$f22 + fcvtdl_z $f22,$f23 + fimovd $f23,%0" + [(set_attr "type" "fdiv")]) + + +(define_insn_and_split "*divmodsi_internal_er" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 
"register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && !(TARGET_SW8A && flag_sw_int_divmod)" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) + (sign_extend:DI (match_dup 3))) + (use (match_dup 0)) + (use (match_dup 4)) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] +{ + if (flag_sw_int_div_opt) + { + const char *str; + operands[4] = GEN_INT (sw_64_next_sequence_number++); + switch (GET_CODE (operands[3])) + { + case DIV: + emit_insn (gen_int_div_use_float_si (operands[0], operands[1], operands[2], operands[3])); + break; + case UDIV: + emit_insn (gen_int_divu_use_float_si (operands[0], operands[1], operands[2], operands[3])); + break; + case MOD: + emit_insn (gen_int_rem_use_float_si (operands[0], operands[1], operands[2], operands[3])); + break; + case UMOD: + emit_insn (gen_int_remu_use_float_si (operands[0], operands[1], operands[2], operands[3])); + break; + default: + gcc_unreachable (); + } + } + else + { + const char *str; + switch (GET_CODE (operands[3])) + { + case DIV: + str = "__divw"; + break; + case UDIV: + str = "__divwu"; + break; + case MOD: + str = "__remw"; + break; + case UMOD: + str = "__remwu"; + break; + default: + gcc_unreachable (); + } + operands[4] = GEN_INT (sw_64_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[0], pic_offset_table_rtx, + gen_rtx_SYMBOL_REF (DImode, str), + operands[4])); + } +} + [(set_attr "type" "call") + (set_attr "length" "8")]) + +(define_insn "*divmodsi_internal_er_1" + [(set (match_operand:DI 0 "register_operand" "=c") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")]))) + (use (match_operand:DI 4 "register_operand" "c")) + (use (match_operand 5 "const_int_operand")) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && !(TARGET_SW8A && flag_sw_int_divmod)" + { + if (flag_sw_int_div_opt) + { + switch (GET_CODE (operands[3])) + { + case DIV: + case UDIV: + case MOD: + case UMOD: + return ""; + } + } + else + { + return "call $23,($27),__%E3%j5"; + } + } + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_insn "*divmodsi_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operator:SI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r")]))) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_SW8A && flag_sw_int_divmod" + { + switch (GET_CODE (operands[3])) + { + case DIV: return "divw %1,%2,%0"; + case UDIV: return "udivw %1,%2,%0"; + case MOD: return "remw %1,%2,%0"; + case UMOD: return "uremw %1,%2,%0"; + } + } + [(set_attr "length" "4")]) + +(define_insn "int_div_use_float_di" +[(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (use (match_operand:DI 4 "register_operand" "r")) + (use (match_operand:DI 5 "symbolic_operand")) + (use (match_operand 6 "const_int_operand")) + (use (label_ref:DI (match_operand 7))) + (use (label_ref:DI (match_operand 8))) + (clobber (reg:DF 55)) + (clobber (reg:DI 27)) + (clobber (reg:DI 28)) + (clobber (reg:DF 59)) + (clobber (reg:DF 60))] + "TARGET_EXPLICIT_RELOCS && flag_sw_int_div_opt == 1 &&(GET_CODE (operands[3])==DIV)" + "srl %1,52,$28 + srl %2,52,$27 + bis $28,$27,$28 + bne $28,%l7 + ifmovd 
%1,$f23 + fcvtld $f23,$f27 + ifmovd %2,$f28 + fcvtld $f28,$f23 + fdivd $f27,$f23,$f28 + fcvtdl_z $f28,$f23 + fimovd $f23,%0 + br %l8 +%l7: + ldl %0,%5(%4)\t\t!literal!%6 + call $23,($27),__%E3%j6 +%l8:" + [(set_attr "cannot_copy" "true") + (set_attr "type" "fdiv")]) + +(define_insn "int_divu_use_float_di" +[(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (use (match_operand:DI 4 "register_operand" "r")) + (use (match_operand:DI 5 "symbolic_operand")) + (use (match_operand 6 "const_int_operand")) + (use (label_ref:DI (match_operand 7))) + (use (label_ref:DI (match_operand 8))) + (clobber (reg:DF 55)) + (clobber (reg:DI 27)) + (clobber (reg:DI 28)) + (clobber (reg:DF 59)) + (clobber (reg:DF 60))] + "TARGET_EXPLICIT_RELOCS && flag_sw_int_div_opt == 1 + &&(GET_CODE (operands[3])==UDIV)" + "srl %1,52,$28 + srl %2,52,$27 + bis $28,$27,$28 + bne $28,%l7 + ifmovd %1,$f23 + fcvtld $f23,$f27 + ifmovd %2,$f28 + fcvtld $f28,$f23 + fdivd $f27,$f23,$f28 + fcvtdl_z $f28,$f23 + fimovd $f23,%0 + br %l8 +%l7: + ldl %0,%5(%4)\t\t!literal!%6 + call $23,($27),__%E3%j6 +%l8:" + [(set_attr "cannot_copy" "true") + (set_attr "type" "fdiv")]) + +(define_insn "int_rem_use_float_di" +[(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (use (match_operand:DI 4 "register_operand" "r")) + (use (match_operand:DI 5 "symbolic_operand")) + (use (match_operand 6 "const_int_operand")) + (use (label_ref:DI (match_operand 7))) + (use (label_ref:DI (match_operand 8))) + (clobber (reg:DF 54)) + (clobber (reg:DF 55)) + (clobber (reg:DI 27)) + (clobber (reg:DI 28)) + (clobber (reg:DF 56)) + (clobber (reg:DF 60))] + "TARGET_EXPLICIT_RELOCS && flag_sw_int_div_opt == 1 + &&(GET_CODE (operands[3])==MOD)" + "srl %1,52,$28 + srl %2,52,$27 + bis $28,$27,$28 + bne $28,%l7 + ifmovd %1,$f22 + fcvtld $f22,$f24 + ifmovd %2,$f22 + fcvtld $f22,$f28 + fdivd $f24,$f28,$f22 + fcvtdl_z $f22,$f23 + fcvtld $f23,$f22 + fnmad $f22,$f28,$f24,$f23 + fcvtdl_z $f23,$f22 + fimovd $f22,%0 + br %l8 +%l7: + ldl %0,%5(%4)\t\t!literal!%6 + call $23,($27),__%E3%j6 +%l8:" + [(set_attr "cannot_copy" "true") + (set_attr "type" "fdiv")]) + +(define_insn "int_remu_use_float_di" +[(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (use (match_operand:DI 4 "register_operand" "r")) + (use (match_operand:DI 5 "symbolic_operand")) + (use (match_operand 6 "const_int_operand")) + (use (label_ref:DI (match_operand 7))) + (use (label_ref:DI (match_operand 8))) + (clobber (reg:DF 54)) + (clobber (reg:DF 55)) + (clobber (reg:DI 27)) + (clobber (reg:DI 28)) + (clobber (reg:DF 56)) + (clobber (reg:DF 60))] + "TARGET_EXPLICIT_RELOCS && flag_sw_int_div_opt == 1 + &&(GET_CODE (operands[3])==UMOD)" + " srl %1,52,$28 + srl %2,52,$27 + bis $28,$27,$28 + bne $28,%l7 + ifmovd %1,$f22 + fcvtld $f22,$f24 + ifmovd %2,$f22 + fcvtld $f22,$f28 + fdivd $f24,$f28,$f23 + fcvtdl_z $f23,$f22 + fcvtld $f22,$f23 + fnmad $f23,$f28,$f24,$f22 + fcvtdl_z $f22,$f23 + fimovd $f23,%0 + br %l8 +%l7: + ldl %0,%5(%4)\t\t!literal!%6 + call $23,($27),__%E3%j6 +%l8:" + [(set_attr "cannot_copy" "true") + (set_attr "type" "fdiv")]) + +(define_insn_and_split "*divmoddi_internal_er" + [(set (match_operand:DI 0 
"register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && !(TARGET_SW8A && flag_sw_int_divmod)" + "#" + "&& reload_completed" + [(parallel [(set (match_dup 0) (match_dup 3)) + (use (match_dup 0)) + (use (match_dup 4)) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))])] +{ + if (flag_sw_int_div_opt) + { + const char *str; + operands[4] = GEN_INT (sw_64_next_sequence_number++); + operands[7] = gen_label_rtx (); + operands[8] = gen_label_rtx (); + switch (GET_CODE (operands[3])) + { + case DIV: + str = "__divl"; + emit_insn (gen_int_div_use_float_di (operands[0],operands[1],operands[2],operands[3],pic_offset_table_rtx ,gen_rtx_SYMBOL_REF (DImode, str),operands[4],operands[7],operands[8])); + break; + case UDIV: + str = "__divlu"; + emit_insn (gen_int_divu_use_float_di (operands[0],operands[1],operands[2],operands[3],pic_offset_table_rtx ,gen_rtx_SYMBOL_REF (DImode, str),operands[4],operands[7],operands[8])); + break; + case MOD: + str = "__reml"; + emit_insn (gen_int_rem_use_float_di (operands[0],operands[1],operands[2],operands[3],pic_offset_table_rtx ,gen_rtx_SYMBOL_REF (DImode, str),operands[4],operands[7],operands[8])); + break; + case UMOD: + str = "__remlu"; + emit_insn (gen_int_remu_use_float_di (operands[0],operands[1],operands[2],operands[3],pic_offset_table_rtx ,gen_rtx_SYMBOL_REF (DImode, str),operands[4],operands[7],operands[8])); + break; + default: + gcc_unreachable (); + } + } + else + { + const char *str; + switch (GET_CODE (operands[3])) + { + case DIV: + str = "__divl"; + break; + case UDIV: + str = "__divlu"; + break; + case MOD: + str = "__reml"; + break; + case UMOD: + str = "__remlu"; + break; + default: + gcc_unreachable (); + } + operands[4] = GEN_INT (sw_64_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[0], pic_offset_table_rtx, + gen_rtx_SYMBOL_REF (DImode, str), + operands[4])); + } +} + [(set_attr "type" "call") + (set_attr "length" "8")]) + +(define_insn "*divmoddi_internal_er_1" + [(set (match_operand:DI 0 "register_operand" "=c") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "a") + (match_operand:DI 2 "register_operand" "b")])) + (use (match_operand:DI 4 "register_operand" "c")) + (use (match_operand 5 "const_int_operand")) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_EXPLICIT_RELOCS && !(TARGET_SW8A && flag_sw_int_divmod)" + { + if (flag_sw_int_div_opt) + { + switch (GET_CODE (operands[3])) + { + case DIV: + case UDIV: + case MOD: + case UMOD: + return ""; + } + } + else + { + return "call $23,($27),__%E3%j5"; + } + } + [(set_attr "type" "call") + (set_attr "length" "4")]) + +(define_insn "*divmoddi_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operator:DI 3 "divmod_operator" + [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "register_operand" "r")])) + (clobber (reg:DI 23)) + (clobber (reg:DI 28))] + "TARGET_SW8A && flag_sw_int_divmod" + { + switch (GET_CODE (operands[3])) + { + case DIV: return "divl %1,%2,%0"; + case UDIV: return "udivl %1,%2,%0"; + case MOD: return "reml %1,%2,%0"; + case UMOD: return "ureml %1,%2,%0"; + } + } + [(set_attr "length" "4")]) + +;; Next are the basic logical operations. We only expose the DImode operations +;; to the rtl expanders, but SImode versions exist for combine as well as for +;; the atomic operation splitters. 
+ +(define_insn "*andsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r,r") + (and:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ,rJ") + (match_operand:SI 2 "and_operand" "rI,N,M")))] + "" + "@ + and %r1,%2,%0 + bic %r1,%N2,%0 + zapnot %r1,%m2,%0" + [(set_attr "type" "ilog,ilog,shift")]) + +(define_insn "anddi3" + [(set (match_operand:DI 0 "register_operand" "=r,r,r") + (and:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ,rJ") + (match_operand:DI 2 "and_operand" "rI,N,M")))] + "" + "@ + and %r1,%2,%0 + bic %r1,%N2,%0 + zapnot %r1,%m2,%0" + [(set_attr "type" "ilog,ilog,shift")]) + +;; There are times when we can split an AND into two AND insns. This occurs +;; when we can first clear any bytes and then clear anything else. For +;; example "I & 0xffff07" is "(I & 0xffffff) & 0xffffffffffffff07". +;; Only do this when running on 64-bit host since the computations are +;; too messy otherwise. + +(define_split + [(set (match_operand:DI 0 "register_operand") + (and:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "const_int_operand")))] + "! and_operand (operands[2], DImode)" + [(set (match_dup 0) (and:DI (match_dup 1) (match_dup 3))) + (set (match_dup 0) (and:DI (match_dup 0) (match_dup 4)))] +{ + unsigned HOST_WIDE_INT mask1 = INTVAL (operands[2]); + unsigned HOST_WIDE_INT mask2 = mask1; + int i; + + /* For each byte that isn't all zeros, make it all ones. */ + for (i = 0; i < 64; i += 8) + if ((mask1 & ((HOST_WIDE_INT) 0xff << i)) != 0) + mask1 |= (HOST_WIDE_INT) 0xff << i; + + /* Now turn on any bits we've just turned off. */ + mask2 |= ~ mask1; + + operands[3] = GEN_INT (mask1); + operands[4] = GEN_INT (mask2); +}) + +(define_insn "zero_extendqi2" + [(set (match_operand:I248MODE 0 "register_operand" "=r,r") + (zero_extend:I248MODE + (match_operand:QI 1 "reg_or_bwx_memory_operand" "r,m")))] + "" + "@ + and %1,0xff,%0 + ldbu%U1 %0,%1" + [(set_attr "type" "ilog,ild") + (set_attr "isa" "*,bwx")]) + +(define_insn "zero_extendhi2" + [(set (match_operand:I48MODE 0 "register_operand" "=r,r") + (zero_extend:I48MODE + (match_operand:HI 1 "reg_or_bwx_memory_operand" "r,m")))] + "" + "@ + zapnot %1,3,%0 + ldhu%U1 %0,%1" + [(set_attr "type" "shift,ild") + (set_attr "isa" "*,bwx")]) + +(define_insn "zero_extendsidi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extend:DI (match_operand:SI 1 "register_operand" "r")))] + "" + "zapnot %1,15,%0" + [(set_attr "type" "shift")]) + +(define_insn "andnot3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (and:I48MODE + (not:I48MODE (match_operand:I48MODE 1 "reg_or_8bit_operand" "rI")) + (match_operand:I48MODE 2 "reg_or_0_operand" "rJ")))] + "" + "bic %r2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*iorsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (ior:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:SI 2 "or_operand" "rI,N")))] + "" + "@ + bis %r1,%2,%0 + ornot %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "iordi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (ior:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:DI 2 "or_operand" "rI,N")))] + "" + "@ + bis %r1,%2,%0 + ornot %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*one_cmplsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (not:SI (match_operand:SI 1 "reg_or_8bit_operand" "rI")))] + "" + "ornot $31,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "one_cmpldi2" + [(set (match_operand:DI 0 "register_operand" "=r") + 
(not:DI (match_operand:DI 1 "reg_or_8bit_operand" "rI")))] + "" + "ornot $31,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*iornot3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (ior:I48MODE + (not:I48MODE (match_operand:I48MODE 1 "reg_or_8bit_operand" "rI")) + (match_operand:I48MODE 2 "reg_or_0_operand" "rJ")))] + "" + "ornot %r2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*xorsi_internal" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (xor:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:SI 2 "or_operand" "rI,N")))] + "" + "@ + xor %r1,%2,%0 + eqv %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "xordi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (xor:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") + (match_operand:DI 2 "or_operand" "rI,N")))] + "" + "@ + xor %r1,%2,%0 + eqv %r1,%N2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*xornot3" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (not:I48MODE (xor:I48MODE + (match_operand:I48MODE 1 "register_operand" "%rJ") + (match_operand:I48MODE 2 "register_operand" "rI"))))] + "" + "eqv %r1,%2,%0" + [(set_attr "type" "ilog")]) + +;; Handle FFS and related insns iff we support CIX. + +(define_expand "ffsdi2" + [(set (match_dup 2) + (ctz:DI (match_operand:DI 1 "register_operand"))) + (set (match_dup 3) + (plus:DI (match_dup 2) (const_int 1))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (eq (match_dup 1) (const_int 0)) + (const_int 0) (match_dup 3)))] + "" +{ + operands[2] = gen_reg_rtx (DImode); + operands[3] = gen_reg_rtx (DImode); +}) + +(define_insn "clzdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (clz:DI (match_operand:DI 1 "register_operand" "r")))] + "" + "ctlz %1,%0" + [(set_attr "type" "mvi")]) + +(define_insn "ctzdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (ctz:DI (match_operand:DI 1 "register_operand" "r")))] + "" + "cttz %1,%0" + [(set_attr "type" "mvi")]) + +(define_insn "popcountdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (popcount:DI (match_operand:DI 1 "register_operand" "r")))] + "" + "ctpop %1,%0" + [(set_attr "type" "mvi")]) + +(define_insn "popcountsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (popcount:SI (match_operand:SI 1 "register_operand" "r")))] + "" + "zapnot %1,15,%0\;ctpop %0,%0" + [(set_attr "type" "mvi")]) + +(define_expand "bswapsi2" + [(set (match_operand:SI 0 "register_operand") + (bswap:SI (match_operand:SI 1 "register_operand")))] + "!optimize_size" +{ + if (TARGET_SW8A == 0 || flag_sw_rev != 1) + { + rtx t0, t1; + + t0 = gen_reg_rtx (DImode); + t1 = gen_reg_rtx (DImode); + + emit_insn (gen_inslh (t0, gen_lowpart (DImode, operands[1]), GEN_INT (7))); + emit_insn (gen_inswl_const (t1, gen_lowpart (HImode, operands[1]), + GEN_INT (24))); + emit_insn (gen_iordi3 (t1, t0, t1)); + emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (16))); + emit_insn (gen_anddi3 (t1, t1, sw_64_expand_zap_mask (0x5))); + emit_insn (gen_anddi3 (t0, t0, sw_64_expand_zap_mask (0xa))); + emit_insn (gen_addsi3 (operands[0], gen_lowpart (SImode, t0), + gen_lowpart (SImode, t1))); + DONE; + } + else + { + emit_insn (gen_bswapsi2_internal (operands[0], operands[1])); + DONE; + } +}) + +(define_expand "bswapdi2" + [(set (match_operand:DI 0 "register_operand") + (bswap:DI (match_operand:DI 1 "register_operand")))] + "!optimize_size" +{ + if (TARGET_SW8A == 0 || flag_sw_rev != 1) + { + rtx t0, t1; + + t0 = gen_reg_rtx (DImode); + t1 = gen_reg_rtx (DImode); + + /* This 
method of shifting and masking is not specific to Sw_64, but + is only profitable on Sw_64 because of our handy byte zap insn. */ + + emit_insn (gen_lshrdi3 (t0, operands[1], GEN_INT (32))); + emit_insn (gen_ashldi3 (t1, operands[1], GEN_INT (32))); + emit_insn (gen_iordi3 (t1, t0, t1)); + + emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (16))); + emit_insn (gen_ashldi3 (t1, t1, GEN_INT (16))); + emit_insn (gen_anddi3 (t0, t0, sw_64_expand_zap_mask (0xcc))); + emit_insn (gen_anddi3 (t1, t1, sw_64_expand_zap_mask (0x33))); + emit_insn (gen_iordi3 (t1, t0, t1)); + + emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (8))); + emit_insn (gen_ashldi3 (t1, t1, GEN_INT (8))); + emit_insn (gen_anddi3 (t0, t0, sw_64_expand_zap_mask (0xaa))); + emit_insn (gen_anddi3 (t1, t1, sw_64_expand_zap_mask (0x55))); + emit_insn (gen_iordi3 (operands[0], t0, t1)); + DONE; + } + else + { + emit_insn (gen_bswapdi2_internal (operands[0], operands[1])); + DONE; + } +}) + +(define_insn "bswaphi2" + [(set (match_operand:HI 0 "register_operand" "=r") + (bswap:HI (match_operand:HI 1 "register_operand" "r")))] + "TARGET_SW8A && flag_sw_rev == 1" + "revbh %1,%0" + [(set_attr "isa" "sw8a")]) + +(define_insn "bswapsi2_internal" + [(set (match_operand:SI 0 "register_operand" "=r") + (bswap:SI (match_operand:SI 1 "register_operand" "r")))] + "TARGET_SW8A && flag_sw_rev == 1" + "revbw %1,%0" + [(set_attr "isa" "sw8a")]) + +(define_insn "bswapdi2_internal" + [(set (match_operand:DI 0 "register_operand" "=r") + (bswap:DI (match_operand:DI 1 "register_operand" "r")))] + "TARGET_SW8A && flag_sw_rev == 1" + "revbl %1,%0" + [(set_attr "isa" "sw8a")]) + +(define_insn "ldfdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (fix:DI + (unspec:DF [(match_operand:DF 1 "register_operand" "fG")] + FRINT)))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmov%-l %1, %0" + [(set_attr "type" "frint")]) + +(define_insn "fix_truncdfdi2_8a" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&r,&r") + (fix:DI + (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] + "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1" + "cmov%-l%T2 %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "ludfdi2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unsigned_fix:DI + (unspec:DF [(match_operand:DF 1 "register_operand" "fG")] + FRINT)))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmov%-lu %1, %0" + [(set_attr "type" "frint")]) + +(define_insn "fixuns_truncdfdi2_internal" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&r,&r") + (unsigned_fix:DI + (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] + "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1" + "cmov%-lu%T2 %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "idfsi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (fix:SI + (unspec:DF [(match_operand:DF 1 "register_operand" "fG")] + FRINT)))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmov%-w %1, %0" + [(set_attr "type" "frint")]) + +;; CMOVDW_Z PART1 +(define_insn "fix_truncdfsi2_8a" + [(set (match_operand:SI 0 "reg_no_subreg_operand" "=&r,&r") + (fix:SI + (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] 
+ "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1" + "cmov%-w%T2 %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +;; CMOVDW_Z PART2 +(define_expand "fix_truncdfsi2" + [(set (match_operand:SI 0 "reg_no_subreg_operand") + (fix:SI (match_operand:DF 1 "reg_or_0_operand")))] + "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1") + +(define_insn "iudfsi2" + [(set (match_operand:SI 0 "register_operand" "=&r,&r") + (unsigned_fix:SI + (unspec:DF [(match_operand:DF 1 "register_operand" "fG,fG")] + FRINT)))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmov%-wu %1, %0" + [(set_attr "type" "frint")]) + +;; CMOVDWU_Z PART1 +(define_insn "*fixuns_truncdfsi2" + [(set (match_operand:SI 0 "reg_no_subreg_operand" "=&r,&r") + (unsigned_fix:SI + (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] + "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1" + "cmov%-wu%T2 %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +;; CMOVDWU_Z PART2 +(define_expand "fixuns_truncdfsi2" + [(set (match_operand:SI 0 "reg_no_subreg_operand") + (unsigned_fix:SI (match_operand:DF 1 "reg_or_0_operand")))] + "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1") + +(define_insn "floatdisf2_8a" + [(set (match_operand:SF 0 "register_operand" "=&f,&f") + (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "r,r")))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmovls %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "floatunsdisf2" + [(set (match_operand:SF 0 "register_operand" "=&f,&f") + (unsigned_float:SF (match_operand:DI 1 "reg_no_subreg_operand" "r,r")))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmovuls %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "floatsisf2" + [(set (match_operand:SF 0 "register_operand" "=&f,&f") + (float:SF (match_operand:SI 1 "reg_no_subreg_operand" "r,r")))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmovws %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "floatunssisf2" + [(set (match_operand:SF 0 "register_operand" "=&f,&f") + (unsigned_float:SF (match_operand:SI 1 "reg_no_subreg_operand" "r,r")))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmovuws %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "floatdidf2_8a" + [(set (match_operand:DF 0 "register_operand" "=&f,&f") + (float:DF 
(match_operand:DI 1 "reg_no_subreg_operand" "r,r")))] + "TARGET_SW8A && flag_sw_cmov == 1 && TARGET_FP" + "cmovl%-%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "floatunsdidf2" + [(set (match_operand:DF 0 "register_operand" "=&f,&f") + (unsigned_float:DF (match_operand:DI 1 "reg_no_subreg_operand" "r,r")))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmovuld %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "floatsidf2" + [(set (match_operand:DF 0 "register_operand" "=&f,&f") + (float:DF (match_operand:SI 1 "reg_no_subreg_operand" "r,r")))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmovwd %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "floatunssidf2" + [(set (match_operand:DF 0 "register_operand" "=&f,&f") + (unsigned_float:DF (match_operand:SI 1 "reg_no_subreg_operand" "r,r")))] + "TARGET_SW8A && flag_sw_cmov == 1" + "cmovuwd %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "builtin_sbt" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (unspec:DI + [(match_operand:DI 2 "reg_or_6bit_operand" "rI")] + UNSPEC_SBT) + (match_operand:DI 1 "register_operand" "r")))] + "flag_sw_bitop" + "sbt %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "builtin_cbt" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (unspec:DI + [(match_operand:DI 2 "reg_or_6bit_operand" "rI")] + UNSPEC_CBT) + (match_operand:DI 1 "register_operand" "r")))] + "flag_sw_bitop" + "cbt %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "lshrsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (lshiftrt:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "reg_or_5bit_operand" "rY")))] + "TARGET_SW8A && flag_sw_shift_word == 1" + "srlw %r1,%2,%0" + [(set_attr "type" "shift") + (set_attr "isa" "sw8a")]) + +(define_insn "ashrsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (ashiftrt:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "reg_or_5bit_operand" "rY")))] + "TARGET_SW8A && flag_sw_shift_word == 1" + "sraw %r1,%2,%0" + [(set_attr "type" "shift") + (set_attr "isa" "sw8a")]) + +(define_insn "rotlsi3" + [(set (match_operand:SI 0 "register_operand" "=r") + (subreg:SI + (zero_extend:DI (rotate:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") + (match_operand:SI 2 "reg_or_5bit_operand" "rY"))) 0))] + "TARGET_SW8A && flag_sw_shift_word == 1" + "rolw %r1,%2,%0" + [(set_attr "type" "shift") + (set_attr "isa" "sw8a")]) + +(define_insn "rotldi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (rotate:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "rS")))] + "TARGET_SW8A && 
flag_sw_shift_word == 1" + "roll %r1,%2,%0" + [(set_attr "type" "shift") + (set_attr "isa" "sw8a")]) + +;; Next come the shifts and the various extract and insert operations. + +(define_insn "ashldi3" + [(set (match_operand:DI 0 "register_operand" "=r,r") + (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ,rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "P,rS")))] + "" +{ + switch (which_alternative) + { + case 0: + if (operands[2] == const1_rtx) + return "addl %r1,%r1,%0"; + else + return "sll %r1,%2,%0"; + case 1: + if (TARGET_SW8A == 0 || flag_sw_shift_word != 1) + return "sll %r1,%2,%0"; + else + return "slll %r1,%2,%0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "iadd,shift")]) + +(define_expand "ashlsi3" + [(set (match_operand:SI 0 "register_operand") + (ashift:SI (match_operand:SI 1 "reg_or_0_operand") + (match_operand:SI 2 "reg_or_5bit_operand")))]) + +(define_insn "*ashlsi3_sll" + [(set (match_operand:SI 0 "register_operand" "=r,&r") + (ashift:SI (match_operand:SI 1 "reg_or_0_operand" "rJ,rJ") + (match_operand:SI 2 "reg_or_5bit_operand" "P,rS")))] + "TARGET_SW8A == 0 || flag_sw_shift_word != 1" +{ + switch (which_alternative) + { + case 0: + if (operands[2] == const1_rtx) + return "addw %r1,%r1,%0"; + else + return "s%P2addw %r1,0,%0"; + case 1: + if (REG_P (operands[2])) + return "and %2,31,%0\;sll %r1,%0,%0"; + else + return "sll %r1,%2,%0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "iadd,shift")]) + +(define_insn "*ashlsi3_sllw" + [(set (match_operand:SI 0 "register_operand" "=r,r") + (ashift:SI (match_operand:SI 1 "reg_or_0_operand" "rJ,rJ") + (match_operand:SI 2 "reg_or_5bit_operand" "P,rY")))] + "TARGET_SW8A && flag_sw_shift_word == 1" +{ + switch (which_alternative) + { + case 0: + if (operands[2] == const1_rtx) + return "addw %r1,%r1,%0"; + else + return "s%P2addw %r1,0,%0"; + case 1: + return "sllw %r1,%2,%0"; + default: + gcc_unreachable (); + } +} + [(set_attr "type" "iadd,shift") + (set_attr "isa" "*,sw8a")]) + +(define_insn "*ashldi_se" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (subreg:SI (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "const_int_operand" "P")) + 0)))] + "IN_RANGE (INTVAL (operands[2]), 1, 3)" +{ + if (operands[2] == const1_rtx) + return "addw %r1,%r1,%0"; + else + return "s%P2addw %r1,0,%0"; +} + [(set_attr "type" "iadd")]) + +(define_insn "lshrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "rS")))] + "" +{ + if (TARGET_SW8A == 0 || flag_sw_shift_word != 1) + return "srl %r1,%2,%0"; + else + return "srll %r1,%2,%0"; +} + [(set_attr "type" "shift")]) + +(define_insn "ashrdi3" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_6bit_operand" "rS")))] + "" +{ + if (TARGET_SW8A == 0 || flag_sw_shift_word != 1) + return "sra %r1,%2,%0"; + else + return "sral %r1,%2,%0"; +} + [(set_attr "type" "shift")]) + +(define_insn "extendqi2" + [(set (match_operand:I24MODE 0 "register_operand" "=r") + (sign_extend:I24MODE + (match_operand:QI 1 "register_operand" "r")))] + "" + "sextb %1,%0" + [(set_attr "type" "shift")]) + +(define_expand "extendqidi2" + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI (match_operand:QI 1 "general_operand")))] + "" +{ + operands[1] = force_reg (QImode, operands[1]); +}) + +(define_insn "*extendqidi2_bwx" + [(set 
(match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:QI 1 "register_operand" "r")))] + "" + "sextb %1,%0" + [(set_attr "type" "shift")]) + +(define_insn "extendhisi2" + [(set (match_operand:SI 0 "register_operand" "=r") + (sign_extend:SI (match_operand:HI 1 "register_operand" "r")))] + "" + "sexth %1,%0" + [(set_attr "type" "shift")]) + +(define_expand "extendhidi2" + [(set (match_operand:DI 0 "register_operand") + (sign_extend:DI (match_operand:HI 1 "general_operand")))] + "" +{ + operands[1] = force_reg (HImode, operands[1]); +}) + +(define_insn "*extendhidi2_bwx" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI (match_operand:HI 1 "register_operand" "r")))] + "" + "sexth %1,%0" + [(set_attr "type" "shift")]) + +;; Here's how we sign extend an unaligned byte and halfword. Doing this +;; as a pattern saves one instruction. The code is similar to that for +;; the unaligned loads (see below). +;; +;; Operand 1 is the address, operand 0 is the result. + +(define_expand "unaligned_extendqidi" + [(set (match_dup 3) + (mem:DI (and:DI (match_operand:DI 1 "address_operand") (const_int -8)))) + (set (match_dup 4) + (ashift:DI (match_dup 3) + (minus:DI (const_int 64) + (ashift:DI + (and:DI (match_dup 2) (const_int 7)) + (const_int 3))))) + (set (match_operand:QI 0 "register_operand") + (ashiftrt:DI (match_dup 4) (const_int 56)))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[2] = get_unaligned_offset (operands[1], 1); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); +}) + +(define_expand "unaligned_extendhidi" + [(set (match_dup 3) + (mem:DI (and:DI (match_operand:DI 1 "address_operand") (const_int -8)))) + (set (match_dup 4) + (ashift:DI (match_dup 3) + (minus:DI (const_int 64) + (ashift:DI + (and:DI (match_dup 2) (const_int 7)) + (const_int 3))))) + (set (match_operand:HI 0 "register_operand") + (ashiftrt:DI (match_dup 4) (const_int 48)))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[2] = get_unaligned_offset (operands[1], 2); + operands[3] = gen_reg_rtx (DImode); + operands[4] = gen_reg_rtx (DImode); +}) + +;; add if condition +(define_insn "*extxl_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "mode_width_operand" "n") + (match_operand:DI 3 "mul8_operand" "I")))] + "" +{ + if (INTVAL (operands[2])==8) + return "extlb %r1,%s3,%0"; + else if (INTVAL (operands[2])==16) + return "extlh %r1,%s3,%0"; + else if (INTVAL (operands[2])==32) + return "extlw %r1,%s3,%0"; + else if (INTVAL (operands[2])==64) + return "extll %r1,%s3,%0"; +} + [(set_attr "type" "shift")]) + +;; add if condition +(define_insn "extxl" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI + (match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "mode_width_operand" "n") + (ashift:DI (match_operand:DI 3 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" +{ + if (INTVAL (operands[2])==8) + return "extlb %r1,%3,%0"; + else if (INTVAL (operands[2])==16) + return "extlh %r1,%3,%0"; + else if (INTVAL (operands[2])==32) + return "extlw %r1,%3,%0"; + else if (INTVAL (operands[2])==64) + return "extll %r1,%3,%0"; +} + [(set_attr "type" "shift")]) + +;; Combine has some strange notion of preserving existing undefined behavior +;; in shifts larger than a word size. So capture these patterns that it +;; should have turned into zero_extracts. 
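+;; Illustrative note (not generated verbatim): in C terms the pattern
+;; below matches an expression of the form
+;;   (x >> (y * 8)) & 0xff
+;; i.e. "extract byte y of x", which maps onto a single extlb; the
+;; 0xffff, 0xffffffff and -1 masks select extlh, extlw and extll in the
+;; same way.  Here x and y stand for operands 1 and 2 of the pattern.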
+ +;; add if condition +(define_insn "*extxl_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))) + (match_operand:DI 3 "mode_mask_operand" "n")))] + "" +{ + if (INTVAL (operands[3]) == 0xff) + return "extlb %r1,%2,%0"; + else if (INTVAL (operands[3]) == 0xffff) + return "extlh %r1,%2,%0"; + else if (INTVAL (operands[3]) == 0xffffffff) + return "extlw %r1,%2,%0"; + else if (INTVAL (operands[3]) == -1) + return "extll %r1,%2,%0"; +} + [(set_attr "type" "shift")]) + +(define_insn "*extql_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "extll %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extqh" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI + (match_operand:DI 1 "reg_or_0_operand" "rJ") + (minus:DI (const_int 64) + (ashift:DI + (and:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 7)) + (const_int 3)))))] + "" + "exthl %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extwh" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI + (and:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 65535)) + (minus:DI (const_int 64) + (ashift:DI + (and:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 7)) + (const_int 3)))))] + "" + "exthh %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "extlh" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI + (and:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 2147483647)) + (minus:DI (const_int 64) + (ashift:DI + (and:DI + (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 7)) + (const_int 3)))))] + "" + "exthw %r1,%2,%0" + [(set_attr "type" "shift")]) + +;; This converts an extXl into an extXh with an appropriate adjustment +;; to the address calculation. 
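+;; The insert (insXl) and mask (mskXl) patterns that follow are the
+;; building blocks of unaligned stores.  Roughly, storing a byte val to
+;; an unaligned address addr is open-coded as
+;;   t1 = aligned quadword containing addr
+;;   t2 = inslb  val, addr    ; val shifted into the right byte lane
+;;   t3 = masklb t1, addr     ; old byte cleared out of the quadword
+;;   store (t2 | t3) back to the aligned address
+;; This sequence is only a sketch of how the mnemonics defined here are
+;; meant to be combined, not a literal expansion taken from this file.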
+ +(define_insn "insbl_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI + (match_operand:QI 1 "register_operand" "r")) + (match_operand:DI 2 "mul8_operand" "I")))] + "" + "inslb %1,%s2,%0" + [(set_attr "type" "shift")]) + +(define_insn "inswl_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI + (match_operand:HI 1 "register_operand" "r")) + (match_operand:DI 2 "mul8_operand" "I")))] + "" + "inslh %1,%s2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insll_const" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "r")) + (match_operand:DI 2 "mul8_operand" "I")))] + "" + "inslw %1,%s2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insbl" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI + (match_operand:QI 1 "register_operand" "r")) + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "inslb %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "inswl" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI + (match_operand:HI 1 "register_operand" "r")) + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "inslh %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insll" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (zero_extend:DI + (match_operand:SI 1 "register_operand" "r")) + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "inslw %1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "insql" + [(set (match_operand:DI 0 "register_operand" "=r") + (ashift:DI (match_operand:DI 1 "register_operand" "r") + (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") + (const_int 3))))] + "" + "insll %1,%2,%0" + [(set_attr "type" "shift")]) + +;; Combine has this sometimes habit of moving the and outside of the +;; shift, making life more interesting. + +(define_insn "*insxl" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "mul8_operand" "I")) + (match_operand:DI 3 "const_int_operand" "i")))] + "((unsigned HOST_WIDE_INT) 0xff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + || ((unsigned HOST_WIDE_INT) 0xffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + || ((unsigned HOST_WIDE_INT) 0xffffffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3]))" +{ +#if HOST_BITS_PER_WIDE_INT == 64 + if ((unsigned HOST_WIDE_INT) 0xff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + return "inslb %1,%s2,%0"; + if ((unsigned HOST_WIDE_INT) 0xffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + return "inslh %1,%s2,%0"; + if ((unsigned HOST_WIDE_INT) 0xffffffff << INTVAL (operands[2]) + == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) + return "inslw %1,%s2,%0"; +#endif + gcc_unreachable (); +} + [(set_attr "type" "shift")]) + +;; We do not include the insXh insns because they are complex to express +;; and it does not appear that we would ever want to generate them. +;; +;; Since we need them for block moves, though, cop out and use unspec. 
+ +(define_insn "insxh" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "mode_width_operand" "n") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")] + UNSPEC_INSXH))] + "" +{ + if (INTVAL (operands[2])==16) + return "inshh %r1,%3,%0"; + else if (INTVAL (operands[2])==32) + return "inshw %r1,%3,%0"; + else if (INTVAL (operands[2])==64) + return "inshl %r1,%3,%0"; +} + [(set_attr "type" "shift")]) + +(define_insn "mskxl" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (not:DI (ashift:DI + (match_operand:DI 2 "mode_mask_operand" "n") + (ashift:DI + (match_operand:DI 3 "reg_or_8bit_operand" "rI") + (const_int 3)))) + (match_operand:DI 1 "reg_or_0_operand" "rJ")))] + "" +{ + if (INTVAL (operands[2]) == 0xff) + return "masklb %r1,%3,%0"; + else if (INTVAL (operands[2]) == 0xffff) + return "masklh %r1,%3,%0"; + else if (INTVAL (operands[2]) == 0xffffffff) + return "masklw %r1,%3,%0"; + else if (INTVAL (operands[2]) == -1) + return "maskll %r1,%3,%0"; +} + [(set_attr "type" "shift")]) + +;; We do not include the mskXh insns because it does not appear we would +;; ever generate one. +;; +;; Again, we do for block moves and we use unspec again. + +(define_insn "mskxh" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "mode_width_operand" "n") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")] + UNSPEC_MSKXH))] + "" +{ + if (INTVAL (operands[2])==16) + return "maskhh %r1,%3,%0"; + else if (INTVAL (operands[2])==32) + return "maskhw %r1,%3,%0"; + else if (INTVAL (operands[2])==64) + return "maskhl %r1,%3,%0"; +} + [(set_attr "type" "shift")]) + + +(define_insn_and_split "*ze_and_ne" + [(set (match_operand:DI 0 "register_operand" "=r") + (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 1) + (match_operand 2 "const_int_operand" "I")))] + "(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8" + "#" + "(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8" + [(set (match_dup 0) + (and:DI (match_dup 1) (match_dup 3))) + (set (match_dup 0) + (ne:DI (match_dup 0) (const_int 0)))] + "operands[3] = GEN_INT (1 << INTVAL (operands[2]));") + +;; Floating-point operations. All the double-precision insns can extend +;; from single, so indicate that. The exception are the ones that simply +;; play with the sign bits; it's not clear what to do there. 
+ +(define_mode_iterator FMODE [SF DF]) + +(define_mode_attr opmode [(SF "si") (DF "di")]) + +(define_insn "abs2" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (abs:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "fcpys $f31,%R1,%0" + [(set_attr "type" "fcpys")]) + +(define_insn "*nabs2" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (neg:FMODE + (abs:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG"))))] + "TARGET_FP" + "fcpysn $f31,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_expand "abstf2" + [(parallel [(set (match_operand:TF 0 "register_operand") + (abs:TF (match_operand:TF 1 "reg_or_0_operand"))) + (use (match_dup 2))])] + "TARGET_HAS_XFLOATING_LIBS" + "operands[2] = force_reg (DImode, GEN_INT (HOST_WIDE_INT_1U << 63));") + +(define_insn_and_split "*abstf_internal" + [(set (match_operand:TF 0 "register_operand" "=r") + (abs:TF (match_operand:TF 1 "reg_or_0_operand" "rG"))) + (use (match_operand:DI 2 "register_operand" "r"))] + "TARGET_HAS_XFLOATING_LIBS" + "#" + "&& reload_completed" + [(const_int 0)] + "sw_64_split_tfmode_frobsign (operands, gen_andnotdi3); DONE;") + +(define_insn "neg2" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (neg:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "fcpysn %R1,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_expand "negtf2" + [(parallel [(set (match_operand:TF 0 "register_operand") + (neg:TF (match_operand:TF 1 "reg_or_0_operand"))) + (use (match_dup 2))])] + "TARGET_HAS_XFLOATING_LIBS" + "operands[2] = force_reg (DImode, GEN_INT ((HOST_WIDE_INT) 1 << 63));") + +(define_insn_and_split "*negtf_internal" + [(set (match_operand:TF 0 "register_operand" "=r") + (neg:TF (match_operand:TF 1 "reg_or_0_operand" "rG"))) + (use (match_operand:DI 2 "register_operand" "r"))] + "TARGET_HAS_XFLOATING_LIBS" + "#" + "&& reload_completed" + [(const_int 0)] + "sw_64_split_tfmode_frobsign (operands, gen_xordi3); DONE;") + +(define_insn "copysign3" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (unspec:FMODE [(match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")] + UNSPEC_COPYSIGN))] + "TARGET_FP" + "fcpys %R2,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "*ncopysign3" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (neg:FMODE + (unspec:FMODE [(match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")] + UNSPEC_COPYSIGN)))] + "TARGET_FP" + "fcpysn %R2,%R1,%0" + [(set_attr "type" "fadd")]) + +(define_insn "*add3" + [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") + (plus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 0" + "fadd%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*add3_same" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (plus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 1" + "fadd%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < 
SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "add3" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (plus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "TARGET_FP" + "" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "*fmasf4" + [(set (match_operand:SF 0 "register_operand" "=&f") + (fma:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "f")))] + "flag_sw_sdsame == 0" + "fmas %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*fmasf4_same" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "f")))] + "flag_sw_sdsame == 1" + "fmas %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "fmasf4" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "f")))] + "flag_sw_fma==1 && TARGET_FP" + "" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*fmadf4" + [(set (match_operand:DF 0 "register_operand" "=&f") + (fma:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f") + (match_operand:DF 3 "register_operand" "f")))] + "flag_sw_sdsame == 0" + "fmad %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*fmadf4_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f") + (match_operand:DF 3 "register_operand" "f")))] + "flag_sw_sdsame == 1" + "fmad %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "fmadf4" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f") + (match_operand:DF 3 "register_operand" "f")))] + "flag_sw_fma==1 && TARGET_FP" + "" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*fmssf4" + [(set (match_operand:SF 0 "register_operand" "=&f") + (fma:SF + (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f") + (neg:SF (match_operand:SF 3 "register_operand" "f"))))] + "flag_sw_sdsame == 0" + "fmss %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*fmssf4_same" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF + (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 
"register_operand" "f") + (neg:SF (match_operand:SF 3 "register_operand" "f"))))] + "flag_sw_sdsame == 1" + "fmss %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "fmssf4" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF + (match_operand:SF 1 "register_operand" "f") + (match_operand:SF 2 "register_operand" "f") + (neg:SF (match_operand:SF 3 "register_operand" "f"))))] + "flag_sw_fma==1 && TARGET_FP" + "" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*fmsdf4" + [(set (match_operand:DF 0 "register_operand" "=&f") + (fma:DF + (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f") + (neg:DF (match_operand:DF 3 "register_operand" "f"))))] + "flag_sw_sdsame == 0" + "fmsd %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*fmsdf4_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF + (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f") + (neg:DF (match_operand:DF 3 "register_operand" "f"))))] + "flag_sw_sdsame == 1" + "fmsd %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "fmsdf4" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF + (match_operand:DF 1 "register_operand" "f") + (match_operand:DF 2 "register_operand" "f") + (neg:DF (match_operand:DF 3 "register_operand" "f"))))] + "flag_sw_fma==1 && TARGET_FP" + "" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*fnmasf4" + [(set (match_operand:SF 0 "register_operand" "=&f") + (fma:SF + (neg:SF (match_operand:SF 1 "register_operand" "f")) + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "f")))] + "flag_sw_sdsame == 0" + "fnmas %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*fnmasf4_same" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF + (neg:SF (match_operand:SF 1 "register_operand" "f")) + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "f")))] + "flag_sw_sdsame == 1" + "fnmas %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "fnmasf4" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF + (neg:SF (match_operand:SF 1 "register_operand" "f")) + (match_operand:SF 2 "register_operand" "f") + (match_operand:SF 3 "register_operand" "f")))] + "flag_sw_fma==1 && TARGET_FP" + "" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*fnmadf4" + [(set (match_operand:DF 0 "register_operand" "=&f") + (fma:DF + (neg:DF (match_operand:DF 1 "register_operand" "f")) + (match_operand:DF 2 "register_operand" "f") + (match_operand:DF 3 "register_operand" "f")))] + "flag_sw_sdsame == 0" + "fnmad %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") 
+ (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*fnmadf4_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF + (neg:DF (match_operand:DF 1 "register_operand" "f")) + (match_operand:DF 2 "register_operand" "f") + (match_operand:DF 3 "register_operand" "f")))] + "flag_sw_sdsame == 1" + "fnmad %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "fnmadf4" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF + (neg:DF (match_operand:DF 1 "register_operand" "f")) + (match_operand:DF 2 "register_operand" "f") + (match_operand:DF 3 "register_operand" "f")))] + "flag_sw_fma==1 && TARGET_FP" + "" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*fnmssf4" + [(set (match_operand:SF 0 "register_operand" "=&f") + (fma:SF + (neg:SF (match_operand:SF 1 "register_operand" "f")) + (match_operand:SF 2 "register_operand" "f") + (neg:SF (match_operand:SF 3 "register_operand" "f"))))] + "flag_sw_sdsame == 0" + "fnmss %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*fnmssf4_same" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF + (neg:SF (match_operand:SF 1 "register_operand" "f")) + (match_operand:SF 2 "register_operand" "f") + (neg:SF (match_operand:SF 3 "register_operand" "f"))))] + "flag_sw_sdsame == 1" + "fnmss %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "fnmssf4" + [(set (match_operand:SF 0 "register_operand" "=f") + (fma:SF + (neg:SF (match_operand:SF 1 "register_operand" "f")) + (match_operand:SF 2 "register_operand" "f") + (neg:SF (match_operand:SF 3 "register_operand" "f"))))] + "flag_sw_fma==1 && TARGET_FP" + "" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*fnmsdf4" + [(set (match_operand:DF 0 "register_operand" "=&f") + (fma:DF + (neg:DF (match_operand:DF 1 "register_operand" "f")) + (match_operand:DF 2 "register_operand" "f") + (neg:DF (match_operand:DF 3 "register_operand" "f"))))] + "flag_sw_sdsame == 0" + "fnmsd %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*fnmsdf4_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF + (neg:DF (match_operand:DF 1 "register_operand" "f")) + (match_operand:DF 2 "register_operand" "f") + (neg:DF (match_operand:DF 3 "register_operand" "f"))))] + "flag_sw_sdsame == 1" + "fnmsd %R1,%R2,%R3,%0" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "fnmsdf4" + [(set (match_operand:DF 0 "register_operand" "=f") + (fma:DF + (neg:DF (match_operand:DF 1 "register_operand" "f")) + (match_operand:DF 2 "register_operand" "f") + (neg:DF (match_operand:DF 3 "register_operand" "f"))))] + "flag_sw_fma==1 && TARGET_FP" + "" + [(set_attr "type" "fmadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*adddf_ext1" + [(set (match_operand:DF 0 "register_operand" "=&f") + (plus:DF (float_extend:DF + 
(match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fadd%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*adddf_ext1_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fadd%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*adddf_ext2" + [(set (match_operand:DF 0 "register_operand" "=&f") + (plus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "%fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fadd%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*adddf_ext2_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (plus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "%fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fadd%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "addtf3" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand")) + (use (match_operand:TF 2 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_arith (PLUS, operands); DONE;") + +(define_insn "*sub3" + [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") + (minus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 0" + "fsub%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*sub3_same" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (minus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 1" + "fsub%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "sub3" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (minus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "TARGET_FP" + "" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "*subdf_ext1" + [(set (match_operand:DF 0 "register_operand" "=&f") + (minus:DF (float_extend:DF + 
(match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fsub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*subdf_ext1_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fsub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*subdf_ext2" + [(set (match_operand:DF 0 "register_operand" "=&f") + (minus:DF (match_operand:DF 1 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fsub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*subdf_ext2_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (match_operand:DF 1 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fsub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*subdf_ext3" + [(set (match_operand:DF 0 "register_operand" "=&f") + (minus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fsub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*subdf_ext3_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (minus:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fsub%-%/ %R1,%R2,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "subtf3" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand")) + (use (match_operand:TF 2 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_arith (MINUS, operands); DONE;") + +(define_insn "*mul3" + [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") + (mult:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 0" + "fmul%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*mul3_same" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (mult:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 1" + "fmul%/ %R1,%R2,%0" 
+ [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "mul3" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (mult:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "TARGET_FP" + "" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "*muldf_ext1" + [(set (match_operand:DF 0 "register_operand" "=&f") + (mult:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fmul%-%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*muldf_ext1_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (mult:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fmul%-%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*muldf_ext2" + [(set (match_operand:DF 0 "register_operand" "=&f") + (mult:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "%fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fmul%-%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*muldf_ext2_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (mult:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "%fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fmul%-%/ %R1,%R2,%0" + [(set_attr "type" "fmul") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "multf3" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand")) + (use (match_operand:TF 2 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_arith (MULT, operands); DONE;") + +(define_insn "div3_ieee" + [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") + (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 0" + "fdiv%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +;; Floating point reciprocal approximation +(define_insn "fre" + [(set (match_operand:SFDF 0 "register_operand" "=f") + (unspec:SFDF [(match_operand:SFDF 1 "register_operand" "f")] + UNSPEC_FRECX))] + "(flag_sw_recip || 
flag_sw_recip_precision) && flag_reciprocal_math && TARGET_SW8A" + "frec %1,%0" + [(set_attr "type" "fp")]) + +(define_insn "*div3" + [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") + (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 0" + "fdiv%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*div3_same" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 1" + "fdiv%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_expand "div3" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] + "TARGET_FP" +{ + if ((flag_sw_recip || flag_sw_recip_precision) && flag_reciprocal_math && TARGET_SW8A) + { + if (operands[1] == CONST0_RTX (mode)) + operands[1] = gen_move_reg (operands[1]); + + if (operands[2] == CONST0_RTX (mode)) + operands[2] = gen_move_reg (operands[2]); + + sw_64_emit_swdiv (operands[0], operands[1], operands[2], true); + DONE; + } + +}) + +(define_insn "*div3_fpr" + [(set (match_operand:FMODE 0 "register_operand" "=f") + (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG") + (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] + "TARGET_FP" + "fdiv%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*divdf_ext1" + [(set (match_operand:DF 0 "register_operand" "=&f") + (div:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fdiv%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*divdf_ext1_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG")) + (match_operand:DF 2 "reg_or_0_operand" "fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fdiv%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*divdf_ext2" + [(set (match_operand:DF 0 "register_operand" "=&f") + (div:DF (match_operand:DF 1 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fdiv%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*divdf_ext2_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (match_operand:DF 1 
"reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fdiv%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) + +(define_insn "*divdf_ext3" + [(set (match_operand:DF 0 "register_operand" "=&f") + (div:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fdiv%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_insn "*divdf_ext3_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (div:DF (float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG"))))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fdiv%-%/ %R1,%R2,%0" + [(set_attr "type" "fdiv") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui")]) +(define_expand "divtf3" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand")) + (use (match_operand:TF 2 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_arith (DIV, operands); DONE;") + +;; frint floating-point round to integral standard patterns. +(define_insn "2" + [(set (match_operand:SFDF 0 "register_operand" "=f") + (unspec:SFDF [(match_operand:SFDF 1 "register_operand" "f")] + FRINT))] + "TARGET_SW8A && flag_sw_fprnd" + "fri %1, %0" + [(set_attr "type" "frint")]) + +(define_insn "*sqrt2" + [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") + (sqrt:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 0" + "fsqrt%/ %R1,%0" + [(set_attr "type" "fsqrt") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*sqrt2_same" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (sqrt:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 1" + "fsqrt%/ %R1,%0" + [(set_attr "type" "fsqrt") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "sqrt2" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (sqrt:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG")))] + "TARGET_FP" + "" + [(set_attr "type" "fsqrt") + (set_attr "opsize" "") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +;; Define conversion operators between DFmode and SImode, using the cvtql +;; instruction. To allow combine et al to do useful things, we keep the +;; operation as a unit until after reload, at which point we split the +;; instructions. +;; +;; Note that we (attempt to) only consider this optimization when the +;; ultimate destination is memory. 
If we will be doing further integer +;; processing, it is cheaper to do the truncation in the int regs. + +(define_insn "*cvtql" + [(set (match_operand:SF 0 "register_operand" "=&f") + (unspec:SF [(match_operand:DI 1 "reg_or_0_operand" "fG")] + UNSPEC_CVTQL))] + "TARGET_FP && flag_sw_sdsame == 0" + "fcvtlw%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "v_sv")]) +(define_insn "*cvtql_same" + [(set (match_operand:SF 0 "register_operand" "=f") + (unspec:SF [(match_operand:DI 1 "reg_or_0_operand" "fG")] + UNSPEC_CVTQL))] + "TARGET_FP && flag_sw_sdsame == 1" + "fcvtlw%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "v_sv")]) + +(define_insn_and_split "*fix_truncdfsi_ieee" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 4 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG")]) 0)) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && sw_64_fptm >= SW_64_FPTM_SU && flag_sw_cmov == 0" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 4 [(match_dup 1)])) + (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 3))] +{ + operands[5] = adjust_address (operands[0], SFmode, 0); +} + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +;; mieee-opt +(define_insn_and_split "*fix_truncdfsi_internal" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 4 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG")]) 0)) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 4 [(match_dup 1)])) + (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 3))] +{ + //operands[4] = gen_rtx_REG (SFmode, REGNO (operands[2])); + operands[5] = adjust_address (operands[0], SFmode, 0); +} + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn "*fix_truncdfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&f,&f") + (match_operator:DI 2 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG,fG")]))] + "TARGET_FP && ((flag_sw_sdsame == 0 && flag_sw_cmov == 0) || TARGET_SW6B)" + "fcvt%-l%T2 %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*fix_truncdfdi2_same" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=f,f") + (match_operator:DI 2 "fix_operator" + [(match_operand:DF 1 "reg_or_0_operand" "fG,fG")]))] + "TARGET_FP && ((flag_sw_sdsame == 1 && flag_sw_cmov == 0) || TARGET_SW6B)" + "fcvt%-l%T2 %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "fix_truncdfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand") + (fix:DI (match_operand:DF 1 "reg_or_0_operand")))] + "TARGET_FP") + +(define_expand "fixuns_truncdfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand") + (unsigned_fix:DI (match_operand:DF 1 "reg_or_0_operand")))] + "TARGET_FP" +{ + if ((TARGET_SW8A == 1 && flag_sw_cmov != 1) || 
TARGET_SW6B) + { + rtx reg1 = gen_reg_rtx (DFmode); + rtx reg2 = gen_reg_rtx (DFmode); + rtx reg3 = gen_reg_rtx (DImode); + rtx_code_label *label1 = gen_label_rtx (); + rtx_code_label *label2 = gen_label_rtx (); + rtx test; + REAL_VALUE_TYPE offset; + + real_2expN (&offset, 63, DFmode); + + emit_move_insn (reg1, const_double_from_real_value (offset, DFmode)); + do_pending_stack_adjust (); + + test = gen_rtx_GE (VOIDmode, operands[1], reg1); + emit_jump_insn (gen_cbranchdf4 (test, operands[1], reg1, label1)); + + emit_insn (gen_fix_truncdfdi2 (operands[0], operands[1])); + emit_jump_insn (gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (VOIDmode, label2))); + emit_barrier (); + + emit_label (label1); + emit_move_insn (reg2, gen_rtx_MINUS (DFmode, operands[1], reg1)); + emit_move_insn (reg3, GEN_INT (BITMASK_HIGH)); + emit_insn (gen_ashldi3 (reg3, reg3, GEN_INT (32))); + + emit_insn (gen_fix_truncdfdi2 (operands[0], reg2)); + emit_insn (gen_iordi3 (operands[0], operands[0], reg3)); + + emit_label (label2); + + /* Allow REG_NOTES to be set on last insn (labels don't have enough + fields, and can't be used for REG_NOTES anyway). */ + emit_use (stack_pointer_rtx); + DONE; + } + else + { + emit_insn (gen_fixuns_truncdfdi2_internal (operands[0], operands[1])); + DONE; + } +}) + + +;; Likewise between SFmode and SImode. + +(define_insn_and_split "*fix_truncsfsi_ieee" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 4 "fix_operator" + [(float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG"))]) 0)) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && ((sw_64_fptm >= SW_64_FPTM_SU && flag_sw_cmov == 0) || TARGET_SW6B)" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 4 [(float_extend:DF (match_dup 1))])) + (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 3))] + "operands[5] = adjust_address (operands[0], SFmode, 0);" + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +;; mieee-opt +(define_insn_and_split "*fix_truncsfsi_internal" + [(set (match_operand:SI 0 "memory_operand" "=m") + (subreg:SI + (match_operator:DI 4 "fix_operator" + [(float_extend:DF + (match_operand:SF 1 "reg_or_0_operand" "fG"))]) 0)) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && ((sw_64_fptm < SW_64_FPTM_SU && flag_sw_cmov == 0) || TARGET_SW6B)" + "#" + "&& reload_completed" + [(set (match_dup 2) (match_op_dup 4 [(float_extend:DF (match_dup 1))])) + (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) + (set (match_dup 5) (match_dup 3))] +{ + // operands[4] = gen_rtx_REG (SFmode, REGNO (operands[2])); + operands[5] = adjust_address (operands[0], SFmode, 0); +} + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn "*fix_truncsfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&f,&f") + (match_operator:DI 2 "fix_operator" + [(float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,fG"))]))] + "TARGET_FP && ((flag_sw_sdsame == 0 && flag_sw_cmov == 0) || TARGET_SW6B)" + "fcvt%-l%T2 %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*fix_truncsfdi2_same" + [(set (match_operand:DI 0 "reg_no_subreg_operand" "=f,f") + (match_operator:DI 2 "fix_operator" + [(float_extend:DF 
(match_operand:SF 1 "reg_or_0_operand" "fG,fG"))]))] + "TARGET_FP && ((flag_sw_sdsame == 1 && flag_sw_cmov == 0) || TARGET_SW6B)" + "fcvt%-l%T2 %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "c") + (set_attr "trap_suffix" "v_sv_svi") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "fix_truncsfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand") + (fix:DI (float_extend:DF (match_operand:SF 1 "reg_or_0_operand"))))] + "TARGET_FP && flag_sw_cmov == 0") + +(define_expand "fixuns_truncsfdi2" + [(set (match_operand:DI 0 "reg_no_subreg_operand") + (unsigned_fix:DI + (float_extend:DF (match_operand:SF 1 "reg_or_0_operand"))))] + "TARGET_FP" +{ + if ( (TARGET_SW8A == 1 && flag_sw_cmov != 1) || TARGET_SW6B) + { + rtx reg1 = gen_reg_rtx (SFmode); + rtx reg2 = gen_reg_rtx (DFmode); + rtx reg3 = gen_reg_rtx (DImode); + rtx reg4 = gen_reg_rtx (DFmode); + rtx reg5 = gen_reg_rtx (DFmode); + rtx_code_label *label1 = gen_label_rtx (); + rtx_code_label *label2 = gen_label_rtx (); + rtx test; + REAL_VALUE_TYPE offset; + + real_2expN (&offset, 63, SFmode); + + emit_move_insn (reg1, const_double_from_real_value (offset, SFmode)); + do_pending_stack_adjust (); + + test = gen_rtx_GE (SFmode, operands[1], reg1); + emit_insn (gen_extendsfdf2 (reg4, reg1)); + emit_insn (gen_extendsfdf2 (reg2, operands[1])); + emit_jump_insn (gen_cbranchdf4 (test, reg2, reg4, label1)); + + emit_insn (gen_fix_truncdfdi2 (operands[0], reg2)); + emit_jump_insn (gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (VOIDmode, label2))); + emit_barrier (); + + emit_label (label1); + emit_move_insn (reg5, gen_rtx_MINUS (DFmode, reg2, reg4)); + emit_move_insn (reg3, GEN_INT (BITMASK_HIGH)); + emit_insn (gen_ashldi3 (reg3, reg3, GEN_INT (32))); + + emit_insn (gen_fix_truncdfdi2 (operands[0], reg5)); + emit_insn (gen_iordi3 (operands[0], operands[0], reg3)); + + emit_label (label2); + + /* Allow REG_NOTES to be set on last insn (labels don't have enough + fields, and can't be used for REG_NOTES anyway). 
*/ + emit_use (stack_pointer_rtx); + DONE; + } + else + { + rtx reg2 = gen_reg_rtx (DFmode); + emit_insn (gen_extendsfdf2 (reg2, operands[1])); + emit_insn (gen_fixuns_truncdfdi2_internal (operands[0], reg2)); + DONE; + } +}) + + + +(define_expand "fix_trunctfdi2" + [(use (match_operand:DI 0 "register_operand")) + (use (match_operand:TF 1 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_cvt (FIX, operands); DONE;") + +(define_expand "fixuns_trunctfdi2" + [(use (match_operand:DI 0 "register_operand")) + (use (match_operand:TF 1 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_cvt (UNSIGNED_FIX, operands); DONE;") + +(define_insn "*floatdisf2" + [(set (match_operand:SF 0 "register_operand" "=&f,&f") + (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] + "(flag_sw_sdsame == 0 && flag_sw_cmov == 0) || TARGET_SW6B" + "fcvtl%,%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*floatdisf2_same" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] + "(flag_sw_sdsame == 1 && flag_sw_cmov == 0) || TARGET_SW6B" + "fcvtl%,%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "floatdisf2" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] + "TARGET_FP" + "" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn_and_split "*floatsisf2_ieee" + [(set (match_operand:SF 0 "register_operand" "=&f") + (float:SF (match_operand:SI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && sw_64_fptm >= SW_64_FPTM_SU && flag_sw_cmov == 0" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:SF (match_dup 2)))] + "operands[1] = adjust_address (operands[1], SFmode, 0);") + +;; mieee-opt +(define_insn_and_split "*floatsisf2" + [(set (match_operand:SF 0 "register_operand" "=&f") + (float:SF (match_operand:SI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && flag_sw_cmov == 0 && ! 
TARGET_SW8A" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:SF (match_dup 2)))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); + //operands[2] = gen_rtx_REG (DImode, REGNO (operands[0])); +}) + +(define_insn "*floatdidf2" + [(set (match_operand:DF 0 "register_operand" "=&f,&f") + (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] + "flag_sw_sdsame == 0 " + "fcvtl%-%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*floatdidf2_same" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] + "flag_sw_sdsame == 1 " + "fcvtl%-%/ %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "floatdidf2" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] + "TARGET_FP " + "" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn_and_split "*floatsidf2_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (float:DF (match_operand:SI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && sw_64_fptm >= SW_64_FPTM_SU && flag_sw_cmov == 0" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:DF (match_dup 2)))] + "operands[1] = adjust_address (operands[1], SFmode, 0);") + +;; mieee-opt +(define_insn_and_split "*floatsidf2" + [(set (match_operand:DF 0 "register_operand" "=&f") + (float:DF (match_operand:SI 1 "memory_operand" "m"))) + (clobber (match_scratch:DI 2 "=&f")) + (clobber (match_scratch:SF 3 "=&f"))] + "TARGET_FP && flag_sw_cmov == 0 && ! 
TARGET_SW8A" + "#" + "&& reload_completed" + [(set (match_dup 3) (match_dup 1)) + (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) + (set (match_dup 0) (float:DF (match_dup 2)))] +{ + operands[1] = adjust_address (operands[1], SFmode, 0); +// operands[2] = gen_rtx_REG (DImode, REGNO (operands[0])); + // operands[3] = gen_rtx_REG (SFmode, REGNO (operands[0])); +}) + +(define_expand "floatditf2" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:DI 1 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_cvt (FLOAT, operands); DONE;") + +(define_expand "floatunsditf2" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:DI 1 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_cvt (UNSIGNED_FLOAT, operands); DONE;") + +(define_expand "extendsfdf2" + [(set (match_operand:DF 0 "register_operand") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand")))] + "TARGET_FP" +{ + if (sw_64_fptm >= SW_64_FPTM_SU) + operands[1] = force_reg (SFmode, operands[1]); +}) + +;; The Unicos/Mk assembler doesn't support cvtst, but we've already +;; asserted that sw_64_fptm == SW_64_FPTM_N. + +(define_insn "*cmpsf_internal" + [(set (match_operand:SF 0 "register_operand" "=&f,&f") + (match_operator:SF 1 "sw_64_fp_comparison_operator" + [(match_operand:SF 2 "reg_or_0_operand" "fG,fG") + (match_operand:SF 3 "reg_or_0_operand" "fG,fG")]))] + "TARGET_FP && flag_sw_sdsame == 0 && flag_sw_sf_cmpsel" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*cmpsf_internal_same" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (match_operator:SF 1 "sw_64_fp_comparison_operator" + [(match_operand:SF 2 "reg_or_0_operand" "fG,fG") + (match_operand:SF 3 "reg_or_0_operand" "fG,fG")]))] + "TARGET_FP && flag_sw_sdsame == 1 && flag_sw_sf_cmpsel" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "*extendsfdf2_ieee" + [(set (match_operand:DF 0 "register_operand" "=&f") + (float_extend:DF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_FP && sw_64_fptm >= SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fcvtsd %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) +(define_insn "*extendsfdf2_ieee_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (float_extend:DF (match_operand:SF 1 "register_operand" "f")))] + "TARGET_FP && sw_64_fptm >= SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fcvtsd %1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes")]) + +(define_insn "*extendsfdf2_internal_1" + [(set (match_operand:DF 0 "register_operand" "=&f,f,m") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "f,m,f")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_nofcpys == 1" + "@ + fcvtsd %1,%0 + fld%,%U1 %0,%1 + fst%-%U0 %1,%0" + [(set_attr "type" "fcpys,fld,fst")]) + +(define_insn "*extendsfdf2_internal_2" + [(set (match_operand:DF 0 "register_operand" "=&f,f,m") + (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "f,m,f")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_nofcpys == 0" + "@ + fcvtsd %1,%0 \;fcpys %0,%0,%0 + fld%, %0,%1 + fst%- %1,%0" + [(set_attr "type" 
"fcpys,fld,fst")]) + +;; Use register_operand for operand 1 to prevent compress_float_constant +;; from doing something silly. When optimizing we'll put things back +;; together anyway. +(define_expand "extendsftf2" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:SF 1 "register_operand"))] + "TARGET_HAS_XFLOATING_LIBS" +{ + rtx tmp = gen_reg_rtx (DFmode); + emit_insn (gen_extendsfdf2 (tmp, operands[1])); + emit_insn (gen_extenddftf2 (operands[0], tmp)); + DONE; +}) + +(define_expand "extenddftf2" + [(use (match_operand:TF 0 "register_operand")) + (use (match_operand:DF 1 "register_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_cvt (FLOAT_EXTEND, operands); DONE;") + +(define_insn "*truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=&f,&f") + (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 0" + "fcvt%-%,%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*truncdfsf2_same" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] + "flag_sw_sdsame == 1" + "fcvt%-%,%/ %R1,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_expand "truncdfsf2" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] + "TARGET_FP" + "" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "round_suffix" "normal") + (set_attr "trap_suffix" "u_su_sui") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_expand "trunctfdf2" + [(use (match_operand:DF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand"))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_xfloating_cvt (FLOAT_TRUNCATE, operands); DONE;") + +(define_expand "trunctfsf2" + [(use (match_operand:SF 0 "register_operand")) + (use (match_operand:TF 1 "general_operand"))] + "TARGET_FP && TARGET_HAS_XFLOATING_LIBS" +{ + rtx tmpf, sticky, arg, lo, hi; + + tmpf = gen_reg_rtx (DFmode); + sticky = gen_reg_rtx (DImode); + arg = copy_to_mode_reg (TFmode, operands[1]); + lo = gen_lowpart (DImode, arg); + hi = gen_highpart (DImode, arg); + + /* Convert the low word of the TFmode value into a sticky rounding bit, + then or it into the low bit of the high word. This leaves the sticky + bit at bit 48 of the fraction, which is representable in DFmode, + which prevents rounding error in the final conversion to SFmode. */ + + emit_insn (gen_rtx_SET (sticky, gen_rtx_NE (DImode, lo, const0_rtx))); + emit_insn (gen_iordi3 (hi, hi, sticky)); + emit_insn (gen_trunctfdf2 (tmpf, arg)); + emit_insn (gen_truncdfsf2 (operands[0], tmpf)); + DONE; +}) + +;; Next are all the integer comparisons, and conditional moves and branches +;; and some of the related define_expand's and define_split's. 
+ +(define_insn "*setcc_internal" + [(set (match_operand 0 "register_operand" "=r") + (match_operator 1 "sw_64_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")]))] + "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "cmp%C1 %2,%3,%0" + [(set_attr "type" "icmp")]) + +;; Yes, we can technically support reg_or_8bit_operand in operand 2, +;; but that's non-canonical rtl and allowing that causes inefficiencies +;; from cse on. +(define_insn "*setcc_swapped_internal" + [(set (match_operand 0 "register_operand" "=r") + (match_operator 1 "sw_64_swapped_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (match_operand:DI 3 "reg_or_0_operand" "rJ")]))] + "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "cmp%c1 %r3,%2,%0" + [(set_attr "type" "icmp")]) + +;; Use match_operator rather than ne directly so that we can match +;; multiple integer modes. +(define_insn "*setne_internal" + [(set (match_operand 0 "register_operand" "=r") + (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (const_int 0)]))] + "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT + && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 + && GET_CODE (operands[1]) == NE + && GET_MODE (operands[0]) == GET_MODE (operands[1])" + "cmpult $31,%2,%0" + [(set_attr "type" "icmp")]) + +;; The mode folding trick can't be used with const_int operands, since +;; reload needs to know the proper mode. +;; +;; Use add_operand instead of the more seemingly natural reg_or_8bit_operand +;; in order to create more pairs of constants. As long as we're allowing +;; two constants at the same time, and will have to reload one of them... + +(define_insn "*movcc_internal" + [(set (match_operand:IMODE 0 "register_operand" "=r,r,r,r") + (if_then_else:IMODE + (match_operator 2 "signed_comparison_operator" + [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J") + (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")]) + (match_operand:IMODE 1 "add_operand" "rI,0,rI,0") + (match_operand:IMODE 5 "add_operand" "0,rI,0,rI")))] + "(operands[3] == const0_rtx) ^ (operands[4] == const0_rtx)" + "@ + sel%C2 %r3,%1,%0,%0 + sel%D2 %r3,%5,%0,%0 + sel%c2 %r4,%1,%0,%0 + sel%d2 %r4,%5,%0,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movcc_lbc" + [(set (match_operand:IMODE 0 "register_operand" "=r,r") + (if_then_else:IMODE + (eq (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (match_operand:IMODE 1 "reg_or_8bit_operand" "rI,0") + (match_operand:IMODE 3 "reg_or_8bit_operand" "0,rI")))] + "" + "@ + sellbc %r2,%1,%0,%0 + sellbs %r2,%3,%0,%0" + [(set_attr "type" "icmov")]) + +(define_insn "*movcc_lbs" + [(set (match_operand:IMODE 0 "register_operand" "=r,r") + (if_then_else:IMODE + (ne (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (match_operand:IMODE 1 "reg_or_8bit_operand" "rI,0") + (match_operand:IMODE 3 "reg_or_8bit_operand" "0,rI")))] + "" + "@ + sellbs %r2,%1,%0,%0 + sellbc %r2,%3,%0,%0" + [(set_attr "type" "icmov")]) + +;; For ABS, we have two choices, depending on whether the input and output +;; registers are the same or not. 
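Written as C, a rough sketch (an assumption, not the documented expansion) of the two sequences the absdi2 expander below chooses between, depending on whether the source and destination registers coincide:

    long abs_same (long x)           /* source and destination are the same  */
    {
      long t = -x;                   /* scratch = -x                         */
      return x >= 0 ? x : t;         /* conditional move keyed on x          */
    }

    long abs_diff (long src)         /* distinct source and destination      */
    {
      long dst = -src;               /* dst = -src                           */
      return src < 0 ? dst : src;    /* conditional move keyed on src        */
    }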
+(define_expand "absdi2" + [(set (match_operand:DI 0 "register_operand") + (abs:DI (match_operand:DI 1 "register_operand")))] + "" +{ + if (rtx_equal_p (operands[0], operands[1])) + emit_insn (gen_absdi2_same (operands[0], gen_reg_rtx (DImode))); + else + emit_insn (gen_absdi2_diff (operands[0], operands[1])); + DONE; +}) + +(define_expand "absdi2_same" + [(set (match_operand:DI 1 "register_operand") + (neg:DI (match_operand:DI 0 "register_operand"))) + (set (match_dup 0) + (if_then_else:DI (ge (match_dup 0) (const_int 0)) + (match_dup 0) + (match_dup 1)))]) + +(define_expand "absdi2_diff" + [(set (match_operand:DI 0 "register_operand") + (neg:DI (match_operand:DI 1 "register_operand"))) + (set (match_dup 0) + (if_then_else:DI (lt (match_dup 1) (const_int 0)) + (match_dup 0) + (match_dup 1)))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (abs:DI (match_dup 0))) + (clobber (match_operand:DI 1 "register_operand"))] + "" + [(set (match_dup 1) (neg:DI (match_dup 0))) + (set (match_dup 0) (if_then_else:DI (ge (match_dup 0) (const_int 0)) + (match_dup 0) (match_dup 1)))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (abs:DI (match_operand:DI 1 "register_operand")))] + "! rtx_equal_p (operands[0], operands[1])" + [(set (match_dup 0) (neg:DI (match_dup 1))) + (set (match_dup 0) (if_then_else:DI (lt (match_dup 1) (const_int 0)) + (match_dup 0) (match_dup 1)))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (neg:DI (abs:DI (match_dup 0)))) + (clobber (match_operand:DI 1 "register_operand"))] + "" + [(set (match_dup 1) (neg:DI (match_dup 0))) + (set (match_dup 0) (if_then_else:DI (le (match_dup 0) (const_int 0)) + (match_dup 0) (match_dup 1)))]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (neg:DI (abs:DI (match_operand:DI 1 "register_operand"))))] + "! 
rtx_equal_p (operands[0], operands[1])" + [(set (match_dup 0) (neg:DI (match_dup 1))) + (set (match_dup 0) (if_then_else:DI (gt (match_dup 1) (const_int 0)) + (match_dup 0) (match_dup 1)))]) + +(define_insn "3" + [(set (match_operand:I12MODE 0 "register_operand" "=r") + (any_maxmin:I12MODE + (match_operand:I12MODE 1 "reg_or_0_operand" "%rJ") + (match_operand:I12MODE 2 "reg_or_8bit_operand" "rI")))] + "TARGET_MAX" + " %r1,%2,%0" + [(set_attr "type" "mvi")]) + +(define_expand "smaxdi3" + [(set (match_dup 3) + (le:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (smax:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (le:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))]) + +(define_insn "*smax_const0" + [(set (match_operand:DI 0 "register_operand" "=r") + (smax:DI (match_operand:DI 1 "register_operand" "0") + (const_int 0)))] + "" + "sellt %0,0,%0,%0" + [(set_attr "type" "icmov")]) + +(define_expand "smindi3" + [(set (match_dup 3) + (lt:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (smin:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (lt:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))]) + +(define_insn "*smin_const0" + [(set (match_operand:DI 0 "register_operand" "=r") + (smin:DI (match_operand:DI 1 "register_operand" "0") + (const_int 0)))] + "" + "selgt %0,0,%0,%0" + [(set_attr "type" "icmov")]) + +(define_expand "umaxdi3" + [(set (match_dup 3) + (leu:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (umax:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (leu:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (eq (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))]) + +(define_expand "umindi3" + [(set (match_dup 3) + (ltu:DI (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand"))) + (set (match_operand:DI 0 "register_operand") + (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))] + "" + "operands[3] = gen_reg_rtx (DImode);") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (umin:DI (match_operand:DI 1 "reg_or_0_operand") + 
(match_operand:DI 2 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 3 "register_operand"))] + "operands[2] != const0_rtx" + [(set (match_dup 3) (ltu:DI (match_dup 1) (match_dup 2))) + (set (match_dup 0) (if_then_else:DI (ne (match_dup 3) (const_int 0)) + (match_dup 1) (match_dup 2)))]) + +(define_insn "*bcc_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (label_ref (match_operand 0)) + (pc)))] + "" + "b%C1 %r2,%0" + [(set_attr "type" "ibr")]) + +(define_insn_and_split "*branchcombine" + [(set (pc) + (if_then_else (match_operator 1 "sw_64_branch_combination" + [(match_operand:DI 2 "register_operand") + (match_operand:DI 3 "reg_or_8bit_operand")]) + (label_ref (match_operand 0)) + (pc)))] +"flag_sw_branch_combination==1 + && (can_create_pseudo_p ()) && operands[3]!=CONST0_RTX (DImode)" +"#" +"&& 1" + [(parallel + [(set (pc) + (if_then_else + (match_op_dup 1 + [(match_dup 2) + (match_dup 3)]) + (label_ref (match_dup 0)) + (pc))) + (clobber (match_dup 4))])] +{ + operands[4]=gen_reg_rtx (DImode); +}) + +(define_insn "bcc_ne" + [(parallel + [(set (pc) + (if_then_else + (match_operator 1 "sw_64_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")]) + (label_ref (match_operand 0)) + (pc))) + (clobber (match_operand:DI 4 "register_operand" "=r"))])] + "flag_sw_branch_combination==1" + "cmp%C1 %r2,%3,%r4 + bne %r4,%0" + [(set_attr "type" "ibr")]) + +(define_insn "bcc_eq" + [(parallel + [(set (pc) + (if_then_else + (match_operator 1 "sw_64_swapped_branch_combination" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (match_operand:DI 3 "reg_or_8bit_operand" "rI")]) + (label_ref (match_operand 0)) + (pc))) + (clobber (match_operand:DI 4 "register_operand" "=r"))])] + "flag_sw_branch_combination==1" + "cmp%D1 %r2,%3,%r4 + beq %r4,%0" + [(set_attr "type" "ibr")]) + +(define_insn "*bcc_reverse" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "register_operand" "r") + (const_int 0)]) + + (pc) + (label_ref (match_operand 0))))] + "" + "b%c1 %2,%0" + [(set_attr "type" "ibr")]) + +(define_insn "*blbs_normal" + [(set (pc) + (if_then_else + (ne (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (label_ref (match_operand 0)) + (pc)))] + "" + "blbs %r1,%0" + [(set_attr "type" "ibr")]) + +(define_insn "*blbc_normal" + [(set (pc) + (if_then_else + (eq (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") + (const_int 1) + (const_int 0)) + (const_int 0)) + (label_ref (match_operand 0)) + (pc)))] + "" + "blbc %r1,%0" + [(set_attr "type" "ibr")]) + +(define_split + [(parallel + [(set (pc) + (if_then_else + (match_operator 1 "comparison_operator" + [(zero_extract:DI (match_operand:DI 2 "register_operand") + (const_int 1) + (match_operand:DI 3 "const_int_operand")) + (const_int 0)]) + (label_ref (match_operand 0)) + (pc))) + (clobber (match_operand:DI 4 "register_operand"))])] + "INTVAL (operands[3]) != 0" + [(set (match_dup 4) + (lshiftrt:DI (match_dup 2) (match_dup 3))) + (set (pc) + (if_then_else (match_op_dup 1 + [(zero_extract:DI (match_dup 4) + (const_int 1) + (const_int 0)) + (const_int 0)]) + (label_ref (match_dup 0)) + (pc)))] +) + + +;; The following are the corresponding floating-point insns. Recall +;; we need to have variants that expand the arguments from SFmode +;; to DFmode. 
+ +(define_insn "*cmpdf_internal" + [(set (match_operand:DF 0 "register_operand" "=&f,&f") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG,fG") + (match_operand:DF 3 "reg_or_0_operand" "fG,fG")]))] + "TARGET_FP && flag_sw_sdsame == 0" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) +(define_insn "*cmpdf_internal_same" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG,fG") + (match_operand:DF 3 "reg_or_0_operand" "fG,fG")]))] + "TARGET_FP && flag_sw_sdsame == 1" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su") + (set (attr "enabled") + (cond [(eq_attr "alternative" "0") + (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") + ] + (symbol_ref "true")))]) + +(define_insn "*cmpdf_ext1" + [(set (match_operand:DF 0 "register_operand" "=&f") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (match_operand:DF 3 "reg_or_0_operand" "fG")]))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) +(define_insn "*cmpdf_ext1_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (match_operand:DF 3 "reg_or_0_operand" "fG")]))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*cmpdf_ext2" + [(set (match_operand:DF 0 "register_operand" "=&f") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) +(define_insn "*cmpdf_ext2_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (float_extend:DF + (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*cmpdf_ext3" + [(set (match_operand:DF 0 "register_operand" "=&f") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) +(define_insn "*cmpdf_ext3_same" + [(set (match_operand:DF 0 "register_operand" "=f") + (match_operator:DF 1 "sw_64_fp_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (float_extend:DF + (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] 
+ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" + "fcmp%C1%/ %R2,%R3,%0" + [(set_attr "type" "fadd") + (set_attr "trap" "yes") + (set_attr "trap_suffix" "su")]) + +(define_insn "*movcc_internal" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (if_then_else:FMODE + (match_operator 3 "signed_comparison_operator" + [(match_operand:DF 4 "reg_or_0_operand" "fG,fG") + (match_operand:DF 2 "const0_operand" "G,G")]) + (match_operand:FMODE 1 "reg_or_0_operand" "fG,0") + (match_operand:FMODE 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP" + "@ + fsel%C3 %R4,%R1,%0,%0 + fsel%D3 %R4,%R5,%0,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext1" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (if_then_else:DF + (match_operator 3 "signed_comparison_operator" + [(match_operand:DF 4 "reg_or_0_operand" "fG,fG") + (match_operand:DF 2 "const0_operand" "G,G")]) + (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,0")) + (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" + "@ + fsel%C3 %R4,%R1,%0,%0 + fsel%D3 %R4,%R5,%0,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext2" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (if_then_else:DF + (match_operator 3 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) + (match_operand:DF 2 "const0_operand" "G,G")]) + (match_operand:DF 1 "reg_or_0_operand" "fG,0") + (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" + "@ + fsel%C3 %R4,%R1,%0,%0 + fsel%D3 %R4,%R5,%0,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext3" + [(set (match_operand:SF 0 "register_operand" "=f,f") + (if_then_else:SF + (match_operator 3 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) + (match_operand:DF 2 "const0_operand" "G,G")]) + (match_operand:SF 1 "reg_or_0_operand" "fG,0") + (match_operand:SF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" + "@ + fsel%C3 %R4,%R1,%0,%0 + fsel%D3 %R4,%R5,%0,%0" + [(set_attr "type" "fcmov")]) + +(define_insn "*movdfcc_ext4" + [(set (match_operand:DF 0 "register_operand" "=f,f") + (if_then_else:DF + (match_operator 3 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) + (match_operand:DF 2 "const0_operand" "G,G")]) + (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,0")) + (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" + "@ + fsel%C3 %R4,%R1,%0,%0 + fsel%D3 %R4,%R5,%0,%0" + [(set_attr "type" "fcmov")]) + +(define_expand "smaxdf3" + [(set (match_dup 3) + (le:DF (match_operand:DF 1 "reg_or_0_operand") + (match_operand:DF 2 "reg_or_0_operand"))) + (set (match_operand:DF 0 "register_operand") + (if_then_else:DF (eq (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_expand "smindf3" + [(set (match_dup 3) + (lt:DF (match_operand:DF 1 "reg_or_0_operand") + (match_operand:DF 2 "reg_or_0_operand"))) + (set (match_operand:DF 0 "register_operand") + (if_then_else:DF (ne (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_expand "smaxsf3" + [(set (match_dup 3) + (le:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand")) + 
(float_extend:DF (match_operand:SF 2 "reg_or_0_operand")))) + (set (match_operand:SF 0 "register_operand") + (if_then_else:SF (eq (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_expand "sminsf3" + [(set (match_dup 3) + (lt:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand")) + (float_extend:DF (match_operand:SF 2 "reg_or_0_operand")))) + (set (match_operand:SF 0 "register_operand") + (if_then_else:SF (ne (match_dup 3) (match_dup 4)) + (match_dup 1) (match_dup 2)))] + "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" +{ + operands[3] = gen_reg_rtx (DFmode); + operands[4] = CONST0_RTX (DFmode); +}) + +(define_insn "*fbcc_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:DF 2 "reg_or_0_operand" "fG") + (match_operand:DF 3 "const0_operand" "G")]) + (label_ref (match_operand 0)) + (pc)))] + "TARGET_FP" + "fb%C1 %R2,%0" + [(set_attr "type" "fbr")]) + +(define_insn "*fbcc_ext_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(float_extend:DF + (match_operand:SF 2 "reg_or_0_operand" "fG")) + (match_operand:DF 3 "const0_operand" "G")]) + (label_ref (match_operand 0)) + (pc)))] + "TARGET_FP" + "fb%C1 %R2,%0" + [(set_attr "type" "fbr")]) + +;; These are the main define_expand's used to make conditional branches +;; and compares. + +(define_expand "cbranchsf4" + [(use (match_operator 0 "sw_64_cbranch_operator" + [(match_operand:SF 1 "reg_or_0_operand") + (match_operand:SF 2 "reg_or_0_operand")])) + (use (match_operand 3))] + "TARGET_FP && flag_sw_sf_cmpsel" + "sw_64_emit_conditional_branch (operands, SFmode); DONE;") + +(define_insn "*sfbcc_normal" + [(set (pc) + (if_then_else + (match_operator 1 "signed_comparison_operator" + [(match_operand:SF 2 "reg_or_0_operand" "fG") + (match_operand:SF 3 "const0_operand" "G")]) + (label_ref (match_operand 0)) + (pc)))] + "TARGET_FP && flag_sw_sf_cmpsel" + "fb%C1 %R2,%0" + [(set_attr "type" "fbr")]) + +(define_insn "*movsfcc_internal" + [(set (match_operand:FMODE 0 "register_operand" "=f,f") + (if_then_else:FMODE + (match_operator 3 "signed_comparison_operator" + [(match_operand:SF 4 "reg_or_0_operand" "fG,fG") + (match_operand:SF 2 "const0_operand" "G,G")]) + (match_operand:FMODE 1 "reg_or_0_operand" "fG,0") + (match_operand:FMODE 5 "reg_or_0_operand" "0,fG")))] + "TARGET_FP && flag_sw_sf_cmpsel" + "@ + fsel%C3 %R4,%R1,%0,%0 + fsel%D3 %R4,%R5,%0,%0" + [(set_attr "type" "fcmov")]) + +(define_expand "cbranchdf4" + [(use (match_operator 0 "sw_64_cbranch_operator" + [(match_operand:DF 1 "reg_or_0_operand") + (match_operand:DF 2 "reg_or_0_operand")])) + (use (match_operand 3))] + "TARGET_FP" + "sw_64_emit_conditional_branch (operands, DFmode); DONE;") + +(define_expand "cbranchtf4" + [(use (match_operator 0 "sw_64_cbranch_operator" + [(match_operand:TF 1 "general_operand") + (match_operand:TF 2 "general_operand")])) + (use (match_operand 3))] + "TARGET_HAS_XFLOATING_LIBS" + "sw_64_emit_conditional_branch (operands, TFmode); DONE;") + +(define_expand "cbranchdi4" + [(use (match_operator 0 "sw_64_cbranch_operator" + [(match_operand:DI 1 "general_operand") + (match_operand:DI 2 "general_operand")])) + (use (match_operand 3))] + "" + "sw_64_emit_conditional_branch (operands, DImode); DONE;") + +(define_expand "cstoredf4" + [(use (match_operator:DI 1 "sw_64_cbranch_operator" + [(match_operand:DF 2 "reg_or_0_operand") + 
(match_operand:DF 3 "reg_or_0_operand")])) + (clobber (match_operand:DI 0 "register_operand"))] + "TARGET_FP" +{ + if (sw_64_emit_setcc (operands, DFmode)) + DONE; + else + FAIL; +}) + +(define_expand "cstoretf4" + [(use (match_operator:DI 1 "sw_64_cbranch_operator" + [(match_operand:TF 2 "general_operand") + (match_operand:TF 3 "general_operand")])) + (clobber (match_operand:DI 0 "register_operand"))] + "TARGET_HAS_XFLOATING_LIBS" +{ + if (sw_64_emit_setcc (operands, TFmode)) + DONE; + else + FAIL; +}) + +(define_expand "cstoredi4" + [(use (match_operator:DI 1 "sw_64_cbranch_operator" + [(match_operand:DI 2 "general_operand") + (match_operand:DI 3 "general_operand")])) + (clobber (match_operand:DI 0 "register_operand"))] + "" +{ + if (sw_64_emit_setcc (operands, DImode)) + DONE; + else + FAIL; +}) + +;; These are the main define_expand's used to make conditional moves. + +(define_expand "movcc" + [(set (match_operand:I48MODE 0 "register_operand") + (if_then_else:I48MODE + (match_operand 1 "comparison_operator") + (match_operand:I48MODE 2 "reg_or_8bit_operand") + (match_operand:I48MODE 3 "reg_or_8bit_operand")))] + "" +{ + operands[1] = sw_64_emit_conditional_move (operands[1], mode); + if (operands[1] == 0) + FAIL; +}) + +(define_expand "movcc" + [(set (match_operand:FMODE 0 "register_operand") + (if_then_else:FMODE + (match_operand 1 "comparison_operator") + (match_operand:FMODE 2 "reg_or_8bit_operand") + (match_operand:FMODE 3 "reg_or_8bit_operand")))] + "" +{ + operands[1] = sw_64_emit_conditional_move (operands[1], mode); + if (operands[1] == 0) + FAIL; +}) + +;; These define_split definitions are used in cases when comparisons have +;; not be stated in the correct way and we need to reverse the second +;; comparison. For example, x >= 7 has to be done as x < 6 with the +;; comparison that tests the result being reversed. We have one define_split +;; for each use of a comparison. They do not match valid insns and need +;; not generate valid insns. +;; +;; We can also handle equality comparisons (and inequality comparisons in +;; cases where the resulting add cannot overflow) by doing an add followed by +;; a comparison with zero. This is faster since the addition takes one +;; less cycle than a compare when feeding into a conditional move. +;; For this case, we also have an SImode pattern since we can merge the add +;; and sign extend and the order doesn't matter. +;; +;; We do not do this for floating-point, since it isn't clear how the "wrong" +;; operation could have been generated. + +(define_split + [(set (match_operand:DI 0 "register_operand") + (if_then_else:DI + (match_operator 1 "comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand") + (match_operand:DI 3 "reg_or_cint_operand")]) + (match_operand:DI 4 "reg_or_cint_operand") + (match_operand:DI 5 "reg_or_cint_operand"))) + (clobber (match_operand:DI 6 "register_operand"))] + "operands[3] != const0_rtx" + [(set (match_dup 6) (match_dup 7)) + (set (match_dup 0) + (if_then_else:DI (match_dup 8) (match_dup 4) (match_dup 5)))] +{ + enum rtx_code code = GET_CODE (operands[1]); + int unsignedp = (code == GEU || code == LEU || code == GTU || code == LTU); + + /* If we are comparing for equality with a constant and that constant + appears in the arm when the register equals the constant, use the + register since that is more likely to match (and to produce better code + if both would). 
*/ + + if (code == EQ && CONST_INT_P (operands[3]) + && rtx_equal_p (operands[4], operands[3])) + operands[4] = operands[2]; + + else if (code == NE && CONST_INT_P (operands[3]) + && rtx_equal_p (operands[5], operands[3])) + operands[5] = operands[2]; + + if (code == NE || code == EQ + || (extended_count (operands[2], DImode, unsignedp) >= 1 + && extended_count (operands[3], DImode, unsignedp) >= 1)) + { + if (CONST_INT_P (operands[3])) + operands[7] = gen_rtx_PLUS (DImode, operands[2], + GEN_INT (- INTVAL (operands[3]))); + else + operands[7] = gen_rtx_MINUS (DImode, operands[2], operands[3]); + + operands[8] = gen_rtx_fmt_ee (code, VOIDmode, operands[6], const0_rtx); + } + + else if (code == EQ || code == LE || code == LT + || code == LEU || code == LTU) + { + operands[7] = gen_rtx_fmt_ee (code, DImode, operands[2], operands[3]); + operands[8] = gen_rtx_NE (VOIDmode, operands[6], const0_rtx); + } + else + { + operands[7] = gen_rtx_fmt_ee (reverse_condition (code), DImode, + operands[2], operands[3]); + operands[8] = gen_rtx_EQ (VOIDmode, operands[6], const0_rtx); + } +}) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (if_then_else:DI + (match_operator 1 "comparison_operator" + [(match_operand:SI 2 "reg_or_0_operand") + (match_operand:SI 3 "reg_or_cint_operand")]) + (match_operand:DI 4 "reg_or_8bit_operand") + (match_operand:DI 5 "reg_or_8bit_operand"))) + (clobber (match_operand:DI 6 "register_operand"))] + "operands[3] != const0_rtx + && (GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)" + [(set (match_dup 6) (match_dup 7)) + (set (match_dup 0) + (if_then_else:DI (match_dup 8) (match_dup 4) (match_dup 5)))] +{ + enum rtx_code code = GET_CODE (operands[1]); + int unsignedp = (code == GEU || code == LEU || code == GTU || code == LTU); + rtx tem; + + if ((code != NE && code != EQ + && ! (extended_count (operands[2], DImode, unsignedp) >= 1 + && extended_count (operands[3], DImode, unsignedp) >= 1))) + FAIL; + + if (CONST_INT_P (operands[3])) + tem = gen_rtx_PLUS (SImode, operands[2], + GEN_INT (- INTVAL (operands[3]))); + else + tem = gen_rtx_MINUS (SImode, operands[2], operands[3]); + + operands[7] = gen_rtx_SIGN_EXTEND (DImode, tem); + operands[8] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode, + operands[6], const0_rtx); +}) + +;; Prefer to use cmp and arithmetic when possible instead of a cmove. + +(define_split + [(set (match_operand 0 "register_operand") + (if_then_else (match_operator 1 "signed_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand") + (const_int 0)]) + (match_operand 3 "const_int_operand") + (match_operand 4 "const_int_operand")))] + "" + [(const_int 0)] +{ + if (sw_64_split_conditional_move (GET_CODE (operands[1]), operands[0], + operands[2], operands[3], operands[4])) + DONE; + else + FAIL; +}) + +;; ??? Why combine is allowed to create such non-canonical rtl, I don't know. +;; Oh well, we match it in movcc, so it must be partially our fault. 
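A rough C-level picture (an assumption, not a dump of actual RTL) of the add-then-compare-with-zero rewrite described in the comment above, for an equality test against a non-zero constant:

    long before (long x, long a, long b)
    {
      return x == 7 ? a : b;        /* as written: a compare feeding a cmove  */
    }

    long after (long x, long a, long b)
    {
      long t = x + (-7);            /* scratch (operand 6): the addition is
                                       one cycle cheaper than a compare here  */
      return t == 0 ? a : b;        /* conditional move tests t against zero  */
    }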
+(define_split + [(set (match_operand 0 "register_operand") + (if_then_else (match_operator 1 "signed_comparison_operator" + [(const_int 0) + (match_operand:DI 2 "reg_or_0_operand")]) + (match_operand 3 "const_int_operand") + (match_operand 4 "const_int_operand")))] + "" + [(const_int 0)] +{ + if (sw_64_split_conditional_move (swap_condition (GET_CODE (operands[1])), + operands[0], operands[2], operands[3], + operands[4])) + DONE; + else + FAIL; +}) + +(define_insn_and_split "*cmp_sadd_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (plus:DI (if_then_else:DI + (match_operator 1 "sw_64_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:DI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:DI 4 "sext_add_operand" "rIO"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (plus:DI (mult:DI (match_dup 5) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; +}) + +(define_insn_and_split "*cmp_sadd_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (plus:SI (if_then_else:SI + (match_operator 1 "sw_64_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "sext_add_operand" "rIO"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (plus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = gen_lowpart (DImode, operands[0]); + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +(define_insn_and_split "*cmp_sadd_sidi" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (plus:SI (if_then_else:SI + (match_operator 1 "sw_64_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "sext_add_operand" "rIO")))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (sign_extend:DI (plus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4))))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +(define_insn_and_split "*cmp_ssub_di" + [(set (match_operand:DI 0 "register_operand" "=r") + (minus:DI (if_then_else:DI + (match_operator 1 "sw_64_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:DI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:DI 4 "reg_or_8bit_operand" "rI"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (minus:DI (mult:DI (match_dup 5) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + 
operands[5] = operands[0]; +}) + +(define_insn_and_split "*cmp_ssub_si" + [(set (match_operand:SI 0 "register_operand" "=r") + (minus:SI (if_then_else:SI + (match_operator 1 "sw_64_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "reg_or_8bit_operand" "rI"))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (minus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4)))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = gen_lowpart (DImode, operands[0]); + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +(define_insn_and_split "*cmp_ssub_sidi" + [(set (match_operand:DI 0 "register_operand" "=r") + (sign_extend:DI + (minus:SI (if_then_else:SI + (match_operator 1 "sw_64_zero_comparison_operator" + [(match_operand:DI 2 "reg_or_0_operand" "rJ") + (const_int 0)]) + (match_operand:SI 3 "const48_operand" "I") + (const_int 0)) + (match_operand:SI 4 "reg_or_8bit_operand" "rI")))) + (clobber (match_scratch:DI 5 "=r"))] + "" + "#" + "" + [(set (match_dup 5) + (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) + (set (match_dup 0) + (sign_extend:DI (minus:SI (mult:SI (match_dup 6) (match_dup 3)) + (match_dup 4))))] +{ + if (can_create_pseudo_p ()) + operands[5] = gen_reg_rtx (DImode); + else if (reg_overlap_mentioned_p (operands[5], operands[4])) + operands[5] = operands[0]; + + operands[6] = gen_lowpart (SImode, operands[5]); +}) + +;; Here are the CALL and unconditional branch insns. Calls on NT and SYSV +;; work differently, so we have different patterns for each. + +(define_expand "call" + [(use (match_operand:DI 0)) + (use (match_operand 1)) + (use (match_operand 2)) + (use (match_operand 3))] + "" +{ + emit_call_insn (gen_call_osf (operands[0], operands[1])); + DONE; +}) + +(define_expand "sibcall" + [(parallel [(call (mem:DI (match_operand 0)) + (match_operand 1)) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)])] + "" +{ + gcc_assert (MEM_P (operands[0])); + operands[0] = XEXP (operands[0], 0); +}) + +(define_expand "call_osf" + [(parallel [(call (mem:DI (match_operand 0)) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "" +{ + gcc_assert (MEM_P (operands[0])); + + operands[0] = XEXP (operands[0], 0); + if (! call_operand (operands[0], Pmode)) + operands[0] = copy_to_mode_reg (Pmode, operands[0]); +}) + + +(define_expand "call_value" + [(use (match_operand 0)) + (use (match_operand:DI 1)) + (use (match_operand 2)) + (use (match_operand 3)) + (use (match_operand 4))] + "" +{ + emit_call_insn (gen_call_value_osf (operands[0], operands[1], + operands[2])); + DONE; +}) + +(define_expand "sibcall_value" + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand 1)) + (match_operand 2))) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)])] + "" +{ + gcc_assert (MEM_P (operands[1])); + operands[1] = XEXP (operands[1], 0); +}) + +(define_expand "call_value_osf" + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand 1)) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "" +{ + gcc_assert (MEM_P (operands[1])); + + operands[1] = XEXP (operands[1], 0); + if (! 
call_operand (operands[1], Pmode)) + operands[1] = copy_to_mode_reg (Pmode, operands[1]); +}) + +(define_insn "*call_osf_1_er_noreturn" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + call $26,($27),0 + bsr $26,%0\t\t!samegp + ldl $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#" + [(set_attr "type" "call") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_osf_1_er_setfpec0" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 3" + "@ + call $26,(%0),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* + bsr $26,%0\t\t!samegp + ldl $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +(define_insn "*call_osf_1_er_setfpec1" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 1 " + "@ + call $26,(%0),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* + bsr $26,%0\t\t!samegp + ldl $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +(define_insn "*call_osf_1_er" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS" + "@ + call $26,(%0),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* + bsr $26,%0\t\t!samegp + ldl $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +;; We must use peep2 instead of a split because we need accurate life +;; information for $gp. Consider the case of { bar (); while (1); }. +;;(define_peephole2 +;; [(parallel [(call (mem:DI (match_operand:DI 0 "call_operand")) +;; (match_operand 1)) +;; (use (reg:DI 29)) +;; (clobber (reg:DI 26))])] +;; "TARGET_EXPLICIT_RELOCS && reload_completed +;; && ! samegp_function_operand (operands[0], Pmode) +;; && (peep2_regno_dead_p (1, 29) +;; || find_reg_note (insn, REG_NORETURN, NULL_RTX))" +;; [(parallel [(call (mem:DI (match_dup 2)) +;; (match_dup 1)) +;; (use (reg:DI 29)) +;; (use (match_dup 0)) +;; (use (match_dup 3)) +;; (clobber (reg:DI 26))])] +;;{ +;; if (CONSTANT_P (operands[0])) +;; { +;; operands[2] = gen_rtx_REG (Pmode, 27); +;; operands[3] = GEN_INT (sw_64_next_sequence_number++); +;; emit_insn (gen_movdi_er_high_g (operands[2], pic_offset_table_rtx, +;; operands[0], operands[3])); +;; } +;; else +;; { +;; operands[2] = operands[0]; +;; operands[0] = const0_rtx; +;; operands[3] = const0_rtx; +;; } +;;}) + +;;(define_peephole2 +;; [(parallel [(call (mem:DI (match_operand:DI 0 "call_operand")) +;; (match_operand 1)) +;; (use (reg:DI 29)) +;; (clobber (reg:DI 26))])] +;; "TARGET_EXPLICIT_RELOCS && reload_completed +;; && ! samegp_function_operand (operands[0], Pmode) +;; && ! 
(peep2_regno_dead_p (1, 29) +;; || find_reg_note (insn, REG_NORETURN, NULL_RTX))" +;; [(parallel [(call (mem:DI (match_dup 2)) +;; (match_dup 1)) +;; (set (match_dup 5) +;; (unspec:DI [(match_dup 5) (match_dup 3)] UNSPEC_LDGP1)) +;; (use (match_dup 0)) +;; (use (match_dup 4)) +;; (clobber (reg:DI 26))]) +;; (set (match_dup 5) +;; (unspec:DI [(match_dup 5) (match_dup 3)] UNSPEC_LDGP2))] +;;{ +;; if (CONSTANT_P (operands[0])) +;; { +;; operands[2] = gen_rtx_REG (Pmode, 27); +;; operands[4] = GEN_INT (sw_64_next_sequence_number++); +;; emit_insn (gen_movdi_er_high_g (operands[2], pic_offset_table_rtx, +;; operands[0], operands[4])); +;; } +;; else +;; { +;; operands[2] = operands[0]; +;; operands[0] = const0_rtx; +;; operands[4] = const0_rtx; +;; } +;; operands[3] = GEN_INT (sw_64_next_sequence_number++); +;; operands[5] = pic_offset_table_rtx; +;;}) + + +(define_insn "*call_osf_2_er_nogp" + [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) + (match_operand 1)) + (use (reg:DI 29)) + (use (match_operand 2)) + (use (match_operand 3 "const_int_operand")) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS" + "call $26,(%0),%2%J3" + [(set_attr "type" "call")]) + + +(define_insn "*call_osf_2_er_setfpec0" + [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) + (match_operand 1)) + (set (reg:DI 29) + (unspec:DI [(reg:DI 29) (match_operand 4 "const_int_operand")] + UNSPEC_LDGP1)) + (use (match_operand 2)) + (use (match_operand 3 "const_int_operand")) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 3 " + "call $26,(%0),%2%J3\;ldih $29,0($26)\t\t!gpdisp!%4" + [(set_attr "type" "call") + (set_attr "cannot_copy" "true") + (set_attr "length" "8")]) + +(define_insn "*call_osf_2_er_setfpec1" + [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) + (match_operand 1)) + (set (reg:DI 29) + (unspec:DI [(reg:DI 29) (match_operand 4 "const_int_operand")] + UNSPEC_LDGP1)) + (use (match_operand 2)) + (use (match_operand 3 "const_int_operand")) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 1 " + "call $26,(%0),%2%J3\;ldih $29,0($26)\t\t!gpdisp!%4" + [(set_attr "type" "call") + (set_attr "cannot_copy" "true") + (set_attr "length" "8")]) + +(define_insn "*call_osf_2_er" + [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) + (match_operand 1)) + (set (reg:DI 29) + (unspec:DI [(reg:DI 29) (match_operand 4 "const_int_operand")] + UNSPEC_LDGP1)) + (use (match_operand 2)) + (use (match_operand 3 "const_int_operand")) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS" + "call $26,(%0),%2%J3\;ldih $29,0($26)\t\t!gpdisp!%4" + [(set_attr "type" "call") + (set_attr "cannot_copy" "true") + (set_attr "length" "8")]) + +(define_insn "*call_osf_1_noreturn" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + call $26,($27),0 + bsr $26,$%0..ng + call $26,%0" + [(set_attr "type" "call") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_osf_1" + [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) + (match_operand 1)) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! 
TARGET_EXPLICIT_RELOCS" + "@ + call $26,($27),0\;ldgp $29,0($26) + bsr $26,$%0..ng + call $26,%0\;ldgp $29,0($26)" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +(define_insn "*sibcall_osf_1_er" + [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "R,s")) + (match_operand 1)) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "TARGET_EXPLICIT_RELOCS" + "@ + br $31,%0\t\t!samegp + ldl $27,%0($29)\t\t!literal!%#\;jmp $31,($27),%0\t\t!lituse_jsr!%#" + [(set_attr "type" "call") + (set_attr "length" "*,8")]) + +;; Note that the assembler expands "jmp foo" with $at, which +;; doesn't do what we want. +(define_insn "*sibcall_osf_1" + [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "R,s")) + (match_operand 1)) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "! TARGET_EXPLICIT_RELOCS" + "@ + br $31,$%0..ng + ldi $27,%0\;jmp $31,($27),%0" + [(set_attr "type" "call") + (set_attr "length" "*,8")]) + +;; Call subroutine returning any type. + +(define_expand "untyped_call" + [(parallel [(call (match_operand 0) + (const_int 0)) + (match_operand 1) + (match_operand 2)])] + "" +{ + int i; + + emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx)); + + for (i = 0; i < XVECLEN (operands[2], 0); i++) + { + rtx set = XVECEXP (operands[2], 0, i); + emit_move_insn (SET_DEST (set), SET_SRC (set)); + } + + /* The optimizer does not know that the call sets the function value + registers we stored in the result block. We avoid problems by + claiming that all hard registers are used and clobbered at this + point. */ + emit_insn (gen_blockage ()); + + DONE; +}) + +;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and +;; all of memory. This blocks insns from being moved across this point. + +(define_insn "blockage" + [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] + "" + "" + [(set_attr "length" "0") + (set_attr "type" "none")]) + +(define_insn "jump" + [(set (pc) + (label_ref (match_operand 0)))] + "" + "br $31,%l0" + [(set_attr "type" "ibr")]) + +;; "ret $31,($26),1" +(define_expand "return" + [(return)] + "direct_return ()") + +(define_insn "*return_internal" + [(return)] + "reload_completed" +{ + return "ret $31,($26),1"; +} + + [(set_attr "type" "ibr")]) + +(define_insn "indirect_jump" + [(set (pc) (match_operand:DI 0 "register_operand" "r"))] + "" + "jmp $31,(%0),0" + [(set_attr "type" "ibr")]) + +(define_expand "tablejump" + [(parallel [(set (pc) + (match_operand 0 "register_operand")) + (use (label_ref:DI (match_operand 1)))])] + "" +{ + rtx dest = gen_reg_rtx (DImode); + emit_insn (gen_extendsidi2 (dest, operands[0])); + emit_insn (gen_adddi3 (dest, pic_offset_table_rtx, dest)); + operands[0] = dest; +}) + +(define_insn "*tablejump_internal" + [(set (pc) + (match_operand:DI 0 "register_operand" "r")) + (use (label_ref (match_operand 1)))] + "" + "jmp $31,(%0),0" + [(set_attr "type" "ibr")]) + +;; call_pal->sys_call 0x86 +;; Cache flush. Used by sw_64_trampoline_init. 0x86 is PAL_imb, but we don't +;; want to have to include pal.h in our .s file. +(define_insn "imb" + [(unspec_volatile [(const_int 0)] UNSPECV_IMB)] + "" + "sys_call 0x86" + [(set_attr "type" "callpal")]) + +(define_expand "clear_cache" + [(match_operand:DI 0) ; region start + (match_operand:DI 1)] ; region end + "" +{ + emit_insn (gen_imb ()); + DONE; +}) + +;; call_pal ->sys_call 0x80 +;; BUGCHK is documented common to SYSV PALcode. 
+(define_insn "trap" + [(trap_if (const_int 1) (const_int 0)) + (use (reg:DI 29))] + "" + "sys_call 0x80" + [(set_attr "type" "callpal")]) + +;; For userland, we load the thread pointer from the TCB. +;; For the kernel, we load the per-cpu private value. + +;; call_pal->sys_call xx +(define_insn "get_thread_pointerdi" + [(set (match_operand:DI 0 "register_operand" "=v") + (unspec:DI [(const_int 0)] UNSPEC_TP))] + "" +{ + if (TARGET_TLS_KERNEL) + return "sys_call 0x32"; + else if (flag_sw_rtid == 1) + return "rtid %0"; + else + return "sys_call 0x9e"; + ;;return "rtid %0"; +} + [(set_attr "type" "callpal")]) + +;; For completeness, and possibly a __builtin function, here's how to +;; set the thread pointer. Since we don't describe enough of this +;; quantity for CSE, we have to use a volatile unspec, and then there's +;; not much point in creating an R16_REG register class. + +(define_expand "set_thread_pointerdi" + [(set (reg:DI 16) (match_operand:DI 0 "input_operand")) + (unspec_volatile [(reg:DI 16)] UNSPECV_SET_TP)] + "") + +;; call_pal->sys_call xx +(define_insn "*set_tp" + [(unspec_volatile [(reg:DI 16)] UNSPECV_SET_TP)] + "" +{ + if (TARGET_TLS_KERNEL) + return "sys_call 0x31"; + else + return "sys_call 0x9f"; +} + [(set_attr "type" "callpal")]) + + +;; Finally, we have the basic data motion insns. The byte and word insns +;; are done via define_expand. Start with the floating-point insns, since +;; they are simpler. + +(define_expand "movsf" + [(set (match_operand:SF 0 "nonimmediate_operand") + (match_operand:SF 1 "general_operand"))] + "" +{ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], SFmode)) + operands[1] = force_reg (SFmode, operands[1]); +}) + +(define_insn "*movsf" + [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m,f,*r") + (match_operand:SF 1 "input_operand" "fG,m,*rG,m,fG,*r,*r,f"))] + "register_operand (operands[0], SFmode) + || reg_or_0_operand (operands[1], SFmode)" + "@ + fcpys %R1,%R1,%0 + fld%,%U1 %0,%1 + bis $31,%r1,%0 + ldw %0,%1 + fst%,%U0 %R1,%0 + stw %r1,%0 + ifmovs %1,%0 + fimovs %1,%0" + [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist,itof,ftoi") + (set_attr "isa" "*,*,*,*,*,*,fix,fix")]) + +(define_expand "movdf" + [(set (match_operand:DF 0 "nonimmediate_operand") + (match_operand:DF 1 "general_operand"))] + "" +{ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], DFmode)) + operands[1] = force_reg (DFmode, operands[1]); +}) +(define_insn "*movdf" + [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m,f,*r") + (match_operand:DF 1 "input_operand" "fG,m,*rG,m,fG,*r,*r,f"))] + "register_operand (operands[0], DFmode) + || reg_or_0_operand (operands[1], DFmode)" + "@ + fcpys %R1,%R1,%0 + fld%-%U1 %0,%1 + bis $31,%r1,%0 + ldl %0,%1 + fst%-%U0 %R1,%0 + stl %r1,%0 + ifmovd %1,%0 + fimovd %1,%0" + [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist,itof,ftoi") + (set_attr "isa" "*,*,*,*,*,*,fix,fix")]) + +;; Subregs suck for register allocation. Pretend we can move TFmode +;; data between general registers until after reload. +;; ??? Is this still true now that we have the lower-subreg pass? + +(define_expand "movtf" + [(set (match_operand:TF 0 "nonimmediate_operand") + (match_operand:TF 1 "general_operand"))] + "" +{ + if (MEM_P (operands[0]) + && ! 
reg_or_0_operand (operands[1], TFmode)) + operands[1] = force_reg (TFmode, operands[1]); +}) + +(define_insn_and_split "*movtf_internal" + [(set (match_operand:TF 0 "nonimmediate_operand" "=r,m") + (match_operand:TF 1 "input_operand" "rmG,rG"))] + "register_operand (operands[0], TFmode) + || reg_or_0_operand (operands[1], TFmode)" + "#" + "reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + "sw_64_split_tmode_pair (operands, TFmode, true);") + +;; We do two major things here: handle mem->mem and construct long +;; constants. + +(define_expand "movsi" + [(set (match_operand:SI 0 "nonimmediate_operand") + (match_operand:SI 1 "general_operand"))] + "" +{ + if (sw_64_expand_mov (SImode, operands)) + DONE; +}) + +(define_insn "*movsi" + [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,r,r,r,m,r") + (match_operand:SI 1 "input_operand" "rJ,K,L,T,s,n,m,rJ,s"))] + "register_operand (operands[0], SImode) + || reg_or_0_operand (operands[1], SImode)" + "@ + bis $31,%r1,%0 + ldi %0,%1($31) + ldih %0,%h1($31) + # + # + # + ldw%U1 %0,%1 + stw%U0 %r1,%0 + ldi %0,%1" + [(set_attr "type" "ilog,iadd,iadd,iadd,iadd,multi,ild,ist,ldsym") + (set_attr "isa" "*,*,*,*,*,*,*,*,vms")]) + +;; Split a load of a large constant into the appropriate two-insn +;; sequence. + +(define_split + [(set (match_operand:SI 0 "register_operand") + (match_operand:SI 1 "non_add_const_operand"))] + "" + [(const_int 0)] +{ + if (sw_64_split_const_mov (SImode, operands)) + DONE; + else + FAIL; +}) + +(define_insn "*movdi_er_low_l" + [(set (match_operand:DI 0 "register_operand" "=r") + (lo_sum:DI (match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "local_symbolic_operand")))] + "TARGET_EXPLICIT_RELOCS" +{ + if (true_regnum (operands[1]) == 29) + return "ldi %0,%2(%1)\t\t!gprel"; + else + return "ldi %0,%2(%1)\t\t!gprellow"; +} + [(set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "small_symbolic_operand"))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (lo_sum:DI (match_dup 2) (match_dup 1)))] + "operands[2] = pic_offset_table_rtx;") + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "local_symbolic_operand"))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (plus:DI (match_dup 2) (high:DI (match_dup 1)))) + (set (match_dup 0) + (lo_sum:DI (match_dup 0) (match_dup 1)))] + "operands[2] = pic_offset_table_rtx;") + +(define_split + [(match_operand 0 "some_small_symbolic_operand")] + "" + [(match_dup 0)] + "operands[0] = split_small_symbolic_operand (operands[0]);") + +;; Accepts any symbolic, not just global, since function calls that +;; don't go via bsr still use !literal in hopes of linker relaxation. 
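;; Illustrative aside: the !literal, !lituse_jsr and !gpdisp annotations
;; used throughout this file follow the Alpha-style explicit-relocation
;; scheme.  A GOT-indirect call, for example, comes out roughly as
;;     ldl  $27,foo($29)        !literal!N
;;     call $26,($27),foo       !lituse_jsr!N
;;     ldih $29,0($26)          !gpdisp!M
;;     ldi  $29,0($29)          !gpdisp!M
;; ("foo" is just a placeholder symbol).  The shared sequence number N lets
;; the linker relax the literal load plus indirect call into a direct bsr,
;; and M pairs the two instructions that recompute $gp from the return
;; address.  The !samegp form skips that gp reload for callees known to
;; share the caller's GP.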
+(define_insn "movdi_er_high_g" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_LITERAL))] + "TARGET_EXPLICIT_RELOCS" +{ + if (INTVAL (operands[3]) == 0) + return "ldl %0,%2(%1)\t\t!literal"; + else + return "ldl %0,%2(%1)\t\t!literal!%3"; +} + [(set_attr "type" "ldsym")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "global_symbolic_operand"))] + "TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (unspec:DI [(match_dup 2) + (match_dup 1) + (const_int 0)] UNSPEC_LITERAL))] + "operands[2] = pic_offset_table_rtx;") + +(define_insn "movdi_er_tlsgd" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_TLSGD))] + "HAVE_AS_TLS" +{ + if (INTVAL (operands[3]) == 0) + return "ldi %0,%2(%1)\t\t!tlsgd"; + else + return "ldi %0,%2(%1)\t\t!tlsgd!%3"; +} +[(set_attr "cannot_copy" "true")]) + + +(define_insn "*movdi_er_tlsrelgot" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_TLSRELGOT))] + "HAVE_AS_TLS" +{ + if (INTVAL (operands[3]) == 0) + return "ldih %0,%2(%1)\t\t!tlsrel_got"; + else + return "ldih %0,%2(%1)\t\t!tlsrel_got!%3"; +} +[(set_attr "cannot_copy" "true")]) + + +(define_insn "movdi_er_tlsldm" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPEC_TLSLDM))] + "HAVE_AS_TLS" +{ + if (INTVAL (operands[2]) == 0) + return "ldi %0,%&(%1)\t\t!tlsldm"; + else + return "ldi %0,%&(%1)\t\t!tlsldm!%2"; +} +[(set_attr "cannot_copy" "true")]) + +;; insert ldih insn with tlsrelgot relocation before ldl insn with gotdtprel relocation. 
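;; Illustrative aside: taken together, the UNSPEC_TLS* patterns in this
;; area implement the usual ELF TLS models.  A general-dynamic access of a
;; variable `x', for instance, is assumed to expand along the lines of
;;     ldi  $16,x($29)                  !tlsgd!N
;;     ldl  $27,__tls_get_addr($29)     !literal!N
;;     call $26,($27),__tls_get_addr    !lituse_tlsgd!N
;; (movdi_er_tlsgd plus the TLS call_value_osf pattern near the end of the
;; file), while initial-exec accesses combine the !gottprel loads below
;; with the thread pointer read by get_thread_pointerdi.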
+(define_insn "*movdi_er_gotdtprel" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_GOTDTPREL))] + "HAVE_AS_TLS" +{ + if (INTVAL (operands[3]) == 0) + return "ldl %0,%2(%1)\t\t!gotdtprel"; + else + return "ldl %0,%2(%1)\t\t!gotdtprel!%3"; +} +[(set_attr "type" "ild") + (set_attr "usegp" "yes")]) + +(define_insn "*movdi_er_gotdtp" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand")] + UNSPEC_DTPREL))] + "HAVE_AS_TLS" + "ldl %0,%2(%1)\t\t!gotdtprel" + [(set_attr "type" "ild") + (set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "gotdtp_symbolic_operand"))] + "HAVE_AS_TLS && reload_completed" + [(set (match_dup 0) + (unspec:DI [(match_dup 2) + (match_dup 1)] UNSPEC_DTPREL))] +{ + operands[1] = XVECEXP (XEXP (operands[1], 0), 0, 0); + operands[2] = pic_offset_table_rtx; +}) + +(define_insn "*movdi_er_gottprel" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand") + (match_operand 3 "const_int_operand")] + UNSPEC_TPREL))] + "HAVE_AS_TLS" +{ + if (INTVAL (operands[3]) == 0) + return "ldl %0,%2(%1)\t\t!gottprel"; + else + return "ldl %0,%2(%1)\t\t!gottprel!%3"; +} +[(set_attr "type" "ild") + (set_attr "usegp" "yes")]) + +(define_insn "*movdi_er_gottp" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand:DI 2 "symbolic_operand")] + UNSPEC_TPREL))] + "HAVE_AS_TLS" + "ldl %0,%2(%1)\t\t!gottprel" + [(set_attr "type" "ild") + (set_attr "usegp" "yes")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "gottp_symbolic_operand"))] + "HAVE_AS_TLS && reload_completed" + [(set (match_dup 0) + (unspec:DI [(match_dup 2) + (match_dup 1)] UNSPEC_TPREL))] +{ + operands[1] = XVECEXP (XEXP (operands[1], 0), 0, 0); + operands[2] = pic_offset_table_rtx; +}) +(define_insn "*movdi" + [(set (match_operand:DI 0 "nonimmediate_operand" + "=r,r,r,r,r,r,r,r, m, *f,*f, Q, r,*f") + (match_operand:DI 1 "input_operand" + "rJ,K,L,T,s,n,s,m,rJ,*fJ, Q,*f,*f, r"))] + "register_operand (operands[0], DImode) + || reg_or_0_operand (operands[1], DImode)" + "@ + mov %r1,%0 + ldi %0,%1($31) + ldih %0,%h1($31) + # + # + # + ldi %0,%1 + ldl%A1%U1 %0,%1 + stl%A0%U0 %r1,%0 + fmov %R1,%0 + fldd%U1 %0,%1 + fstd%U0 %R1,%0 + fimovd %1,%0 + ifmovd %1,%0" + [(set_attr "type" "ilog,iadd,iadd,iadd,ldsym,multi,ldsym,ild,ist,fcpys,fld,fst,ftoi,itof") + (set_attr "isa" "*,*,*,er,er,*,ner,*,*,*,*,*,fix,fix") + (set_attr "usegp" "*,*,*,yes,*,*,*,*,*,*,*,*,*,*")]) + +(define_insn "force_movdi" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r")] + UNSPECV_FORCE_MOV))] + "" + "mov %1,%0" + [(set_attr "type" "ilog")]) + +;; We do three major things here: handle mem->mem, put 64-bit constants in +;; memory, and construct long 32-bit constants. + +(define_expand "movdi" + [(set (match_operand:DI 0 "nonimmediate_operand") + (match_operand:DI 1 "general_operand"))] + "" +{ + if (sw_64_expand_mov (DImode, operands)) + DONE; +}) + +;; Split a load of a large constant into the appropriate two-insn +;; sequence. 
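;; Illustrative aside: the two-insn sequence referred to above is the
;; usual ldih/ldi decomposition.  Since ldi sign-extends its 16-bit
;; displacement, the high half is biased by one whenever bit 15 of the
;; constant is set.  For 0x12348765, for instance, the low 16 bits
;; sign-extend to -0x789b, so the split emits roughly
;;     ldih $1,0x1235($31)      ; 0x1235 << 16        = 0x12350000
;;     ldi  $1,-0x789b($1)      ; 0x12350000 - 0x789b = 0x12348765
;; (register number and exact operand syntax are only for illustration).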
+ +(define_split + [(set (match_operand:DI 0 "register_operand") + (match_operand:DI 1 "non_add_const_operand"))] + "" + [(const_int 0)] +{ + if (sw_64_split_const_mov (DImode, operands)) + DONE; + else + FAIL; +}) + +;; We need to prevent reload from splitting TImode moves, because it +;; might decide to overwrite a pointer with the value it points to. +;; In that case we have to do the loads in the appropriate order so +;; that the pointer is not destroyed too early. + +(define_insn_and_split "*movti_internal" + [(set (match_operand:TI 0 "nonimmediate_operand" "=r,m") + (match_operand:TI 1 "input_operand" "rmJ,rJ"))] + "(register_operand (operands[0], TImode) + /* Prevent rematerialization of constants. */ + && ! CONSTANT_P (operands[1])) + || reg_or_0_operand (operands[1], TImode)" + "#" + "reload_completed" + [(set (match_dup 0) (match_dup 2)) + (set (match_dup 1) (match_dup 3))] + "sw_64_split_tmode_pair (operands, TImode, true);") + +(define_expand "movti" + [(set (match_operand:TI 0 "nonimmediate_operand") + (match_operand:TI 1 "general_operand"))] + "" +{ + if (MEM_P (operands[0]) + && ! reg_or_0_operand (operands[1], TImode)) + operands[1] = force_reg (TImode, operands[1]); + + if (operands[1] == const0_rtx) + ; + /* We must put 64-bit constants in memory. We could keep the + 32-bit constants in TImode and rely on the splitter, but + this doesn't seem to be worth the pain. */ + else if (CONST_SCALAR_INT_P (operands[1])) + { + rtx in[2], out[2], target; + + gcc_assert (can_create_pseudo_p ()); + + split_double (operands[1], &in[0], &in[1]); + + if (in[0] == const0_rtx) + out[0] = const0_rtx; + else + { + out[0] = gen_reg_rtx (DImode); + emit_insn (gen_movdi (out[0], in[0])); + } + + if (in[1] == const0_rtx) + out[1] = const0_rtx; + else + { + out[1] = gen_reg_rtx (DImode); + emit_insn (gen_movdi (out[1], in[1])); + } + + if (!REG_P (operands[0])) + target = gen_reg_rtx (TImode); + else + target = operands[0]; + + emit_insn (gen_movdi (operand_subword (target, 0, 0, TImode), out[0])); + emit_insn (gen_movdi (operand_subword (target, 1, 0, TImode), out[1])); + + if (target != operands[0]) + emit_insn (gen_rtx_SET (operands[0], target)); + + DONE; + } +}) + +;; These are the partial-word cases. +;; +;; First we have the code to load an aligned word. Operand 0 is the register +;; in which to place the result. It's mode is QImode or HImode. Operand 1 +;; is an SImode MEM at the low-order byte of the proper word. Operand 2 is the +;; number of bits within the word that the value is. Operand 3 is an SImode +;; scratch register. If operand 0 is a hard register, operand 3 may be the +;; same register. It is allowed to conflict with operand 1 as well. + +(define_expand "aligned_loadqi" + [(set (match_operand:SI 3 "register_operand") + (match_operand:SI 1 "memory_operand")) + (set (match_operand:DI 0 "register_operand") + (zero_extract:DI (subreg:DI (match_dup 3) 0) + (const_int 8) + (match_operand:DI 2 "const_int_operand")))]) + +(define_expand "aligned_loadhi" + [(set (match_operand:SI 3 "register_operand") + (match_operand:SI 1 "memory_operand")) + (set (match_operand:DI 0 "register_operand") + (zero_extract:DI (subreg:DI (match_dup 3) 0) + (const_int 16) + (match_operand:DI 2 "const_int_operand")))]) + +;; Similar for unaligned loads, where we use the sequence from the +;; Sw_64 Architecture manual. We have to distinguish between little-endian +;; and big-endian systems as the sequences are different. +;; +;; Operand 1 is the address. 
Operands 2 and 3 are temporaries, where +;; operand 3 can overlap the input and output registers. + +(define_expand "unaligned_loadqi" + [(set (match_operand:DI 2 "register_operand") + (mem:DI (and:DI (match_operand:DI 1 "address_operand") + (const_int -8)))) + (set (match_operand:DI 3 "register_operand") + (match_dup 1)) + (set (match_operand:DI 0 "register_operand") + (zero_extract:DI (match_dup 2) + (const_int 8) + (ashift:DI (match_dup 3) (const_int 3))))]) + +(define_expand "unaligned_loadhi" + [(set (match_operand:DI 2 "register_operand") + (mem:DI (and:DI (match_operand:DI 1 "address_operand") + (const_int -8)))) + (set (match_operand:DI 3 "register_operand") + (match_dup 1)) + (set (match_operand:DI 0 "register_operand") + (zero_extract:DI (match_dup 2) + (const_int 16) + (ashift:DI (match_dup 3) (const_int 3))))]) + +;; Storing an aligned byte or word requires two temporaries. Operand 0 is the +;; aligned SImode MEM. Operand 1 is the register containing the +;; byte or word to store. Operand 2 is the number of bits within the word that +;; the value should be placed. Operands 3 and 4 are SImode temporaries. + +(define_expand "aligned_store" + [(set (match_operand:SI 3 "register_operand") + (match_operand:SI 0 "memory_operand")) + (set (subreg:DI (match_dup 3) 0) + (and:DI (subreg:DI (match_dup 3) 0) (match_dup 5))) + (set (subreg:DI (match_operand:SI 4 "register_operand") 0) + (ashift:DI (zero_extend:DI (match_operand 1 "register_operand")) + (match_operand:DI 2 "const_int_operand"))) + (set (subreg:DI (match_dup 4) 0) + (ior:DI (subreg:DI (match_dup 4) 0) (subreg:DI (match_dup 3) 0))) + (set (match_dup 0) (match_dup 4))] + "" +{ + operands[5] = GEN_INT (~ (GET_MODE_MASK (GET_MODE (operands[1])) + << INTVAL (operands[2]))); +}) + +;; For the unaligned byte and halfword cases, we use code similar to that +;; in the ;; Architecture book, but reordered to lower the number of registers +;; required. Operand 0 is the address. Operand 1 is the data to store. +;; Operands 2, 3, and 4 are DImode temporaries, where operands 2 and 4 may +;; be the same temporary, if desired. If the address is in a register, +;; operand 2 can be that register. + +(define_expand "@unaligned_store" + [(set (match_operand:DI 3 "register_operand") + (mem:DI (and:DI (match_operand:DI 0 "address_operand") + (const_int -8)))) + (set (match_operand:DI 2 "register_operand") + (match_dup 0)) + (set (match_dup 3) + (and:DI (not:DI (ashift:DI (match_dup 5) + (ashift:DI (match_dup 2) (const_int 3)))) + (match_dup 3))) + (set (match_operand:DI 4 "register_operand") + (ashift:DI (zero_extend:DI + (match_operand:I12MODE 1 "register_operand")) + (ashift:DI (match_dup 2) (const_int 3)))) + (set (match_dup 4) (ior:DI (match_dup 4) (match_dup 3))) + (set (mem:DI (and:DI (match_dup 0) (const_int -8))) + (match_dup 4))] + "" + "operands[5] = GEN_INT (GET_MODE_MASK (mode));") + +;; Here are the define_expand's for QI and HI moves that use the above +;; patterns. We have the normal sets, plus the ones that need scratch +;; registers for reload. 
+ +(define_expand "mov" + [(set (match_operand:I12MODE 0 "nonimmediate_operand") + (match_operand:I12MODE 1 "general_operand"))] + "" +{ + if (sw_64_expand_mov (mode, operands)) + DONE; +}) + +(define_insn "*movqi" + [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:QI 1 "input_operand" "rJ,n,m,rJ"))] + "register_operand (operands[0], QImode) + || reg_or_0_operand (operands[1], QImode)" + "@ + bis $31,%r1,%0 + ldi %0,%L1($31) + ldbu%U1 %0,%1 + stb%U0 %r1,%0" + [(set_attr "type" "ilog,iadd,ild,ist") + (set_attr "isa" "*,*,bwx,bwx")]) + +(define_insn "*movhi" + [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m") + (match_operand:HI 1 "input_operand" "rJ,n,m,rJ"))] + "register_operand (operands[0], HImode) + || reg_or_0_operand (operands[1], HImode)" + "@ + bis $31,%r1,%0 + ldi %0,%L1($31) + ldhu%U1 %0,%1 + sth%U0 %r1,%0" + [(set_attr "type" "ilog,iadd,ild,ist") + (set_attr "isa" "*,*,bwx,bwx")]) + +;; Helpers for the above. The way reload is structured, we can't +;; always get a proper address for a stack slot during reload_foo +;; expansion, so we must delay our address manipulations until after. + +(define_insn_and_split "@reload_in_aligned" + [(set (match_operand:I12MODE 0 "register_operand" "=r") + (match_operand:I12MODE 1 "memory_operand" "m"))] + "!TARGET_BWX && (reload_in_progress || reload_completed)" + "#" + "!TARGET_BWX && reload_completed" + [(const_int 0)] +{ + rtx aligned_mem, bitnum; + get_aligned_mem (operands[1], &aligned_mem, &bitnum); + emit_insn (gen_aligned_load + (gen_lowpart (DImode, operands[0]), aligned_mem, bitnum, + gen_rtx_REG (SImode, REGNO (operands[0])))); + DONE; +}) + +(define_mode_iterator VEC [V8QI V4HI V2SI]) +(define_mode_iterator VEC12 [V8QI V4HI]) + +(define_expand "mov" + [(set (match_operand:VEC 0 "nonimmediate_operand") + (match_operand:VEC 1 "general_operand"))] + "" +{ + if (sw_64_expand_mov (mode, operands)) + DONE; +}) + +(define_split + [(set (match_operand:VEC 0 "register_operand") + (match_operand:VEC 1 "non_zero_const_operand"))] + "" + [(const_int 0)] +{ + if (sw_64_split_const_mov (mode, operands)) + DONE; + else + FAIL; +}) + + +(define_expand "movmisalign" + [(set (match_operand:VEC 0 "nonimmediate_operand") + (match_operand:VEC 1 "general_operand"))] + "flag_sw_unalign_byte != 1 || !TARGET_SW8A" +{ + sw_64_expand_movmisalign (mode, operands); + DONE; +}) + +(define_insn "*mov_fix" + [(set (match_operand:VEC 0 "nonimmediate_operand" "=r,r,r,m,*f,*f,m,r,*f") + (match_operand:VEC 1 "input_operand" "rW,i,m,rW,*fW,m,*f,*f,r"))] + "register_operand (operands[0], mode) + || reg_or_0_operand (operands[1], mode)" + "@ + bis $31,%r1,%0 + # + ldl%A1%U1 %0,%1 + stl%A0%U0 %r1,%0 + fcpys %R1,%R1,%0 + fldd%U1 %0,%1 + fstd%U0 %R1,%0 + fimovd %1,%0 + ifmovd %1,%0" + [(set_attr "type" "ilog,multi,ild,ist,fcpys,fld,fst,ftoi,itof") + (set_attr "isa" "*,*,*,*,*,*,*,fix,fix")]) + +(define_insn "3" + [(set (match_operand:VEC12 0 "register_operand" "=r") + (any_maxmin:VEC12 + (match_operand:VEC12 1 "reg_or_0_operand" "rW") + (match_operand:VEC12 2 "reg_or_0_operand" "rW")))] + "TARGET_MAX" + " %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_insn "one_cmpl2" + [(set (match_operand:VEC 0 "register_operand" "=r") + (not:VEC (match_operand:VEC 1 "register_operand" "r")))] + "" + "ornot $31,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "and3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (and:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r")))] + "" + "and 
%1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*andnot3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (and:VEC (not:VEC (match_operand:VEC 1 "register_operand" "r")) + (match_operand:VEC 2 "register_operand" "r")))] + "" + "bic %2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "ior3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (ior:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r")))] + "" + "bis %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*iornot3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (ior:VEC (not:DI (match_operand:VEC 1 "register_operand" "r")) + (match_operand:VEC 2 "register_operand" "r")))] + "" + "ornot %2,%1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "xor3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (xor:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r")))] + "" + "xor %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_insn "*xornot3" + [(set (match_operand:VEC 0 "register_operand" "=r") + (not:VEC (xor:VEC (match_operand:VEC 1 "register_operand" "r") + (match_operand:VEC 2 "register_operand" "r"))))] + "" + "eqv %1,%2,%0" + [(set_attr "type" "ilog")]) + +(define_expand "vec_shl_" + [(set (match_operand:VEC 0 "register_operand") + (ashift:DI (match_operand:VEC 1 "register_operand") + (match_operand:DI 2 "reg_or_6bit_operand")))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[1] = gen_lowpart (DImode, operands[1]); +}) + +(define_expand "vec_shr_" + [(set (match_operand:VEC 0 "register_operand") + (lshiftrt:DI (match_operand:VEC 1 "register_operand") + (match_operand:DI 2 "reg_or_6bit_operand")))] + "" +{ + operands[0] = gen_lowpart (DImode, operands[0]); + operands[1] = gen_lowpart (DImode, operands[1]); +}) + +;; Bit field extract patterns which use ext[wlq][lh] + +(define_expand "extvmisaligndi" + [(set (match_operand:DI 0 "register_operand") + (sign_extract:DI (match_operand:BLK 1 "memory_operand") + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand")))] + "" +{ + /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. */ + if (INTVAL (operands[3]) % 8 != 0 + || (INTVAL (operands[2]) != 16 + && INTVAL (operands[2]) != 32 + && INTVAL (operands[2]) != 64)) + FAIL; + + sw_64_expand_unaligned_load (operands[0], operands[1], + INTVAL (operands[2]) / 8, + INTVAL (operands[3]) / 8, 1); + DONE; +}) + +(define_expand "extzvdi" + [(set (match_operand:DI 0 "register_operand") + (zero_extract:DI (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand")))] + "" +{ + /* We can do 8, 16, 32 and 64 bit fields, if aligned on byte boundaries. */ + if (INTVAL (operands[3]) % 8 != 0 + || (INTVAL (operands[2]) != 8 + && INTVAL (operands[2]) != 16 + && INTVAL (operands[2]) != 32 + && INTVAL (operands[2]) != 64)) + FAIL; +}) + +(define_expand "extzvmisaligndi" + [(set (match_operand:DI 0 "register_operand") + (zero_extract:DI (match_operand:BLK 1 "memory_operand") + (match_operand:DI 2 "const_int_operand") + (match_operand:DI 3 "const_int_operand")))] + "" +{ + /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. + We fail 8-bit fields, falling back on a simple byte load. 
*/ + if (INTVAL (operands[3]) % 8 != 0 + || (INTVAL (operands[2]) != 16 + && INTVAL (operands[2]) != 32 + && INTVAL (operands[2]) != 64)) + FAIL; + + sw_64_expand_unaligned_load (operands[0], operands[1], + INTVAL (operands[2]) / 8, + INTVAL (operands[3]) / 8, 0); + DONE; +}) + +(define_expand "insvmisaligndi" + [(set (zero_extract:DI (match_operand:BLK 0 "memory_operand") + (match_operand:DI 1 "const_int_operand") + (match_operand:DI 2 "const_int_operand")) + (match_operand:DI 3 "register_operand"))] + "" +{ + /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. */ + if (INTVAL (operands[2]) % 8 != 0 + || (INTVAL (operands[1]) != 16 + && INTVAL (operands[1]) != 32 + && INTVAL (operands[1]) != 64)) + FAIL; + + sw_64_expand_unaligned_store (operands[0], operands[3], + INTVAL (operands[1]) / 8, + INTVAL (operands[2]) / 8); + DONE; +}) + +;; Block move/clear, see sw_64.c for more details. +;; Argument 0 is the destination +;; Argument 1 is the source +;; Argument 2 is the length +;; Argument 3 is the alignment + +(define_expand "cpymemqi" + [(parallel [(set (match_operand:BLK 0 "memory_operand") + (match_operand:BLK 1 "memory_operand")) + (use (match_operand:DI 2 "immediate_operand")) + (use (match_operand:DI 3 "immediate_operand"))])] + "flag_sw_unalign_byte != 1 || !TARGET_SW8A" +{ + if (sw_64_expand_block_move (operands)) + DONE; + else + FAIL; +}) + +(define_expand "setmemqi" + [(parallel [(set (match_operand:BLK 0 "memory_operand") + (match_operand 2 "const_int_operand")) + (use (match_operand:DI 1 "immediate_operand")) + (use (match_operand:DI 3 "immediate_operand"))])] + "flag_sw_unalign_byte != 1 || !TARGET_SW8A" +{ + /* If value to set is not zero, use the library routine. */ + if (operands[2] != const0_rtx) + FAIL; + + if (sw_64_expand_block_clear (operands)) + DONE; + else + FAIL; +}) + +;; Subroutine of stack space allocation. Perform a stack probe. +(define_expand "stack_probe_internal" + [(set (match_dup 1) (match_operand:DI 0 "const_int_operand"))] + "" +{ + operands[1] = gen_rtx_MEM (DImode, plus_constant (Pmode, stack_pointer_rtx, + INTVAL (operands[0]))); + MEM_VOLATILE_P (operands[1]) = 1; + + operands[0] = const0_rtx; +}) + +;; This is how we allocate stack space. If we are allocating a +;; constant amount of space and we know it is less than 4096 +;; bytes, we need do nothing. +;; +;; If it is more than 4096 bytes, we need to probe the stack +;; periodically. +(define_expand "allocate_stack" + [(set (reg:DI 30) + (plus:DI (reg:DI 30) + (match_operand:DI 1 "reg_or_cint_operand"))) + (set (match_operand:DI 0 "register_operand" "=r") + (match_dup 2))] + "" +{ + if (CONST_INT_P (operands[1]) + && INTVAL (operands[1]) < 32768) + { + if (INTVAL (operands[1]) >= 4096) + { + /* We do this the same way as in the prologue and generate explicit + probes. Then we update the stack by the constant. 
*/ + + int probed = 4096; + + emit_insn (gen_stack_probe_internal (GEN_INT (- probed))); + while (probed + 8192 < INTVAL (operands[1])) + emit_insn (gen_stack_probe_internal + (GEN_INT (- (probed += 8192)))); + + if (probed + 4096 < INTVAL (operands[1])) + emit_insn (gen_stack_probe_internal + (GEN_INT (- INTVAL (operands[1])))); + } + + operands[1] = GEN_INT (- INTVAL (operands[1])); + operands[2] = virtual_stack_dynamic_rtx; + } + else + { + rtx_code_label *out_label = 0; + rtx_code_label *loop_label = gen_label_rtx (); + rtx want = gen_reg_rtx (Pmode); + rtx tmp = gen_reg_rtx (Pmode); + rtx memref, test; + + emit_insn (gen_subdi3 (want, stack_pointer_rtx, + force_reg (Pmode, operands[1]))); + + if (!CONST_INT_P (operands[1])) + { + rtx limit = GEN_INT (4096); + out_label = gen_label_rtx (); + test = gen_rtx_LTU (VOIDmode, operands[1], limit); + emit_jump_insn + (gen_cbranchdi4 (test, operands[1], limit, out_label)); + } + + emit_insn (gen_adddi3 (tmp, stack_pointer_rtx, GEN_INT (-4096))); + emit_label (loop_label); + memref = gen_rtx_MEM (DImode, tmp); + MEM_VOLATILE_P (memref) = 1; + emit_move_insn (memref, const0_rtx); + emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (-8192))); + test = gen_rtx_GTU (VOIDmode, tmp, want); + emit_jump_insn (gen_cbranchdi4 (test, tmp, want, loop_label)); + + memref = gen_rtx_MEM (DImode, want); + MEM_VOLATILE_P (memref) = 1; + emit_move_insn (memref, const0_rtx); + + if (out_label) + emit_label (out_label); + + emit_move_insn (stack_pointer_rtx, want); + emit_move_insn (operands[0], virtual_stack_dynamic_rtx); + DONE; + } +}) + +;; This is used by sw_64_expand_prolog to do the same thing as above, +;; except we cannot at that time generate new basic blocks, so we hide +;; the loop in this one insn. + +(define_insn "prologue_stack_probe_loop" + [(unspec_volatile [(match_operand:DI 0 "register_operand" "r") + (match_operand:DI 1 "register_operand" "r")] + UNSPECV_PSPL)] + "" +{ + operands[2] = gen_label_rtx (); + (*targetm.asm_out.internal_label) (asm_out_file, "L", + CODE_LABEL_NUMBER (operands[2])); + + return "stl $31,-8192(%1)\;subl %0,1,%0\;ldi %1,-8192(%1)\;bne %0,%l2"; +} + [(set_attr "length" "16") + (set_attr "type" "multi")]) + +(define_expand "prologue" + [(const_int 0)] + "" +{ + sw_64_expand_prologue (); + DONE; +}) + +;; These take care of emitting the ldgp insn in the prologue. This will be +;; an ldi/ldih pair and we want to align them properly. So we have two +;; unspec_volatile insns, the first of which emits the ldgp assembler macro +;; and the second of which emits nothing. However, both are marked as type +;; IADD (the default) so the alignment code in sw_64.c does the right thing +;; with them. + +(define_expand "prologue_ldgp" + [(set (match_dup 0) + (unspec_volatile:DI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1)) + (set (match_dup 0) + (unspec_volatile:DI [(match_dup 0) (match_dup 2)] UNSPECV_PLDGP2))] + "" +{ + operands[0] = pic_offset_table_rtx; + operands[1] = gen_rtx_REG (Pmode, 27); + operands[2] = (TARGET_EXPLICIT_RELOCS + ? 
GEN_INT (sw_64_next_sequence_number++) + : const0_rtx); +}) + +(define_insn "*ldgp_er_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_LDGP1))] + "TARGET_EXPLICIT_RELOCS" + "ldih %0,0(%1)\t\t!gpdisp!%2" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*ldgp_er_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPEC_LDGP2))] + "TARGET_EXPLICIT_RELOCS" + "ldi %0,0(%1)\t\t!gpdisp!%2" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*exc_ldgp_er_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_LDGP2))] + "TARGET_EXPLICIT_RELOCS" + "ldi %0,0(%1)\t\t!gpdisp!%2" + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_er_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_PLDGP2))] + "TARGET_EXPLICIT_RELOCS" +{ + return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:"; +} + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_LDGP1))] + "" +{ + return "ldgp %0,0(%1)\n$%~..ng:"; +} + [(set_attr "cannot_copy" "true")]) + +(define_insn "*prologue_ldgp_2" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") + (match_operand 2 "const_int_operand")] + UNSPECV_PLDGP2))] + "" +) + +(define_insn "hardware_prefetch_use_syscall" +[(unspec_volatile [ +(match_operand:DI 0 "register_operand" "=r") +(match_operand:DI 1 "register_operand" "=r") +] UNSPECV_HARDWARE_PREFETCH_CNT)] +"" +{ + return "ldi $16,110($31)\;ldi $18,1($31)\;ldi $19,120($30)\;\ +stl %0,120($30)\;\ +ldl $27,syscall($29)\t\t!literal!%#\;call $26,($27),syscall\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*\;" + ; +} +[(set_attr "type" "multi") + (set_attr "length" "8")]) + +;; The _mcount profiling hook has special calling conventions, and +;; does not clobber all the registers that a normal call would. So +;; hide the fact this is a call at all. + +(define_insn "prologue_mcount" + [(unspec_volatile [(const_int 0)] UNSPECV_MCOUNT)] + "" +{ + if (TARGET_EXPLICIT_RELOCS) + /* Note that we cannot use a lituse_jsr reloc, since _mcount + cannot be called via the PLT. 
*/ + return "ldl $28,_mcount($29)\t\t!literal\;call $28,($28),_mcount"; + else + return "ldi $28,_mcount\;call $28,($28),_mcount"; +} + [(set_attr "type" "multi") + (set_attr "length" "8")]) + +(define_insn "init_fp" + [(set (match_operand:DI 0 "register_operand" "=r") + (match_operand:DI 1 "register_operand" "r")) + (clobber (mem:BLK (match_operand:DI 2 "register_operand" "=r")))] + "" + "bis $31,%1,%0") + +(define_expand "epilogue" + [(return)] + "" + "sw_64_expand_epilogue ();") + +(define_expand "sibcall_epilogue" + [(return)] + "" +{ + sw_64_expand_epilogue (); + DONE; +}) + +(define_expand "builtin_longjmp" + [(use (match_operand:DI 0 "register_operand" "r"))] + "" +{ + /* The elements of the buffer are, in order: */ + rtx fp = gen_rtx_MEM (Pmode, operands[0]); + rtx lab = gen_rtx_MEM (Pmode, plus_constant (Pmode, operands[0], 8)); + rtx stack = gen_rtx_MEM (Pmode, plus_constant (Pmode, operands[0], 16)); + rtx pv = gen_rtx_REG (Pmode, 27); + + /* This bit is the same as expand_builtin_longjmp. */ + emit_move_insn (pv, lab); + emit_stack_restore (SAVE_NONLOCAL, stack); + emit_use (hard_frame_pointer_rtx); + emit_use (stack_pointer_rtx); + + emit_move_insn (hard_frame_pointer_rtx, fp); + /* Load the label we are jumping through into $27 so that we know + where to look for it when we get back to setjmp's function for + restoring the gp. */ + emit_jump_insn (gen_builtin_longjmp_internal (pv)); + emit_barrier (); + DONE; +}) + +;; This is effectively a copy of indirect_jump, but constrained such +;; that register renaming cannot foil our cunning plan with $27. +(define_insn "builtin_longjmp_internal" + [(set (pc) + (unspec_volatile [(match_operand:DI 0 "register_operand" "c")] + UNSPECV_LONGJMP))] + "" + "jmp $31,(%0),0" + [(set_attr "type" "ibr")]) + +(define_expand "builtin_setjmp_receiver" + [(unspec_volatile [(label_ref (match_operand 0))] UNSPECV_SETJMPR)] + "") + +(define_insn_and_split "*builtin_setjmp_receiver_1" + [(unspec_volatile [(match_operand 0)] UNSPECV_SETJMPR)] + "" +{ + if (TARGET_EXPLICIT_RELOCS) + return "#"; + else + return "br $27,$LSJ%=\n$LSJ%=:\;ldgp $29,0($27)"; +} + "&& TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 1) + (unspec_volatile:DI [(match_dup 2) (match_dup 3)] UNSPECV_LDGP1)) + (set (match_dup 1) + (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_LDGP2))] +{ + if (prev_nonnote_insn (curr_insn) != XEXP (operands[0], 0)) + emit_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, operands[0]), + UNSPECV_SETJMPR_ER)); + operands[1] = pic_offset_table_rtx; + operands[2] = gen_rtx_REG (Pmode, 27); + operands[3] = GEN_INT (sw_64_next_sequence_number++); +} + [(set_attr "length" "12") + (set_attr "type" "multi")]) + +(define_insn "*builtin_setjmp_receiver_er_sl_1" + [(unspec_volatile [(match_operand 0)] UNSPECV_SETJMPR_ER)] + "TARGET_EXPLICIT_RELOCS" + "ldi $27,$LSJ%=-%l0($27)\n$LSJ%=:") + +;; When flag_reorder_blocks_and_partition is in effect, compiler puts +;; exception landing pads in a cold section. To prevent inter-section offset +;; calculation, a jump to original landing pad is emitted in the place of the +;; original landing pad. Since landing pad is moved, RA-relative GP +;; calculation in the prologue of landing pad breaks. To solve this problem, +;; we use alternative GP load approach. 
+ +(define_expand "exception_receiver" + [(unspec_volatile [(match_dup 0)] UNSPECV_EHR)] + "" +{ + if (flag_reorder_blocks_and_partition) + operands[0] = copy_rtx (sw_64_gp_save_rtx ()); + else + operands[0] = const0_rtx; +}) + +(define_insn "*exception_receiver_2" + [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")] UNSPECV_EHR)] + "flag_reorder_blocks_and_partition" + "ldl $29,%0" + [(set_attr "type" "ild")]) + +(define_insn_and_split "*exception_receiver_1" + [(unspec_volatile [(const_int 0)] UNSPECV_EHR)] + "" +{ + if (TARGET_EXPLICIT_RELOCS) + return "#"; + else + return "ldgp $29,0($26)"; +} + "&& TARGET_EXPLICIT_RELOCS && reload_completed" + [(set (match_dup 0) + (unspec_volatile:DI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1)) + (set (match_dup 0) + (unspec_volatile:DI [(match_dup 0) (match_dup 2)] UNSPECV_LDGP2))] +{ + operands[0] = pic_offset_table_rtx; + operands[1] = gen_rtx_REG (Pmode, 26); + operands[2] = GEN_INT (sw_64_next_sequence_number++); +} + [(set_attr "length" "8") + (set_attr "type" "multi")]) + +;; Prefetch data. +;; +;; +;; On SW6, these become official prefetch instructions. + +(define_insn "prefetch" + [(prefetch (match_operand:DI 0 "address_operand" "p") + (match_operand:DI 1 "const_int_operand" "n") + (match_operand:DI 2 "const_int_operand" "n"))] + "sw_64_cpu == PROCESSOR_SW6 || sw_64_cpu == PROCESSOR_SW8" +{ + /* Interpret "no temporal locality" as this data should be evicted once + it is used. The "evict next" alternatives load the data into the cache + and leave the LRU eviction counter pointing to that block. */ + static const char * alt[2][2] ; + if (flag_sw_prefetch_l1) + { + alt[0][0] = "fillcs_e %a0" ; /* read, evict next. */ + alt[0][1] = "fillcs %a0" ; /* read, evict next. */ + alt[1][0] = "fillde_e %a0" ; /* write, evict next. */ + alt[1][1] = "fillde %a0" ; /* write, evict next. */ + + } + else + { + alt[0][0] = "s_fillde %a0" ; /* read, evict next. */ + alt[0][1] = "s_fillcs %a0" ; /* read, evict next. */ + alt[1][0] = "fillde_e %a0" ; /* write, evict next. */ + alt[1][1] = "fillde %a0" ; /* write, evict next. */ + } + + bool write = INTVAL (operands[1]) != 0; + bool lru = INTVAL (operands[2]) != 0; + + return alt[write][lru]; +} + [(set_attr "type" "ild")]) + + +;; Close the trap shadow of preceding instructions. This is generated +;; by sw_64_reorg. + +(define_insn "trapb" + [(unspec_volatile [(const_int 0)] UNSPECV_TRAPB)] + "" + "memb" + [(set_attr "type" "misc")]) + +;; No-op instructions used by machine-dependent reorg to preserve +;; alignment for instruction issue. +;; The Unicos/Mk assembler does not support these opcodes. + +(define_insn "nop" + [(const_int 0)] + "" + "nop" + [(set_attr "type" "ilog")]) + +(define_insn "fnop" + [(const_int 1)] + "TARGET_FP" + "fcpys $f31,$f31,$f31" + [(set_attr "type" "fcpys")]) + +(define_insn "unop" + [(const_int 2)] + "" + "ldl_u $31,0($30)") + +(define_insn "realign" + [(unspec_volatile [(match_operand 0 "immediate_operand" "i")] + UNSPECV_REALIGN)] + "" + ".align %0 #realign") + +;; Instructions to be emitted from __builtins. + +(define_insn "builtin_cmpbge" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rI")] + UNSPEC_CMPBGE))] + "" + "cmpgeb %r1,%2,%0" + ;; The SW6 data sheets list this as ILOG. OTOH, SW6 doesn't + ;; actually differentiate between ILOG and ICMP in the schedule. 
+ [(set_attr "type" "icmp")]) + +(define_expand "extbl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (8), operands[2])); + DONE; +}) + +(define_expand "extwl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (16), operands[2])); + DONE; +}) + +(define_expand "extll" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (32), operands[2])); + DONE; +}) + +(define_expand "extql" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (64), operands[2])); + DONE; +}) + +(define_expand "builtin_insbl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + operands[1] = gen_lowpart (QImode, operands[1]); + emit_insn (gen_insbl (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "builtin_inswl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + operands[1] = gen_lowpart (HImode, operands[1]); + emit_insn (gen_inswl (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "builtin_insll" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + operands[1] = gen_lowpart (SImode, operands[1]); + emit_insn (gen_insll (operands[0], operands[1], operands[2])); + DONE; +}) + +(define_expand "inswh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (16), operands[2])); + DONE; +}) + +(define_expand "inslh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (32), operands[2])); + DONE; +}) + +(define_expand "insqh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (64), operands[2])); + DONE; +}) + +(define_expand "mskbl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + rtx mask = GEN_INT (0xff); + emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "mskwl" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + rtx mask = GEN_INT (0xffff); + emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "mskll" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + rtx mask = gen_int_mode (0xffffffff, DImode); + emit_insn (gen_mskxl (operands[0], operands[1], mask, 
operands[2])); + DONE; +}) + +(define_expand "mskql" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + rtx mask = constm1_rtx; + emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); + DONE; +}) + +(define_expand "mskwh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (16), operands[2])); + DONE; +}) + +(define_expand "msklh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (32), operands[2])); + DONE; +}) + +(define_expand "mskqh" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "register_operand") + (match_operand:DI 2 "reg_or_8bit_operand")] + "" +{ + emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (64), operands[2])); + DONE; +}) + +(define_expand "builtin_zap" + [(set (match_operand:DI 0 "register_operand") + (and:DI (unspec:DI + [(match_operand:DI 2 "reg_or_cint_operand")] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_cint_operand")))] + "" +{ + if (CONST_INT_P (operands[2])) + { + rtx mask = sw_64_expand_zap_mask (INTVAL (operands[2])); + + if (mask == const0_rtx) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (mask == constm1_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_anddi3 (operands[0], operands[1], mask)); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]); +}) + +(define_insn "*builtin_zap_1" + [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") + (and:DI (unspec:DI + [(match_operand:QI 2 "reg_or_cint_operand" "n,n,r,r")] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_cint_operand" "n,r,J,r")))] + "" + "@ + # + # + bis $31,$31,%0 + zap %r1,%2,%0" + [(set_attr "type" "shift,shift,ilog,shift")]) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (and:DI (unspec:DI + [(match_operand:QI 2 "const_int_operand")] + UNSPEC_ZAP) + (match_operand:DI 1 "const_int_operand")))] + "" + [(const_int 0)] +{ + rtx mask = sw_64_expand_zap_mask (INTVAL (operands[2])); + + operands[1] = gen_int_mode (INTVAL (operands[1]) & INTVAL (mask), DImode); + emit_move_insn (operands[0], operands[1]); + DONE; +}) + +(define_split + [(set (match_operand:DI 0 "register_operand") + (and:DI (unspec:DI + [(match_operand:QI 2 "const_int_operand")] + UNSPEC_ZAP) + (match_operand:DI 1 "register_operand")))] + "" + [(set (match_dup 0) + (and:DI (match_dup 1) (match_dup 2)))] +{ + operands[2] = sw_64_expand_zap_mask (INTVAL (operands[2])); + if (operands[2] == const0_rtx) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (operands[2] == constm1_rtx) + { + emit_move_insn (operands[0], operands[1]); + DONE; + } +}) + +(define_expand "builtin_zapnot" + [(set (match_operand:DI 0 "register_operand") + (and:DI (unspec:DI + [(not:QI (match_operand:DI 2 "reg_or_cint_operand"))] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_cint_operand")))] + "" +{ + if (CONST_INT_P (operands[2])) + { + rtx mask = sw_64_expand_zap_mask (~ INTVAL (operands[2])); + + if (mask == const0_rtx) + { + emit_move_insn (operands[0], const0_rtx); + DONE; + } + if (mask == constm1_rtx) + { + emit_move_insn (operands[0], operands[1]); + 
DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + emit_insn (gen_anddi3 (operands[0], operands[1], mask)); + DONE; + } + + operands[1] = force_reg (DImode, operands[1]); + operands[2] = gen_lowpart (QImode, operands[2]); +}) + +(define_insn "*builtin_zapnot_1" + [(set (match_operand:DI 0 "register_operand" "=r") + (and:DI (unspec:DI + [(not:QI (match_operand:QI 2 "register_operand" "r"))] + UNSPEC_ZAP) + (match_operand:DI 1 "reg_or_0_operand" "rJ")))] + "" + "zapnot %r1,%2,%0" + [(set_attr "type" "shift")]) + +(define_insn "builtin_amask" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "reg_or_8bit_operand" "rI")] + UNSPEC_AMASK))] + "" + "amask %1,%0" + [(set_attr "type" "ilog")]) + +(define_insn "builtin_implver" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec:DI [(const_int 0)] UNSPEC_IMPLVER))] + "" + "implver %0" + [(set_attr "type" "ilog")]) + +(define_insn "builtin_rpcc" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_RPCC))] + "" + "rtc %0" + [(set_attr "type" "ilog")]) + +(define_expand "builtin_minub8" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + sw_64_expand_builtin_vector_binop (gen_uminv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_minsb8" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + sw_64_expand_builtin_vector_binop (gen_sminv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_minuw4" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + sw_64_expand_builtin_vector_binop (gen_uminv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_minsw4" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + sw_64_expand_builtin_vector_binop (gen_sminv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxub8" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + sw_64_expand_builtin_vector_binop (gen_umaxv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxsb8" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + sw_64_expand_builtin_vector_binop (gen_smaxv8qi3, V8QImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxuw4" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + sw_64_expand_builtin_vector_binop (gen_umaxv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_expand "builtin_maxsw4" + [(match_operand:DI 0 "register_operand") + (match_operand:DI 1 "reg_or_0_operand") + (match_operand:DI 2 "reg_or_0_operand")] + "TARGET_MAX" +{ + sw_64_expand_builtin_vector_binop (gen_smaxv4hi3, V4HImode, operands[0], + operands[1], operands[2]); + DONE; +}) + +(define_insn "builtin_perr" + [(set (match_operand:DI 0 
"register_operand" "=r") + (unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "%rJ") + (match_operand:DI 2 "reg_or_8bit_operand" "rJ")] + UNSPEC_PERR))] + "TARGET_MAX" + "perr %r1,%r2,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_pklb" + [(set (match_operand:DI 0 "register_operand") + (vec_concat:V8QI + (vec_concat:V4QI + (truncate:V2QI (match_operand:DI 1 "register_operand")) + (match_dup 2)) + (match_dup 3)))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V8QImode, operands[0]); + operands[1] = gen_lowpart (V2SImode, operands[1]); + operands[2] = CONST0_RTX (V2QImode); + operands[3] = CONST0_RTX (V4QImode); +}) + +(define_insn "*pklb" + [(set (match_operand:V8QI 0 "register_operand" "=r") + (vec_concat:V8QI + (vec_concat:V4QI + (truncate:V2QI (match_operand:V2SI 1 "register_operand" "r")) + (match_operand:V2QI 2 "const0_operand")) + (match_operand:V4QI 3 "const0_operand")))] + "TARGET_MAX" + "pklb %r1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_pkwb" + [(set (match_operand:DI 0 "register_operand") + (vec_concat:V8QI + (truncate:V4QI (match_operand:DI 1 "register_operand")) + (match_dup 2)))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V8QImode, operands[0]); + operands[1] = gen_lowpart (V4HImode, operands[1]); + operands[2] = CONST0_RTX (V4QImode); +}) + +(define_insn "*pkwb" + [(set (match_operand:V8QI 0 "register_operand" "=r") + (vec_concat:V8QI + (truncate:V4QI (match_operand:V4HI 1 "register_operand" "r")) + (match_operand:V4QI 2 "const0_operand")))] + "TARGET_MAX" + "pkwb %r1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_unpkbl" + [(set (match_operand:DI 0 "register_operand") + (zero_extend:V2SI + (vec_select:V2QI (match_operand:DI 1 "register_operand") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V2SImode, operands[0]); + operands[1] = gen_lowpart (V8QImode, operands[1]); +}) + +(define_insn "*unpkbl" + [(set (match_operand:V2SI 0 "register_operand" "=r") + (zero_extend:V2SI + (vec_select:V2QI (match_operand:V8QI 1 "reg_or_0_operand" "rW") + (parallel [(const_int 0) (const_int 1)]))))] + "TARGET_MAX" + "unpkbl %r1,%0" + [(set_attr "type" "mvi")]) + +(define_expand "builtin_unpkbw" + [(set (match_operand:DI 0 "register_operand") + (zero_extend:V4HI + (vec_select:V4QI (match_operand:DI 1 "register_operand") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_MAX" +{ + operands[0] = gen_lowpart (V4HImode, operands[0]); + operands[1] = gen_lowpart (V8QImode, operands[1]); +}) + +(define_insn "*unpkbw" + [(set (match_operand:V4HI 0 "register_operand" "=r") + (zero_extend:V4HI + (vec_select:V4QI (match_operand:V8QI 1 "reg_or_0_operand" "rW") + (parallel [(const_int 0) + (const_int 1) + (const_int 2) + (const_int 3)]))))] + "TARGET_MAX" + "unpkbw %r1,%0" + [(set_attr "type" "mvi")]) + +(include "sync.md") + +;; The call patterns are at the end of the file because their +;; wildcard operand0 interferes with nice recognition. 
+ +(define_insn "*call_value_osf_1_er_noreturn" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + call $26,($27),0 + bsr $26,%1\t\t!samegp + ldl $27,%1($29)\t\t!literal!%#\;call $26,($27),%1\t\t!lituse_jsr!%#" + [(set_attr "type" "call") + (set_attr "length" "*,*,8")]) + +(define_insn "*call_value_osf_1_er_setfpec0" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 3 " + "@ + call $26,(%1),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* + bsr $26,%1\t\t!samegp + ldl $27,%1($29)\t\t!literal!%#\;call $26,($27),0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +(define_insn "*call_value_osf_1_er_setfpec1" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 1" + "@ + call $26,(%1),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* + bsr $26,%1\t\t!samegp + ldl $27,%1($29)\t\t!literal!%#\;call $26,($27),0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +(define_insn "*call_value_osf_1_er" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS" + "@ + call $26,(%1),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* + bsr $26,%1\t\t!samegp + ldl $27,%1($29)\t\t!literal!%#\;call $26,($27),0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +;; We must use peep2 instead of a split because we need accurate life +;; information for $gp. Consider the case of { bar (); while (1); }. +(define_peephole2 + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "TARGET_EXPLICIT_RELOCS && reload_completed + && ! samegp_function_operand (operands[1], Pmode) + && (peep2_regno_dead_p (1, 29) + || find_reg_note (insn, REG_NORETURN, NULL_RTX))" + [(parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (match_dup 2))) + (use (reg:DI 29)) + (use (match_dup 1)) + (use (match_dup 4)) + (clobber (reg:DI 26))])] +{ + if (CONSTANT_P (operands[1])) + { + operands[3] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (sw_64_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[3], pic_offset_table_rtx, + operands[1], operands[4])); + } + else + { + operands[3] = operands[1]; + operands[1] = const0_rtx; + operands[4] = const0_rtx; + } +}) + +(define_peephole2 + [(parallel [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "TARGET_EXPLICIT_RELOCS && reload_completed + && ! samegp_function_operand (operands[1], Pmode) + && ! 
(peep2_regno_dead_p (1, 29) + || find_reg_note (insn, REG_NORETURN, NULL_RTX)) + && !enable_asan_check_stack ()" + [(parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (match_dup 2))) + (set (match_dup 6) + (unspec:DI [(match_dup 6) (match_dup 4)] UNSPEC_LDGP1)) + (use (match_dup 1)) + (use (match_dup 5)) + (clobber (reg:DI 26))]) + (set (match_dup 6) + (unspec:DI [(match_dup 6) (match_dup 4)] UNSPEC_LDGP2))] +{ + if (CONSTANT_P (operands[1])) + { + operands[3] = gen_rtx_REG (Pmode, 27); + operands[5] = GEN_INT (sw_64_next_sequence_number++); + emit_insn (gen_movdi_er_high_g (operands[3], pic_offset_table_rtx, + operands[1], operands[5])); + } + else + { + operands[3] = operands[1]; + operands[1] = const0_rtx; + operands[5] = const0_rtx; + } + operands[4] = GEN_INT (sw_64_next_sequence_number++); + operands[6] = pic_offset_table_rtx; +}) + +(define_insn "*call_value_osf_2_er_nogp" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "register_operand" "c")) + (match_operand 2))) + (use (reg:DI 29)) + (use (match_operand 3)) + (use (match_operand 4)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS" + "call $26,(%1),%3%J4" + [(set_attr "type" "call")]) + +(define_insn "*call_value_osf_2_er" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "register_operand" "c")) + (match_operand 2))) + (set (reg:DI 29) + (unspec:DI [(reg:DI 29) (match_operand 5 "const_int_operand")] + UNSPEC_LDGP1)) + (use (match_operand 3)) + (use (match_operand 4)) + (clobber (reg:DI 26))] + "TARGET_EXPLICIT_RELOCS" + { + return "call $26,(%1),%3%J4\;ldih $29,0($26)\t\t!gpdisp!%5"; + } + [(set_attr "type" "call") + (set_attr "cannot_copy" "true") + (set_attr "length" "8")]) + +(define_insn "*call_value_osf_1_noreturn" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS + && find_reg_note (insn, REG_NORETURN, NULL_RTX)" + "@ + call $26,($27),0 + bsr $26,$%1..ng + call $26,%1" + [(set_attr "type" "call") + (set_attr "length" "*,*,8")]) + +(define_int_iterator TLS_CALL + [UNSPEC_TLSGD_CALL + UNSPEC_TLSLDM_CALL]) + +(define_int_attr tls + [(UNSPEC_TLSGD_CALL "tlsgd") + (UNSPEC_TLSLDM_CALL "tlsldm")]) + +(define_insn "call_value_osf_" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "HAVE_AS_TLS" + "ldl $27,%1($29)\t\t!literal!%2\;call $26,($27),%1\t\t!lituse_!%2\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" + [(set_attr "type" "call") + (set_attr "cannot_copy" "true") + (set_attr "length" "16")]) + +;; We must use peep2 instead of a split because we need accurate life +;; information for $gp. 
+(define_peephole2 + [(parallel + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "HAVE_AS_TLS && reload_completed + && peep2_regno_dead_p (1, 29)" + [(set (match_dup 3) + (unspec:DI [(match_dup 5) + (match_dup 1) + (match_dup 2)] UNSPEC_LITERAL)) + (parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (const_int 0))) + (use (match_dup 5)) + (use (match_dup 1)) + (use (unspec [(match_dup 2)] TLS_CALL)) + (clobber (reg:DI 26))]) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] +{ + operands[3] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (sw_64_next_sequence_number++); + operands[5] = pic_offset_table_rtx; +}) + +(define_peephole2 + [(parallel + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand")) + (const_int 0))) + (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) + (use (reg:DI 29)) + (clobber (reg:DI 26))])] + "HAVE_AS_TLS && reload_completed + && !peep2_regno_dead_p (1, 29) + && !find_reg_note (insn, REG_EH_REGION, NULL_RTX)" + [(set (match_dup 3) + (unspec:DI [(match_dup 5) + (match_dup 1) + (match_dup 2)] UNSPEC_LITERAL)) + (parallel [(set (match_dup 0) + (call (mem:DI (match_dup 3)) + (const_int 0))) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1)) + (use (match_dup 1)) + (use (unspec [(match_dup 2)] TLS_CALL)) + (clobber (reg:DI 26))]) + (set (match_dup 5) + (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] +{ + operands[3] = gen_rtx_REG (Pmode, 27); + operands[4] = GEN_INT (sw_64_next_sequence_number++); + operands[5] = pic_offset_table_rtx; +}) + + +(define_insn "*call_value_osf_1_setfpec0" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 3" + "@ + call $26,($27),0\;ldgp $29,0($26) + bsr $26,$%1..ng + call $26,%1\;ldgp $29,0($26)" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +(define_insn "*call_value_osf_1_setfpec1" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 1" + "@ + call $26,($27),0\;ldgp $29,0($26) + bsr $26,$%1..ng + call $26,%1\;ldgp $29,0($26)" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +(define_insn "*call_value_osf_1" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) + (match_operand 2))) + (use (reg:DI 29)) + (clobber (reg:DI 26))] + "! TARGET_EXPLICIT_RELOCS" + "@ + call $26,($27),0\;ldgp $29,0($26) + bsr $26,$%1..ng + call $26,%1\;ldgp $29,0($26)" + [(set_attr "type" "call") + (set_attr "length" "12,*,16")]) + +(define_insn "*sibcall_value_osf_1_er" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand" "R,s")) + (match_operand 2))) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "TARGET_EXPLICIT_RELOCS" + "@ + br $31,%1\t\t!samegp + ldl $27,%1($29)\t\t!literal!%#\;jmp $31,($27),%1\t\t!lituse_jsr!%#" + [(set_attr "type" "call") + (set_attr "length" "*,8")]) + +(define_insn "*sibcall_value_osf_1" + [(set (match_operand 0) + (call (mem:DI (match_operand:DI 1 "symbolic_operand" "R,s")) + (match_operand 2))) + (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] + "! 
TARGET_EXPLICIT_RELOCS" + "@ + br $31,$%1..ng + ldi $27,%1\;jmp $31,($27),%1" + [(set_attr "type" "call") + (set_attr "length" "*,8")]) + +;; Builtins to replace 1.0f/sqrtf(x) with instructions using RSQRTE and the +;; appropriate fixup. +;; Currently, does not work with the double precision floating-point.(0x5fe6eb000000000a) +(define_expand "rsqrtsf2" + [(match_operand:SF 0 "register_operand" "") + (match_operand:SF 1 "register_operand" "")] + "TARGET_FP && flag_reciprocal_math == 1 && flag_sw_rsqrt == 1" + { + sw_64_emit_rsqrt (operands[0], operands[1], 1); + DONE; + }) + +(define_insn "*movsf2" + [(set (match_operand:SF 0 "nonimmediate_operand" "=r") + (unspec:SF [(match_operand:SF 1 "input_operand" "f")] + UNSPEC_FIMOVS))] + "TARGET_FP && flag_reciprocal_math == 1 && flag_sw_rsqrt == 1" + "fimovs %1,%0" + [(set_attr "type" "ldsym")]) + +(define_insn "speculation_barrier" + [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)] + "" + "imemb" + [(set_attr "type" "misc")]) + +(define_insn "stack_tie" + [(set (mem:BLK (scratch)) + (unspec:BLK [(match_operand:DI 0 "register_operand" "r") + (match_operand:DI 1 "register_operand" "r")] + UNSPEC_TIE))] + "" + "" + [(set_attr "length" "0")] +) + +(include "m32.md") diff --git a/gcc/config/sw_64/sw_64.opt b/gcc/config/sw_64/sw_64.opt new file mode 100644 index 0000000000000000000000000000000000000000..fdb6304a283e7226f471dfe94db6ddf15e6f8246 --- /dev/null +++ b/gcc/config/sw_64/sw_64.opt @@ -0,0 +1,318 @@ +; Options for the Sw_64 port of the compiler +; +; Copyright (C) 2005-2020 Free Software Foundation, Inc. +; +; This file is part of GCC. +; +; GCC is free software; you can redistribute it and/or modify it under +; the terms of the GNU General Public License as published by the Free +; Software Foundation; either version 3, or (at your option) any later +; version. +; +; GCC is distributed in the hope that it will be useful, but WITHOUT +; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public +; License for more details. +; +; You should have received a copy of the GNU General Public License +; along with GCC; see the file COPYING3. If not see +; . +fsw-sf-cmpsel +Target Var(flag_sw_sf_cmpsel) Init(0) +use or not use SF cmp/br/selcet instructions. + +msw-use-32align +C C++ Fortran LTO Driver Target Mask(SW_32ALIGN) Save +Use or not use 32align. + +fsw-hardware-prefetch +Target Var(flag_sw_hardware_prefetch) Init(0) +set hardware_prefetch registers:PFH_CTL,PFH_CNT. + +fsw-hardware-prefetch-clt= +Common Report Joined RejectNegative UInteger Var(flag_hardware_prefetch_clt) Init(5) Optimization + +fsw-hardware-prefetch-cnt-l1= +Common Report Joined RejectNegative UInteger Var(flag_hardware_prefetch_cnt_l1) Init(0) Optimization + +fsw-hardware-prefetch-cnt-l2= +Common Report Joined RejectNegative UInteger Var(flag_hardware_prefetch_cnt_l2) Init(0) Optimization + +fsw-hardware-prefetch-cnt-l3= +Common Report Joined RejectNegative UInteger Var(flag_hardware_prefetch_cnt_l3) Init(5) Optimization + +fsw-fselect +Target Var(flag_sw_fselect) Init(0) +Use or not use less instructions for sel/fsel. + +fsw-branch-fusion +Target Var(flag_sw_branch_fusion) Init(1) +fuse the cbranch instructions. + +fsw-branch-combination +Target Var(flag_sw_branch_combination) Init(0) +combine the cbranch instructions. + +fsw-unalign-byte +Target Var(flag_sw_unalign_byte) Init(0) +Not use or use ldl_u/stl_u instructions. 
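+; Note (illustrative, not part of the original option list): a record of the
+; form "Target Var(flag_sw_foo) Init(0)" is how GCC's .opt machinery creates
+; both the -fsw-foo/-fno-sw-foo command-line switches and the backing C
+; variable flag_sw_foo, so backend code can simply test it, e.g.
+;   if (flag_sw_unalign_byte)
+;     ... emit ldl_u/stl_u sequences ...
+; "-fsw-foo"/"flag_sw_foo" above are placeholder names.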
+
+fsw-rev
+Target Report Var(flag_sw_rev) Init(1)
+Use or not use the rev instruction.
+
+fsw-cmov
+Target Report Var(flag_sw_cmov) Init(1)
+Use the added floating-point integer conversion instruction.
+
+fsw-bitop
+Target Report Var(flag_sw_bitop) Init(0)
+Use the ISA bit-operation instructions.
+
+fsw-shift-word
+Target Report Var(flag_sw_shift_word) Init(1)
+Use or not use the sw8a shift instructions.
+
+fsw-int-divmod
+Target Report Var(flag_sw_int_divmod) Init(1)
+Use or not use the int div/mod instructions.
+
+fsw-fprnd
+Target Report Var(flag_sw_fprnd) Init(0)
+Use floating-point rounding instructions.
+
+fsw-auto-inc-dec
+Target Var(flag_sw_auto_inc_dec) Init(0)
+Use or not use int auto-inc-dec load/store instructions.
+
+fsw-use-cas
+Target Var(flag_sw_use_cas) Init(1)
+Use or not use the compare-and-swap instruction.
+
+fsw-fma
+Target Report Var(flag_sw_fma) Init(1)
+Enable the fma option.
+
+fsw-sdsame
+Target Report Var(flag_sw_sdsame) Init(0)
+For destination and source being the same.
+;;;;;;;;;;;;;;;;;;;;;;;;;
+
+fsw-rsqrt
+Target Report Var(flag_sw_rsqrt) Init(0)
+Fast calculation of 1.0f/sqrtf (x). Does not work with double-precision floating-point.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+fsw-fast-math
+Target Report Var(flag_sw_fast_math) Init(0)
+Avoid the SPEC CPU 2017 628 fast-math error. The corresponding code is in gcc/gimple-match-head.c.
+;;;;;;;;;;;;;;;;;;;;;;;;;
+
+fsw-nofcpys
+Target Var(flag_sw_nofcpys) Init(1)
+Delete fcpys after the fcvtsd instruction.
+
+fsw-rtid
+Target Var(flag_sw_rtid) Init(1)
+Use rtid instead of syscall 0x9e.
+
+fsw-rtx-cost
+Target Var(flag_sw_rtx_cost) Init(0)
+Adjust the rtx cost.
+
+fsw-sxaddl
+Target Var(flag_sw_sxaddl) Init(1)
+Combine the sXaddl instructions.
+
+fsw-delnop
+Target Var(flag_sw_delnop) Init(1)
+Delete the nop instruction.
+
+fsw-int-div-opt
+Target Report Var(flag_sw_int_div_opt) Init(0)
+Enable the SW integer division optimization.
+
+fsw-prefetch-l1
+Target Var(flag_sw_prefetch_l1) Init(1)
+Use L1 load prefetch instead of L2.
+
+fsw-prefetch-add
+Target Var(flag_sw_prefetch_add) Init(1)
+Generate prefetches for cases like stream add.
+
+fsw-prefetch-unroll
+Target Var(flag_sw_prefetch_unroll) Init(0)
+Optimize loop unrolling in the prefetch pass.
+
+msoft-float
+Target Report Mask(SOFT_FP)
+Do not use hardware fp.
+
+fsw-recip
+Target Report Var(flag_sw_recip) Init(0)
+Use the ISA floating-point reciprocal instructions.
+
+fsw-recip-precision
+Target Report Var(flag_sw_recip_precision) Init(0)
+Assume that the reciprocal estimate instructions provide more accuracy.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+mfp-regs
+Target Report Mask(FPREGS)
+Use fp registers.
+
+mgas
+Target Ignore
+Does nothing. Preserved for backward compatibility.
+
+mieee-conformant
+Target RejectNegative Mask(IEEE_CONFORMANT)
+Request IEEE-conformant math library routines (SYSV).
+
+mieee
+Target Report RejectNegative Mask(IEEE)
+Emit IEEE-conformant code, without inexact exceptions.
+
+mieee-main
+Target Report RejectNegative Mask(IEEE_MAIN)
+Emit IEEE-conformant code, without inexact exceptions.
+
+mieee-with-inexact
+Target Report RejectNegative Mask(IEEE_WITH_INEXACT)
+Emit IEEE-conformant code, with inexact exceptions.
+
+mbuild-constants
+Target Report Mask(BUILD_CONSTANTS)
+Do not emit complex integer constants to read-only memory.
+
+mfloat-vax
+Target Report RejectNegative Mask(FLOAT_VAX)
+Use VAX fp.
+
+mfloat-ieee
+Target Report RejectNegative InverseMask(FLOAT_VAX)
+Do not use VAX fp.
+
+mbwx
+Target Report Mask(BWX)
+Emit code for the byte/word ISA extension.
+
+mmax
+Target Report Mask(MAX)
+Emit code for the motion video ISA extension.
+ +mfix +Target Report Mask(FIX) +Emit code for the fp move and sqrt ISA extension. + +mcix +Target Report Mask(CIX) +Emit code for the counting ISA extension. + +msw6a +Target Report Mask(SW6A) +Emit code for the SW6A ISA extension. + +msw6b +Target Report Mask(SW6B) +Emit code for the SW6B ISA extension. + +msw8a +Target Report Mask(SW8A) +Emit code for the SW8A ISA extension. + +mexplicit-relocs +Target Report Mask(EXPLICIT_RELOCS) +Emit code using explicit relocation directives. + +msmall-data +Target Report RejectNegative Mask(SMALL_DATA) +Emit 16-bit relocations to the small data areas. + +mlarge-data +Target Report RejectNegative InverseMask(SMALL_DATA) +Emit 32-bit relocations to the small data areas. + +msmall-text +Target Report RejectNegative Mask(SMALL_TEXT) +Emit direct branches to local functions. + +mlarge-text +Target Report RejectNegative InverseMask(SMALL_TEXT) +Emit indirect branches to local functions. + +mtls-kernel +Target Report Mask(TLS_KERNEL) +Emit rdval for thread pointer. + +mlong-double-128 +Target Report RejectNegative Mask(LONG_DOUBLE_128) +Use 128-bit long double. + +mlong-double-64 +Target Report RejectNegative InverseMask(LONG_DOUBLE_128) +Use 64-bit long double. + +mcpu= +Target RejectNegative Joined Var(sw_64_cpu_string) +Use features of and schedule given CPU. + +mtune= +Target RejectNegative Joined Var(sw_64_tune_string) +Schedule given CPU. + +mfp-rounding-mode= +Target RejectNegative Joined Var(sw_64_fprm_string) +Control the generated fp rounding mode. + +mfp-trap-mode= +Target RejectNegative Joined Var(sw_64_fptm_string) +Control the IEEE trap mode. + +mtrap-precision= +Target RejectNegative Joined Var(sw_64_tp_string) +Control the precision given to fp exceptions. + +mmemory-latency= +Target RejectNegative Joined Var(sw_64_mlat_string) +Tune expected memory latency. + +mtls-size= +Target RejectNegative Joined UInteger Var(sw_64_tls_size) Init(32) +Specify bit size of immediate TLS offsets. + +msimd +C C++ Fortran Driver Target Mask(SW_SIMD) Save +Support SW SIMD built-in functions and code generation. + +mgprel-size= +Target RejectNegative Joined UInteger Var(sw_64_gprel_size) Init(16) +Specify bit size of gprel relocation offsets. + +mtls-tlsgd= +Target RejectNegative Joined UInteger Var(sw_64_tls_gd) Init(16) +Specify the bitsize of tlsgd relocation offset relative GP. + +mtls-tlsldm= +Target RejectNegative Joined UInteger Var(sw_64_tls_ldm) Init(16) +Specify the bitsize of tlsldm relocation offset relative GP. + +mtls-gotdtprel= +Target RejectNegative Joined UInteger Var(sw_64_tls_gotdtprel) Init(16) +Specify the bitsize of gotdtprel relocation offset relative GP. + +mtls-gottprel= +Target RejectNegative Joined UInteger Var(sw_64_tls_gottprel) Init(16) +Specify the bitsize of gottprel relocation offset relative GP. + +mlra +Target Report Var(sh_lra_flag) Init(0) Save +Use reload instead of LRA (transitional). + +mtrunc +C Fortran Driver Target Mask(SW_TRUNC) Save +Support fix_trunc code generation. + +m32 +C ObjC C++ ObjC++ LTO Fortran Driver Target Report Mask(SW_M32) Init(0) +M32 optimization. diff --git a/gcc/config/sw_64/sync.md b/gcc/config/sw_64/sync.md new file mode 100644 index 0000000000000000000000000000000000000000..71fd0478e21ca55779f61b364323ec869c4ba240 --- /dev/null +++ b/gcc/config/sw_64/sync.md @@ -0,0 +1,499 @@ +;; GCC machine description for Sw_64 synchronization instructions. +;; Copyright (C) 2005-2020 Free Software Foundation, Inc. +;; +;; This file is part of GCC. 
+;; +;; GCC is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation; either version 3, or (at your option) +;; any later version. +;; +;; GCC is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. +;; +;; You should have received a copy of the GNU General Public License +;; along with GCC; see the file COPYING3. If not see +;; . + +(define_code_iterator FETCHOP [plus minus ior xor and]) +(define_code_attr fetchop_name + [(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")]) +(define_code_attr fetchop_pred + [(plus "add_operand") (minus "reg_or_8bit_operand") + (ior "or_operand") (xor "or_operand") (and "and_operand")]) +(define_code_attr fetchop_constr + [(plus "rKL") (minus "rI") (ior "rIN") (xor "rIN") (and "rINM")]) + + +(define_expand "memory_barrier" + [(set (match_dup 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MB))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +;; mb-> memb +(define_insn "*memory_barrier" + [(set (match_operand:BLK 0) + (unspec:BLK [(match_dup 0)] UNSPEC_MB))] + "" + "memb" + [(set_attr "type" "mb")]) + +(define_insn "write_memory_barrier" + [(unspec:BLK [(const_int 0)] UNSPEC_MB)] + "TARGET_SW8A" + "wmemb" + [(set_attr "type" "mb")]) + +;; "ld_l %0,%1" +(define_insn "@load_locked_" + [(set (match_operand:I48MODE 0 "register_operand" "=r") + (unspec_volatile:I48MODE + [(match_operand:I48MODE 1 "memory_operand" "m")] + UNSPECV_LL))] + "" + { + switch ('') + { + case 'w': + return "ldi %0,%1\;lldw %0,0(%0)"; + case 'l': + return "ldi %0,%1\;lldl %0,0(%0)"; + default: + return "ld_l %0,%1"; + } + } + [(set_attr "type" "ld_l")]) + +;; "st_c %0,%1" +(define_insn "@store_conditional_" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_SC)) + (set (match_operand:I48MODE 1 "memory_operand" "=m") + (match_operand:I48MODE 2 "reg_or_0_operand" "0")) + (clobber (reg:DI 28))] + "" + { + switch ('') + { + case 'w': + return "ldi $28,%1\;lstw %0,0($28)"; + case 'l': + return "ldi $28,%1\;lstl %0,0($28)"; + default: + return "st_c %0,%1"; + } + } + [(set_attr "type" "st_c")]) + + (define_insn "builtin_rd_f" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_SC))] + "" + "rd_f %0" + [(set_attr "type" "st_c")]) + + (define_insn "builtin_wr_f" + [(match_operand:DI 0 "register_operand" "r") + (unspec_volatile:DI [(const_int 0)] UNSPECV_LL)] + "" + "wr_f %0" + [(set_attr "type" "ld_l")]) + +;; The Sw_64 Architecture Handbook says that it is UNPREDICTABLE whether +;; the lock is cleared by a normal load or store. This means we cannot +;; expand a ll/sc sequence before reload, lest a register spill is +;; inserted inside the sequence. It is also UNPREDICTABLE whether the +;; lock is cleared by a TAKEN branch. This means that we can not expand +;; a ll/sc sequence containing a branch (i.e. compare-and-swap) until after +;; the final basic-block reordering pass. 
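+;; For orientation only, a rough pseudo-assembly sketch of the kind of retry
+;; loop the post-reload splitters (sw_64_split_compare_and_swap and friends)
+;; are expected to emit.  Only lldl/lstl/rd_f/wr_f are taken from the
+;; patterns in this file; the remaining mnemonics, registers and labels are
+;; illustrative and may not match the real generated code:
+;;
+;;   retry:
+;;     lldl   $1,0($16)        ; load-locked the current value
+;;     cmpeq  $1,$17,$2        ; compare with the expected value
+;;     wr_f   $2               ; arm the lock flag only on a match
+;;     mov    $18,$3
+;;     lstl   $3,0($16)        ; store-conditional the desired value
+;;     rd_f   $3               ; read back whether the store succeeded
+;;     beq    $2,done          ; compare failed: exit
+;;     beq    $3,retry         ; store-conditional failed: retry
+;;   done:
+;;
+;; Because the conditional branches sit inside the lldl/lstl window, no
+;; spill, reload or block reordering may be scheduled into that window,
+;; which is exactly why the expansion is postponed as described above.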
+ +(define_expand "atomic_compare_and_swap" + [(parallel + [(set (match_operand:DI 0 "register_operand") ;; bool out + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 1 "register_operand") ;; val out + (unspec_volatile:I48MODE [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 2 "memory_operand") ;; memory + (unspec_volatile:I48MODE + [(match_dup 2) + (match_operand:I48MODE 3 "reg_or_8bit_operand") ;; expected + (match_operand:I48MODE 4 "add_operand") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; succ model + (match_operand:SI 7 "const_int_operand") ;; fail model + (match_operand:DI 8 "register_operand")] + UNSPECV_CMPXCHG)) + (clobber (reg:DI 28))])] + "" +{ + if (mode == SImode) + { + operands[3] = convert_modes (DImode, SImode, operands[3], 0); + operands[4] = convert_modes (DImode, SImode, operands[4], 0); + } + if (TARGET_SW8A) + { + if (flag_sw_use_cas) + { + if (CONST_INT_P (operands[3])) + operands[3] = force_reg (DImode, operands[3]); + + if (CONST_INT_P (operands[4])) + operands[4] = force_reg (DImode, operands[4]); + emit_insn (gen_atomic_compare_and_swap_target_sw8a (operands[0], + operands[1], + operands[2], + operands[3], + operands[4], + operands[5], + operands[6], + operands[7])); + DONE; + } + } +}) + +(define_insn_and_split "*atomic_compare_and_swap" + [(set (match_operand:DI 0 "register_operand" "=&r") ;; bool out + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 1 "register_operand" "=&r") ;; val out + (unspec_volatile:I48MODE [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 2 "memory_operand" "+m") ;; memory + (unspec_volatile:I48MODE + [(match_dup 2) + (match_operand:DI 3 "reg_or_8bit_operand" "rI") ;; expected + (match_operand:DI 4 "add_operand" "rKL") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; succ model + (match_operand:SI 7 "const_int_operand") ;; fail model + (match_operand:DI 8 "register_operand" "r")] + UNSPECV_CMPXCHG)) + (clobber (reg:DI 28))] + + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_compare_and_swap (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_expand "atomic_compare_and_swap" + [(match_operand:DI 0 "register_operand") ;; bool out + (match_operand:I12MODE 1 "register_operand") ;; val out + (match_operand:I12MODE 2 "mem_noofs_operand") ;; memory + (match_operand:I12MODE 3 "register_operand") ;; expected + (match_operand:I12MODE 4 "add_operand") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; succ model + (match_operand:SI 7 "const_int_operand") ;; fail model + (match_operand:DI 8 "register_operand")] + "" +{ + if (flag_sw_use_cas) + { + if (CONST_INT_P (operands[3])) + operands[3] = force_reg (mode, operands[3]); + + if (CONST_INT_P (operands[4])) + operands[4] = force_reg (mode, operands[4]); + } + sw_64_expand_compare_and_swap_12 (operands); + DONE; +}) + +(define_insn_and_split "@atomic_compare_and_swap_1" + [(set (match_operand:DI 0 "register_operand" "=&r") ;; bool out + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:DI 1 "register_operand" "=&r") ;; val out + (zero_extend:DI + (unspec_volatile:I12MODE [(const_int 0)] UNSPECV_CMPXCHG))) + (set (match_operand:I12MODE 2 "mem_noofs_operand" "+w") ;; memory + (unspec_volatile:I12MODE + [(match_dup 2) + (match_operand:DI 3 
"reg_or_8bit_operand" "rI") ;; expected + (match_operand:DI 4 "reg_or_0_operand" "rJ") ;; desired + (match_operand:DI 5 "register_operand" "r") ;; align + (match_operand:SI 6 "const_int_operand") ;; is_weak + (match_operand:SI 7 "const_int_operand") ;; succ model + (match_operand:SI 8 "const_int_operand") ;; fail model + (match_operand:DI 9 "register_operand" "r")] + UNSPECV_CMPXCHG)) + (clobber (match_scratch:DI 10 "=&r")) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_compare_and_swap_12 (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_compare_and_swap_target_sw8a" + [(set (match_operand:DI 0 "register_operand" "=&r") ;; bool out + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 1 "register_operand" "=&r") ;; val out + (unspec_volatile:I48MODE [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:I48MODE 2 "memory_operand" "+m") ;; memory + (unspec_volatile:I48MODE + [(match_dup 2) + (match_operand:DI 3 "reg_or_8bit_operand" "r") ;; expected + (match_operand:DI 4 "add_operand" "r") ;; desired + (match_operand:SI 5 "const_int_operand") ;; is_weak + (match_operand:SI 6 "const_int_operand") ;; succ model + (match_operand:SI 7 "const_int_operand")] ;; fail model + UNSPECV_CMPXCHG)) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + if (flag_sw_use_cas) + sw_64_split_atomic_cas (operands); + else + sw_64_split_compare_and_swap (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "@atomic_compare_and_swap_1_target_sw8a" + [(set (match_operand:DI 0 "register_operand" "=&r") ;; bool out + (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) + (set (match_operand:DI 1 "register_operand" "=&r") ;; val out + (zero_extend:DI + (unspec_volatile:I12MODE [(const_int 0)] UNSPECV_CMPXCHG))) + (set (match_operand:I12MODE 2 "mem_noofs_operand" "+w") ;; memory + (unspec_volatile:I12MODE + [(match_dup 2) + (match_operand:DI 3 "reg_or_8bit_operand" "rI") ;; expected + (match_operand:DI 4 "register_operand" "r") ;; desired + (match_operand:DI 5 "register_operand" "r") ;; align + (match_operand:SI 6 "const_int_operand") ;; is_weak + (match_operand:SI 7 "const_int_operand") ;; succ model + (match_operand:SI 8 "const_int_operand")] ;; fail model + UNSPECV_CMPXCHG)) + (clobber (match_scratch:DI 9 "=&r")) + (clobber (match_scratch:DI 10 "=&r")) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_compare_and_swap_12 (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn "sw_64_atomic_cas" + [(set (match_operand:I48MODE 0 "register_operand" "") ;; out + (match_operand:I48MODE 1 "memory_operand" "")) ;; memory. + (set (match_dup 1) + (unspec_volatile:I48MODE + [(match_dup 0) + (match_operand:I48MODE 2 "register_operand" "")] ;; value. 
+ UNSPECV_CMPXCHG)) + (clobber (reg:DI 28))] + "TARGET_SW8A && flag_sw_use_cas" + "ldi $28,%1\;cas %0,$28,%2") +;; endif + +(define_insn_and_split "atomic_exchange" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") ;; output + (match_operand:I48MODE 1 "memory_operand" "+m")) ;; memory + (set (match_dup 1) + (unspec:I48MODE + [(match_operand:I48MODE 2 "add_operand" "rKL") ;; input + (match_operand:SI 3 "const_int_operand")] ;; model + UNSPEC_XCHG)) + (clobber (match_scratch:I48MODE 4 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_atomic_exchange (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_expand "atomic_exchange" + [(match_operand:I12MODE 0 "register_operand") ;; output + (match_operand:I12MODE 1 "mem_noofs_operand") ;; memory + (match_operand:I12MODE 2 "reg_or_0_operand") ;; input + (match_operand:SI 3 "const_int_operand")] ;; model + "" +{ + sw_64_expand_atomic_exchange_12 (operands); + DONE; +}) + +(define_insn_and_split "@atomic_exchange_1" + [(set (match_operand:DI 0 "register_operand" "=&r") ;; output + (zero_extend:DI + (match_operand:I12MODE 1 "mem_noofs_operand" "+w"))) ;; memory + (set (match_dup 1) + (unspec:I12MODE + [(match_operand:DI 2 "reg_or_8bit_operand" "rI") ;; input + (match_operand:DI 3 "register_operand" "r") ;; align + (match_operand:SI 4 "const_int_operand")] ;; model + UNSPEC_XCHG)) + (clobber (match_scratch:DI 5 "=&r"))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_atomic_exchange_12 (operands); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_" + [(set (match_operand:I48MODE 0 "memory_operand" "+m") + (unspec:I48MODE + [(FETCHOP:I48MODE (match_dup 0) + (match_operand:I48MODE 1 "" "")) + (match_operand:SI 2 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 3 "=&r")) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_atomic_op (, operands[0], operands[1], + NULL, NULL, operands[3], + (enum memmodel) INTVAL (operands[2])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_nand" + [(set (match_operand:I48MODE 0 "memory_operand" "+m") + (unspec:I48MODE + [(not:I48MODE + (and:I48MODE (match_dup 0) + (match_operand:I48MODE 1 "register_operand" "r"))) + (match_operand:SI 2 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 3 "=&r")) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_atomic_op (NOT, operands[0], operands[1], + NULL, NULL, operands[3], + (enum memmodel) INTVAL (operands[2])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_fetch_" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (match_operand:I48MODE 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec:I48MODE + [(FETCHOP:I48MODE (match_dup 1) + (match_operand:I48MODE 2 "" "")) + (match_operand:SI 3 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 4 "=&r")) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_atomic_op (, operands[1], operands[2], + operands[0], NULL, operands[4], + (enum memmodel) INTVAL (operands[3])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_fetch_nand" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (match_operand:I48MODE 1 "memory_operand" "+m")) + (set (match_dup 1) + (unspec:I48MODE + [(not:I48MODE + (and:I48MODE (match_dup 1) + (match_operand:I48MODE 2 
"register_operand" "r"))) + (match_operand:SI 3 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 4 "=&r")) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_atomic_op (NOT, operands[1], operands[2], + operands[0], NULL, operands[4], + (enum memmodel) INTVAL (operands[3])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic__fetch" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (FETCHOP:I48MODE + (match_operand:I48MODE 1 "memory_operand" "+m") + (match_operand:I48MODE 2 "" ""))) + (set (match_dup 1) + (unspec:I48MODE + [(FETCHOP:I48MODE (match_dup 1) (match_dup 2)) + (match_operand:SI 3 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 4 "=&r")) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_atomic_op (, operands[1], operands[2], + NULL, operands[0], operands[4], + (enum memmodel) INTVAL (operands[3])); + DONE; +} + [(set_attr "type" "multi")]) + +(define_insn_and_split "atomic_nand_fetch" + [(set (match_operand:I48MODE 0 "register_operand" "=&r") + (not:I48MODE + (and:I48MODE (match_operand:I48MODE 1 "memory_operand" "+m") + (match_operand:I48MODE 2 "register_operand" "r")))) + (set (match_dup 1) + (unspec:I48MODE + [(not:I48MODE (and:I48MODE (match_dup 1) (match_dup 2))) + (match_operand:SI 3 "const_int_operand")] + UNSPEC_ATOMIC)) + (clobber (match_scratch:I48MODE 4 "=&r")) + (clobber (reg:DI 28))] + "" + "#" + "epilogue_completed" + [(const_int 0)] +{ + sw_64_split_atomic_op (NOT, operands[1], operands[2], + NULL, operands[0], operands[4], + (enum memmodel) INTVAL (operands[3])); + DONE; +} + [(set_attr "type" "multi")]) diff --git a/gcc/config/sw_64/t-linux b/gcc/config/sw_64/t-linux new file mode 100644 index 0000000000000000000000000000000000000000..d78ef47dfaf17157fd1fd542e56d70d0f4630526 --- /dev/null +++ b/gcc/config/sw_64/t-linux @@ -0,0 +1 @@ +MULTIARCH_DIRNAME = $(call if_multiarch,sw_64-linux-gnu) diff --git a/gcc/config/sw_64/t-sw_64 b/gcc/config/sw_64/t-sw_64 new file mode 100644 index 0000000000000000000000000000000000000000..d7b5e98a066546da1d91d3942bdf1c16986a29d9 --- /dev/null +++ b/gcc/config/sw_64/t-sw_64 @@ -0,0 +1,19 @@ +# Copyright (C) 2016-2020 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . 
+ +PASSES_EXTRA += $(srcdir)/config/sw_64/sw_64-passes.def diff --git a/gcc/config/sw_64/x-sw_64 b/gcc/config/sw_64/x-sw_64 new file mode 100644 index 0000000000000000000000000000000000000000..229866b30b3b70570979f15d1c57cd0099643a9e --- /dev/null +++ b/gcc/config/sw_64/x-sw_64 @@ -0,0 +1,3 @@ +driver-sw_64.o: $(srcdir)/config/sw_64/driver-sw_64.c + $(COMPILE) $< + $(POSTCOMPILE) diff --git a/gcc/configure b/gcc/configure index d4f97834fdc7f8dfbfc2d0a8f10e6beca767e926..707f3fdf5b9a7054f79170413d852954cb84db64 100755 --- a/gcc/configure +++ b/gcc/configure @@ -25109,6 +25109,29 @@ foo: .long 25 xor %l1, %tle_lox10(foo), %o5 ld [%g7 + %o5], %o1" ;; + sw_64*-*-*) + conftest_s=' + .section ".tdata","awT",@progbits +foo: .long 25 + .text + ldl $27,__tls_get_addr($29) !literal!1 + ldi $16,foo($29) !tlsgd!1 + call $26,($27),__tls_get_addr !lituse_tlsgd!1 + ldl $27,__tls_get_addr($29) !literal!2 + ldi $16,foo($29) !tlsldm!2 + call $26,($27),__tls_get_addr !lituse_tlsldm!2 + ldl $1,foo($29) !gotdtprel + ldih $2,foo($29) !dtprelhi + ldi $3,foo($2) !dtprello + ldi $4,foo($29) !dtprel + ldl $1,foo($29) !gottprel + ldih $2,foo($29) !tprelhi + ldi $3,foo($2) !tprello + ldi $4,foo($29) !tprel' + tls_first_major=2 + tls_first_minor=13 + tls_as_opt=--fatal-warnings + ;; tilepro*-*-*) conftest_s=' .section ".tdata","awT",@progbits @@ -28283,6 +28306,111 @@ fi ;; + + sw_64*-*-linux* | sw_64*-*-*bsd*) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for explicit relocation support" >&5 +$as_echo_n "checking assembler for explicit relocation support... " >&6; } +if ${gcc_cv_as_sw_64_explicit_relocs+:} false; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_sw_64_explicit_relocs=no + if test $in_tree_gas = yes; then + if test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 12 \) \* 1000 + 0` + then gcc_cv_as_sw_64_explicit_relocs=yes +fi + elif test x$gcc_cv_as != x; then + $as_echo ' .set nomacro + .text + ext0b $3, $2, $3 !lituse_bytoff!1 + ldl $2, a($29) !literal!1 + ldl $4, b($29) !literal!2 + ldl_u $3, 0($2) !lituse_base!1 + ldl $27, f($29) !literal!5 + call $26, ($27), f !lituse_jsr!5 + ldih $29, 0($26) !gpdisp!3 + ldi $0, c($29) !gprel + ldih $1, d($29) !gprelhigh + ldi $1, d($1) !gprellow + ldi $29, 0($29) !gpdisp!3' > conftest.s + if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } + then + gcc_cv_as_sw_64_explicit_relocs=yes + else + echo "configure: failed program was" >&5 + cat conftest.s >&5 + fi + rm -f conftest.o conftest.s + fi +fi +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_sw_64_explicit_relocs" >&5 +$as_echo "$gcc_cv_as_sw_64_explicit_relocs" >&6; } +if test $gcc_cv_as_sw_64_explicit_relocs = yes; then + +$as_echo "#define HAVE_AS_EXPLICIT_RELOCS 1" >>confdefs.h + +fi + + + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for jsrdirect relocation support" >&5 +$as_echo_n "checking assembler for jsrdirect relocation support... 
" >&6; } +if ${gcc_cv_as_sw_64_jsrdirect_relocs+:} false; then : + $as_echo_n "(cached) " >&6 +else + gcc_cv_as_sw_64_jsrdirect_relocs=no + if test $in_tree_gas = yes; then + if test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 16 \) \* 1000 + 90` + then gcc_cv_as_sw_64_jsrdirect_relocs=yes +fi +#trouble# + elif test x$gcc_cv_as != x; then + $as_echo ' .set nomacro + .text + ldl $27, a($29) !literal!1 + call $26, ($27), a !lituse_jsrdirect!1' > conftest.s + if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' + { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 + (eval $ac_try) 2>&5 + ac_status=$? + $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } + then + gcc_cv_as_sw_64_jsrdirect_relocs=yes + else + echo "configure: failed program was" >&5 + cat conftest.s >&5 + fi + rm -f conftest.o conftest.s + fi +fi + +{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_sw_64_jsrdirect_relocs" >&5 +$as_echo "$gcc_cv_as_sw_64_jsrdirect_relocs" >&6; } +if test $gcc_cv_as_sw_64_jsrdirect_relocs = yes; then + +$as_echo "#define HAVE_AS_JSRDIRECT_RELOCS 1" >>confdefs.h + +fi +cat >> confdefs.h <<_ACEOF +#define FLAG_SW64_ATOMIC 1 +#define FLAG_SW64_90139 1 +#define FLAG_SW64_PREFETCH 1 +#define FLAG_SW64_PROTECT 1 +#define FLAG_SW64_SIMD 1 +#define FLAG_SW64_AUTOSIMD 1 +#define FLAG_SW64_M32 1 +#define FLAG_SW64_INC_DEC 1 +#define FLAG_SW64_DELNOP 1 +#define FLAG_SW64_FM 1 +#define FLAG_SW64_WMEMB 1 +_ACEOF + + ;; + esac # Mips and HP-UX need the GNU assembler. @@ -28311,7 +28439,7 @@ esac case "$cpu_type" in aarch64 | alpha | arc | arm | avr | bfin | cris | csky | i386 | m32c | m68k \ | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc \ - | tilegx | tilepro | visium | xstormy16 | xtensa) + | sw_64 | tilegx | tilepro | visium | xstormy16 | xtensa) insn="nop" ;; ia64 | s390) @@ -29629,6 +29757,17 @@ $as_echo "$as_me: WARNING: --build-id is not supported by your linker; --enable- fi fi +# sw_64 add --enable-linker-no-relax to support linker -Wl,-no-relax +# Check whether --enable-linker-no-relax was given. +if test "${enable_linker_no_relax+set}" = set; then : + enableval=$enable_linker_no_relax; +else + enable_linker_no_relax=no +fi + +if test x"$enable_linker_no_relax" = xyes; then + $as_echo "#define ENABLE_LD_NORELAX 1" >>confdefs.h +fi # In binutils 2.21, GNU ld gained support for new emulations fully # supporting the Solaris 2 ABI. Detect their presence in the linker used. 
{ $as_echo "$as_me:${as_lineno-$LINENO}: checking linker *_sol2 emulation support" >&5 diff --git a/gcc/configure.ac b/gcc/configure.ac index 44154f69f0a2279fb17846fb23c729653622bdcf..91b59d3f7fb78f19cdf1025336240f7e006da197 100644 --- a/gcc/configure.ac +++ b/gcc/configure.ac @@ -3870,6 +3870,29 @@ foo: .long 25 xor %l1, %tle_lox10(foo), %o5 ld [%g7 + %o5], %o1" ;; + sw_64*-*-*) + conftest_s=' + .section ".tdata","awT",@progbits +foo: .long 25 + .text + ldl $27,__tls_get_addr($29) !literal!1 + ldi $16,foo($29) !tlsgd!1 + call $26,($27),__tls_get_addr !lituse_tlsgd!1 + ldl $27,__tls_get_addr($29) !literal!2 + ldi $16,foo($29) !tlsldm!2 + call $26,($27),__tls_get_addr !lituse_tlsldm!2 + ldl $1,foo($29) !gotdtprel + ldih $2,foo($29) !dtprelhi + ldi $3,foo($2) !dtprello + ldi $4,foo($29) !dtprel + ldl $1,foo($29) !gottprel + ldih $2,foo($29) !tprelhi + ldi $3,foo($2) !tprello + ldi $4,foo($29) !tprel' + tls_first_major=2 + tls_first_minor=13 + tls_as_opt=--fatal-warnings + ;; tilepro*-*-*) conftest_s=' .section ".tdata","awT",@progbits @@ -4345,6 +4368,34 @@ bar: [AC_DEFINE(HAVE_AS_SPARC_GOTDATA_OP, 1, [Define if your assembler and linker support GOTDATA_OP relocs.])]) + sw_64*-*-linux* | sw_64*-*-*bsd*) + gcc_GAS_CHECK_FEATURE([explicit relocation support], + gcc_cv_as_sw_64_explicit_relocs, [2,12,0],, +[ .set nomacro + .text + ext0b $3, $2, $3 !lituse_bytoff!1 + ldl $2, a($29) !literal!1 + ldl $4, b($29) !literal!2 + ldl_u $3, 0($2) !lituse_base!1 + ldl $27, f($29) !literal!5 + call $26, ($27), f !lituse_jsr!5 + ldih $29, 0($26) !gpdisp!3 + ldi $0, c($29) !gprel + ldih $1, d($29) !gprelhigh + ldi $1, d($1) !gprellow + ldi $29, 0($29) !gpdisp!3],, + [AC_DEFINE(HAVE_AS_EXPLICIT_RELOCS, 1, + [Define if your assembler supports explicit relocations.])]) + gcc_GAS_CHECK_FEATURE([jsrdirect relocation support], + gcc_cv_as_sw_64_jsrdirect_relocs, [2,16,90],, +[ .set nomacro + .text + ldl $27, a($29) !literal!1 + call $26, ($27), a !lituse_jsrdirect!1],, + [AC_DEFINE(HAVE_AS_JSRDIRECT_RELOCS, 1, + [Define if your assembler supports the lituse_jsrdirect relocation.])]) + ;; + gcc_GAS_CHECK_FEATURE([unaligned pcrel relocs], gcc_cv_as_sparc_ua_pcrel,, [-K PIC], @@ -5145,7 +5196,7 @@ esac # ??? Once 2.11 is released, probably need to add first known working # version to the per-target configury. 
case "$cpu_type" in - aarch64 | alpha | arc | arm | avr | bfin | cris | csky | i386 | m32c | m68k \ + aarch64 | alpha | sw_64 | arc | arm | avr | bfin | cris | csky | i386 | m32c | m68k \ | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc \ | tilegx | tilepro | visium | xstormy16 | xtensa) insn="nop" @@ -6052,6 +6103,31 @@ if test x"$enable_linker_build_id" = xyes; then fi fi +# --no-relax +AC_ARG_ENABLE(linker-no-relax, +[AS_HELP_STRING([--enable-linker-no-relax], + [compiler will always pass --no-relax to linker])], +[], +enable_linker_no_relax=no) + +if test x"$enable_linker_build_id" = xyes; then + if test x"$gcc_cv_ld_buildid" = xyes; then + AC_DEFINE(ENABLE_LD_BUILDID, 1, + [Define if gcc should always pass --build-id to linker.]) + else + AC_MSG_WARN(--build-id is not supported by your linker; --enable-linker-build-id ignored) + fi +fi + +# --no-relax +if test x"$enable_linker_no_relax" = xyes; then + AC_DEFINE(ENABLE_LD_NORELAX, 1, + [Define if gcc should always pass --no-relax to linker.]) + else + AC_MSG_WARN(--no-relax is not supported by your linker; --enable-linker-no-relax ignored) + fi +fi + # In binutils 2.21, GNU ld gained support for new emulations fully # supporting the Solaris 2 ABI. Detect their presence in the linker used. AC_CACHE_CHECK(linker *_sol2 emulation support, @@ -6224,7 +6300,8 @@ case "$target" in powerpc*-*-linux* | \ sparc*-*-linux* | \ s390*-*-linux* | \ - alpha*-*-linux*) + alpha*-*-linux* | \ + sw_64*-*-linux*) AC_ARG_WITH(long-double-128, [AS_HELP_STRING([--with-long-double-128], [use 128-bit long double by default])], diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi index 7d98ec4190aec574fcd77a924cc8f29af11de065..e61ba05abb4368ed16a9457cc419c266a70861ee 100644 --- a/gcc/doc/install.texi +++ b/gcc/doc/install.texi @@ -3518,6 +3518,8 @@ information have to. @item @uref{#sparcv9-x-solaris2,,sparcv9-*-solaris2*} @item +@uref{#sw_64-x-x,,sw_64*-*-*} +@item @uref{#c6x-x-x,,c6x-*-*} @item @uref{#tilegx-x-linux,,tilegx-*-linux*} @@ -4643,6 +4645,12 @@ zSeries system (64-bit) running GNU/Linux for zSeries@. zSeries system (64-bit) running TPF@. This platform is supported as cross-compilation target only. +@html +
+<hr />
+@end html
+@anchor{sw_64-x-x}
+@heading sw_64*-*-*
+
 @html
 <hr />
@end html diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c index 972512e8115331c4d7fefe137355bd0ad6b042ef..04c1c0ab6b11c4bdc0e8dae2cf9d45fc4bdc10d1 100644 --- a/gcc/emit-rtl.c +++ b/gcc/emit-rtl.c @@ -2399,6 +2399,43 @@ adjust_address_1 (rtx memref, machine_mode mode, poly_int64 offset, addr = gen_rtx_ZERO_EXTEND (address_mode, plus_constant (pointer_mode, XEXP (addr, 0), offset)); +#endif +#ifdef FLAG_SW64_INC_DEC + else if (GET_CODE (addr) == POST_INC) + ; + else if (GET_CODE (addr) == POST_DEC) + { + rtx term; + rtx reg = XEXP (addr, 0); + if (known_eq (offset, 0)) + term = GEN_INT (8); + else + term = GEN_INT (-24); + addr = gen_rtx_POST_MODIFY (mode, reg, + gen_rtx_PLUS (mode, reg, term)); + } + else if (GET_CODE (addr) == POST_MODIFY) + { + if (GET_CODE (XEXP (addr,1)) == PLUS) + { + if (CONSTANT_P (XEXP (XEXP (addr, 1), 1))) + { + rtx term; + rtx reg = XEXP (XEXP (addr, 1), 0); + if (known_eq (offset, 0)) + term = GEN_INT (8); + else + term = plus_constant (mode, + XEXP (XEXP (addr, 1), 1), -8); + if (term == const0_rtx) + XEXP (addr, 1) = XEXP (XEXP (addr, 1), 0); + else + addr = gen_rtx_POST_MODIFY (mode, reg, + gen_rtx_PLUS (mode, + reg, term)); + } + } + } #endif else addr = plus_constant (address_mode, addr, offset); diff --git a/gcc/explow.c b/gcc/explow.c index b838f03587083c8fca23d47dd710ed84f7a98115..ff74b7f4865f070b63a16eb9b77e7628adb84ccf 100644 --- a/gcc/explow.c +++ b/gcc/explow.c @@ -1250,7 +1250,11 @@ get_dynamic_stack_size (rtx *psize, unsigned size_align, in SIZE for the hole that might result from the alignment operation. */ unsigned known_align = REGNO_POINTER_ALIGN (VIRTUAL_STACK_DYNAMIC_REGNUM); +#ifndef FLAG_SW64_90139 + // it change from 710 extra = (required_align - BITS_PER_UNIT) / BITS_PER_UNIT; + // see the test pr20210303 if (known_align == 0) +#endif known_align = BITS_PER_UNIT; if (required_align > known_align) { diff --git a/gcc/expr.c b/gcc/expr.c index c468b5eb9f8a2bfa637a27f5e72182571c28ce9f..a8e8debf53ae97577f3ac585588e64aa4f0b6a1d 100644 --- a/gcc/expr.c +++ b/gcc/expr.c @@ -3811,6 +3811,9 @@ emit_move_insn (rtx x, rtx y) rtx_insn *last_insn; rtx set; +#ifdef FLAG_SW64_M32 + if (!TARGET_SW_M32) +#endif gcc_assert (mode != BLKmode && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode)); diff --git a/gcc/final.c b/gcc/final.c index 807384514dbe89f8dbfd31b47a399587b68283fe..5ca16c80175f91c925b5b662ef4ef7db1a6898e3 100644 --- a/gcc/final.c +++ b/gcc/final.c @@ -1847,7 +1847,12 @@ profile_function (FILE *file ATTRIBUTE_UNUSED) { int align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE); switch_to_section (data_section); +#ifdef FLAG_SW64_DELNOP + if (flag_sw_delnop == 0) + ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); +#else ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); +#endif targetm.asm_out.internal_label (file, "LP", current_function_funcdef_no); assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1); } @@ -2466,9 +2471,14 @@ final_scan_insn_1 (rtx_insn *insn, FILE *file, int optimize_p ATTRIBUTE_UNUSED, #else #ifdef ASM_OUTPUT_ALIGN_WITH_NOP ASM_OUTPUT_ALIGN_WITH_NOP (file, alignment.levels[0].log); +#else +#ifdef FLAG_SW64_DELNOP + if (flag_sw_delnop == 0) + ASM_OUTPUT_ALIGN (file, alignment.levels[0].log); #else ASM_OUTPUT_ALIGN (file, alignment.levels[0].log); #endif +#endif #endif } } @@ -2502,7 +2512,12 @@ final_scan_insn_1 (rtx_insn *insn, FILE *file, int optimize_p ATTRIBUTE_UNUSED, #else log_align = exact_log2 (BIGGEST_ALIGNMENT / BITS_PER_UNIT); #endif +#ifdef FLAG_SW64_DELNOP + if (flag_sw_delnop == 0) 
+ ASM_OUTPUT_ALIGN (file, log_align); +#else ASM_OUTPUT_ALIGN (file, log_align); +#endif } else switch_to_section (current_function_section ()); diff --git a/gcc/flags.h b/gcc/flags.h index 921f4390581fdc837e400a1d069eb0e77c0529ff..31d5b08806b87c86734948c5677ace53e9ca58bb 100644 --- a/gcc/flags.h +++ b/gcc/flags.h @@ -39,6 +39,10 @@ extern bool fast_math_flags_struct_set_p (struct cl_optimization *); extern bool final_insns_dump_p; +#ifdef SW64_TARGET_SUPPORT_FPCR +extern int flag_fpcr_set; +extern int stfp3_flag; +#endif /* Other basic status info about current function. */ diff --git a/gcc/fortran/interface.c b/gcc/fortran/interface.c index 70219a537b983fea3b6bcbcb26c8affe3f21dbaa..fe1e13d16d2643b9263feb8897ff64b08d5a114c 100644 --- a/gcc/fortran/interface.c +++ b/gcc/fortran/interface.c @@ -3261,10 +3261,18 @@ gfc_compare_actual_formal (gfc_actual_arglist **ap, gfc_formal_arglist *formal, "at %L", f->sym->name, actual_size, formal_size, &a->expr->where); else +#ifdef FLAG_SW64_90139 //close this for it will cause speccpu 416 build err + gfc_warning (OPT_Wargument_mismatch, + "Actual argument contains too few " + "elements for dummy argument %qs (%lu/%lu) " + "at %L.Please add -std=legacy options", f->sym->name, actual_size, + formal_size, &a->expr->where); +#else gfc_error_now ("Actual argument contains too few " "elements for dummy argument %qs (%lu/%lu) " "at %L", f->sym->name, actual_size, formal_size, &a->expr->where); +#endif } return false; } diff --git a/gcc/gcc.c b/gcc/gcc.c index efa0b53ce97736783c05d8aa8bc239156aca5049..8f42186dd5a8a801a21c834b312f7f5ff3aa20ea 100644 --- a/gcc/gcc.c +++ b/gcc/gcc.c @@ -1844,6 +1844,12 @@ init_spec (void) } #endif +/* --no-relax for sw_64 */ +#ifdef ENABLE_LD_NORELAX +#define LINK_NORELAX_SPEC "%{!r:--no-relax} " + obstack_grow (&obstack, LINK_NORELAX_SPEC, sizeof (LINK_NORELAX_SPEC) - 1); +#endif + #if defined LINK_EH_SPEC || defined LINK_BUILDID_SPEC || \ defined LINKER_HASH_STYLE # ifdef LINK_BUILDID_SPEC diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c index 061aef39c2d5e984bb699c3ef3791f0edffb1cda..3c2540edf856d0fbad6ccd223cdf6ba285eaaab7 100644 --- a/gcc/gimple-match-head.c +++ b/gcc/gimple-match-head.c @@ -1233,6 +1233,11 @@ optimize_pow_to_exp (tree arg0, tree arg1) case PLUS_EXPR: case MINUS_EXPR: break; +#ifdef FLAG_SW64_FM + case PAREN_EXPR: + if (flag_sw_fast_math == 1) +#endif + return false; default: return true; } diff --git a/gcc/optabs.c b/gcc/optabs.c index 64a1a17686b7dfe11a40291703ac781458c53749..943c61ae289177ed63b4aa0951405b5201797595 100644 --- a/gcc/optabs.c +++ b/gcc/optabs.c @@ -6309,7 +6309,12 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, enum memmodel fail_model) { machine_mode mode = GET_MODE (mem); +#ifdef FLAG_SW64_ATOMIC + class expand_operand ops[9]; + rtx imust=gen_reg_rtx(DImode); +#else class expand_operand ops[8]; +#endif enum insn_code icode; rtx target_oval, target_bool = NULL_RTX; rtx libfunc; @@ -6358,7 +6363,12 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, create_integer_operand (&ops[5], is_weak); create_integer_operand (&ops[6], succ_model); create_integer_operand (&ops[7], fail_model); +#ifdef FLAG_SW64_ATOMIC + create_fixed_operand (&ops[8], imust); + if (maybe_expand_insn (icode, 9, ops)) +#else if (maybe_expand_insn (icode, 8, ops)) +#endif { /* Return success/failure. 
*/ target_bool = ops[0].value; diff --git a/gcc/sync-builtins.def b/gcc/sync-builtins.def index 156a13ce0f8bfb238da4ca208ef90510ffac4c8a..ee9d8207487fa8b9d593a9e9b19489a92fbbcd9e 100644 --- a/gcc/sync-builtins.def +++ b/gcc/sync-builtins.def @@ -256,6 +256,8 @@ DEF_SYNC_BUILTIN (BUILT_IN_SYNC_LOCK_RELEASE_16, "__sync_lock_release_16", DEF_SYNC_BUILTIN (BUILT_IN_SYNC_SYNCHRONIZE, "__sync_synchronize", BT_FN_VOID, ATTR_NOTHROWCALL_LEAF_LIST) +DEF_SYNC_BUILTIN (BUILT_IN_SYNC_SYNCHRONIZE_WRITE, "__sync_synchronize_write", + BT_FN_VOID, ATTR_NOTHROWCALL_LEAF_LIST) /* __sync* builtins for the C++ memory model. */ diff --git a/gcc/target-insns.def b/gcc/target-insns.def index e80361f0a48aaae70a702db8e6abaa5a852055b7..099121b9d4ba14b3a149492e3276921c2627b34b 100644 --- a/gcc/target-insns.def +++ b/gcc/target-insns.def @@ -60,6 +60,7 @@ DEF_TARGET_INSN (jump, (rtx x0)) DEF_TARGET_INSN (load_multiple, (rtx x0, rtx x1, rtx x2)) DEF_TARGET_INSN (mem_thread_fence, (rtx x0)) DEF_TARGET_INSN (memory_barrier, (void)) +DEF_TARGET_INSN (write_memory_barrier, (void)) DEF_TARGET_INSN (memory_blockage, (void)) DEF_TARGET_INSN (movstr, (rtx x0, rtx x1, rtx x2)) DEF_TARGET_INSN (nonlocal_goto, (rtx x0, rtx x1, rtx x2, rtx x3)) diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C index 2e0ef685f36fa0482b800a0078200d015fe35d1c..60b8f15a97840427b80a9a2208321966b023c50f 100644 --- a/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C +++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C @@ -1,6 +1,6 @@ // PR c++/49673: check that test_data goes into .rodata // { dg-do compile { target c++11 } } -// { dg-additional-options -G0 { target { { alpha*-*-* frv*-*-* ia64-*-* lm32*-*-* m32r*-*-* microblaze*-*-* mips*-*-* nios2-*-* powerpc*-*-* rs6000*-*-* } && { ! { *-*-darwin* *-*-aix* alpha*-*-*vms* } } } } } +// { dg-additional-options -G0 { target { { alpha*-*-* sw_64*-*-* frv*-*-* ia64-*-* lm32*-*-* m32r*-*-* microblaze*-*-* mips*-*-* nios2-*-* powerpc*-*-* rs6000*-*-* } && { ! { *-*-darwin* *-*-aix* alpha*-*-*vms* } } } } } // { dg-final { scan-assembler "\\.rdata" { target mips*-*-* } } } // { dg-final { scan-assembler "rodata" { target { { *-*-linux-gnu *-*-gnu* *-*-elf } && { ! { mips*-*-* riscv*-*-* } } } } } } diff --git a/gcc/testsuite/g++.dg/opt/devirt2.C b/gcc/testsuite/g++.dg/opt/devirt2.C index cf4842bd4df346d241ca8d9d0e7bf39403f5e23d..3417372868987939e3da8b087605cec7b9314648 100644 --- a/gcc/testsuite/g++.dg/opt/devirt2.C +++ b/gcc/testsuite/g++.dg/opt/devirt2.C @@ -5,7 +5,7 @@ // { dg-additional-options "-mshort-calls" {target epiphany-*-*} } // Using -mno-abicalls avoids a R_MIPS_JALR .reloc. // { dg-additional-options "-mno-abicalls" { target mips*-*-* } } -// { dg-final { scan-assembler-times "xyzzy" 2 { target { ! { alpha*-*-* hppa*-*-* ia64*-*-hpux* sparc*-*-* *-*-mingw* } } } } } +// { dg-final { scan-assembler-times "xyzzy" 2 { target { ! 
{ alpha*-*-* sw_64*-*-* hppa*-*-* ia64*-*-hpux* sparc*-*-* *-*-mingw* } } } } } // For *-*-mingw* there is additionally one .def match // { dg-final { scan-assembler-times "xyzzy" 3 { target *-*-mingw* } } } // The IA64 and HPPA compilers generate external declarations in addition diff --git a/gcc/testsuite/g++.dg/pr49718.C b/gcc/testsuite/g++.dg/pr49718.C index b1cc5deb7ac60c2c5e4d19583bf65fb2de68c7f3..13c661642de43c23afe21f3e9f24aac89ae7dbf7 100644 --- a/gcc/testsuite/g++.dg/pr49718.C +++ b/gcc/testsuite/g++.dg/pr49718.C @@ -1,6 +1,6 @@ /* { dg-do compile } */ /* { dg-options "-O2 -finstrument-functions" } */ -/* { dg-additional-options "-mno-explicit-relocs" { target alpha*-*-* } } */ +/* { dg-additional-options "-mno-explicit-relocs" { target alpha*-*-* sw_64*-*-* } } */ /* { dg-additional-options "-mno-relax-pic-calls" { target mips*-*-* } } */ /* { dg-final { scan-assembler-times "__cyg_profile_func_enter" 1 { target { ! { hppa*-*-hpux* } } } } } */ /* { dg-final { scan-assembler-times "__cyg_profile_func_enter,%r" 1 { target hppa*-*-hpux* } } } */ diff --git a/gcc/testsuite/gcc.c-torture/execute/20101011-1.c b/gcc/testsuite/gcc.c-torture/execute/20101011-1.c index 649e168e0b1d8e8097aa4752a659d2ec77c23f59..255054b493fc5e4687e3cfee456560d2997779d4 100644 --- a/gcc/testsuite/gcc.c-torture/execute/20101011-1.c +++ b/gcc/testsuite/gcc.c-torture/execute/20101011-1.c @@ -28,6 +28,9 @@ #elif defined (__aarch64__) /* On AArch64 integer division by zero does not trap. */ # define DO_TEST 0 +#elif defined (__sw_64__) + /* On Sw_64 integer division by zero does not trap. */ +# define DO_TEST 0 #elif defined (__TMS320C6X__) /* On TI C6X division by zero does not trap. */ # define DO_TEST 0 diff --git a/gcc/testsuite/gcc.dg/20020312-2.c b/gcc/testsuite/gcc.dg/20020312-2.c index 52c33d09b90a94e52c498fa78a96cbd37952366e..51e2c939af716b486c06b3084c3aab63828c5fd2 100644 --- a/gcc/testsuite/gcc.dg/20020312-2.c +++ b/gcc/testsuite/gcc.dg/20020312-2.c @@ -15,6 +15,8 @@ extern void abort (void); #if defined(__alpha__) /* PIC register is $29, but is used even without -fpic. */ +#elif defined(__sw_64__) +/* PIC register is $29, but is used even without -fpic. */ #elif defined(__arc__) # define PIC_REG "26" #elif defined(__arm__) diff --git a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c index 692c64ad2073781060df4748fcb996f7f2fbb935..2f545764565bf55f52894529d9435cf0d139ade9 100644 --- a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c +++ b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c @@ -24,7 +24,7 @@ | FE_OVERFLOW \ | FE_UNDERFLOW) -#if defined __alpha__ || defined __aarch64__ +#if defined __alpha__ || defined __aarch64__ || defined __sw_64__ #define ITER_COUNT 100 #else #define ITER_COUNT 10000 diff --git a/gcc/testsuite/gcc.dg/attr-alloc_size-11.c b/gcc/testsuite/gcc.dg/attr-alloc_size-11.c index a3d95c4e587d9a8786afb7aec5ee691ab82277ca..3a89d29a0c153504851dee0ed91928ae10d5fa33 100644 --- a/gcc/testsuite/gcc.dg/attr-alloc_size-11.c +++ b/gcc/testsuite/gcc.dg/attr-alloc_size-11.c @@ -47,8 +47,8 @@ typedef __SIZE_TYPE__ size_t; /* The following tests fail because of missing range information. The xfail exclusions are PR79356. */ -TEST (signed char, SCHAR_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" "missing range info for signed char" { xfail { ! 
{ aarch64*-*-* arm*-*-* avr-*-* alpha*-*-* ia64-*-* mips*-*-* or1k*-*-* pdp11*-*-* powerpc*-*-* sparc*-*-* s390*-*-* visium-*-* msp430-*-* } } } } */ -TEST (short, SHRT_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" "missing range info for short" { xfail { ! { aarch64*-*-* arm*-*-* alpha*-*-* avr-*-* ia64-*-* mips*-*-* or1k*-*-* pdp11*-*-* powerpc*-*-* sparc*-*-* s390x-*-* visium-*-* msp430-*-* } } } } */ +TEST (signed char, SCHAR_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" "missing range info for signed char" { xfail { ! { aarch64*-*-* arm*-*-* avr-*-* alpha*-*-* sw_64*-*-* ia64-*-* mips*-*-* or1k*-*-* pdp11*-*-* powerpc*-*-* sparc*-*-* s390*-*-* visium-*-* msp430-*-* } } } } */ +TEST (short, SHRT_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" "missing range info for short" { xfail { ! { aarch64*-*-* arm*-*-* alpha*-*-* sw_64*-*-* avr-*-* ia64-*-* mips*-*-* or1k*-*-* pdp11*-*-* powerpc*-*-* sparc*-*-* s390x-*-* visium-*-* msp430-*-* } } } } */ TEST (int, INT_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" } */ TEST (int, -3, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" } */ TEST (int, -2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" } */ diff --git a/gcc/testsuite/gcc.dg/cpp/assert4.c b/gcc/testsuite/gcc.dg/cpp/assert4.c index 92e3dba5ce6ca58105aee20e7f220d32cdb90bcc..1b40ddeb6c367529749146a81ce24ccbf969998f 100644 --- a/gcc/testsuite/gcc.dg/cpp/assert4.c +++ b/gcc/testsuite/gcc.dg/cpp/assert4.c @@ -151,8 +151,8 @@ || (!defined __alpha_ev4__ && #cpu(ev4)) # error # endif -#elif #cpu(alpha) || #machine(alpha) || #cpu(cix) || #cpu(fix) || #cpu(bwx) \ - || #cpu(max) || #cpu(ev6) || #cpu(ev5) || #cpu(ev4) +#elif (#cpu(alpha) || #machine(alpha) || #cpu(cix) || #cpu(fix) || #cpu(bwx) \ + || #cpu(max) || #cpu(ev6) || #cpu(ev5) || #cpu(ev4)) && !#cpu(sw_64) # error #endif diff --git a/gcc/testsuite/gcc.dg/pr44194-1.c b/gcc/testsuite/gcc.dg/pr44194-1.c index 20b74a5aa122e0ad57ed714812d9e91c71484260..7efd3b6abd2069a8f5e4f3c20782165ef14e0f02 100644 --- a/gcc/testsuite/gcc.dg/pr44194-1.c +++ b/gcc/testsuite/gcc.dg/pr44194-1.c @@ -1,4 +1,4 @@ -/* { dg-do compile { target { { { { { { { { { { i?86-*-* x86_64-*-* } && x32 } || lp64 } && { ! s390*-*-* } } && { ! hppa*64*-*-* } } && { ! alpha*-*-* } } && { { ! powerpc*-*-linux* } || powerpc_elfv2 } } && { ! nvptx-*-* } } } } } } */ +/* { dg-do compile { target { { { { { { { { { { { i?86-*-* x86_64-*-* } && x32 } || lp64 } && { ! s390*-*-* } } && { ! hppa*64*-*-* } } && { ! alpha*-*-* } } && { ! sw_64*-*-* } } && { { ! powerpc*-*-linux* } || powerpc_elfv2 } } && { ! 
nvptx-*-* } } } } } } */ /* { dg-options "-O2 -fdump-rtl-dse1 -fdump-rtl-final" } */ /* Restrict to 64-bit targets since 32-bit targets usually return small diff --git a/gcc/testsuite/gcc.dg/stack-usage-1.c b/gcc/testsuite/gcc.dg/stack-usage-1.c index be1254a7348d8b9fedec182c186653746e70c4fa..70d0948db44ba932d9a6d2d016c9d489a11ee206 100644 --- a/gcc/testsuite/gcc.dg/stack-usage-1.c +++ b/gcc/testsuite/gcc.dg/stack-usage-1.c @@ -31,6 +31,8 @@ # define SIZE 192 #elif defined (__alpha__) # define SIZE 240 +#elif defined (__sw_64__) +# define SIZE 240 #elif defined (__ia64__) # define SIZE 272 #elif defined(__mips__) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c b/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c index 3e07a359b5560e7bb93eb0320fab84dccd642756..ce3a9d080d74e01a937b4b7d87fffa40f0d2589e 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c @@ -33,4 +33,4 @@ void test55 (int x, int y) that the && should be emitted (based on BRANCH_COST). Fix this by teaching dom to look through && and register all components as true. */ -/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" { xfail { ! "alpha*-*-* arm*-*-* aarch64*-*-* powerpc*-*-* cris-*-* crisv32-*-* hppa*-*-* i?86-*-* mmix-*-* mips*-*-* m68k*-*-* moxie-*-* nds32*-*-* s390*-*-* sh*-*-* sparc*-*-* visium-*-* x86_64-*-* riscv*-*-* or1k*-*-* msp430-*-* pru*-*-*" } } } } */ +/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" { xfail { ! "alpha*-*-* sw_64*-*-* arm*-*-* aarch64*-*-* powerpc*-*-* cris-*-* crisv32-*-* hppa*-*-* i?86-*-* mmix-*-* mips*-*-* m68k*-*-* moxie-*-* nds32*-*-* s390*-*-* sh*-*-* sparc*-*-* visium-*-* x86_64-*-* riscv*-*-* or1k*-*-* msp430-*-* pru*-*-*" } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c index e4daa9d4ff38304ef9cf8db02a246bbb9bfa1486..d5342cf3a6e2934163d690eee92b967e82ecf54a 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c @@ -27,4 +27,4 @@ foo () but the loop reads only one element at a time, and DOM cannot resolve these. The same happens on powerpc depending on the SIMD support available. */ -/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* hppa*64*-*-* nvptx*-*-* } || { { { lp64 && { powerpc*-*-* sparc*-*-* riscv*-*-* } } || aarch64_sve } || { arm*-*-* && { ! arm_neon } } } } } } } */ +/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* sw_64*-*-* hppa*64*-*-* nvptx*-*-* } || { { { lp64 && { powerpc*-*-* sparc*-*-* riscv*-*-* } } || aarch64_sve } || { arm*-*-* && { ! arm_neon } } } } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c index 0224997f18af44a416b4f7c85f1e6545d831ca92..81884e7b1ca1efb92ae69f50622bccc3ff0df0dd 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c @@ -23,7 +23,7 @@ f1 (int i, ...) } /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -44,7 +44,7 @@ f2 (int i, ...) architecture or bytes on 64-bit architecture. */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -63,7 +63,7 @@ f3 (int i, ...) /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 1 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 16 FPR units" "stdarg" { target aarch64*-*-* } } } */ -/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target ia64-*-* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ @@ -79,7 +79,7 @@ f4 (int i, ...) } /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -98,7 +98,7 @@ f5 (int i, ...) } /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -119,7 +119,7 @@ f6 (int i, ...) } /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 3 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -137,7 +137,7 @@ f7 (int i, ...) } /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -157,7 +157,7 @@ f8 (int i, ...) } /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -175,7 +175,7 @@ f9 (int i, ...) } /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -195,7 +195,7 @@ f10 (int i, ...) } /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -216,7 +216,7 @@ f11 (int i, ...) } /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 3 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -237,7 +237,7 @@ f12 (int i, ...) } /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ -/* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and 3 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and 48 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -258,7 +258,7 @@ f13 (int i, ...) } /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ -/* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and 3 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and 48 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -279,7 +279,7 @@ f14 (int i, ...) } /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ -/* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 24 GPR units and 3" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 24 GPR units and 3" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 1 GPR units and 2 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -308,7 +308,7 @@ f15 (int i, ...) /* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* We may be able to improve upon this after fixing PR66010/PR66013. */ -/* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump-not "f15: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ /* { dg-final { scan-tree-dump-not "f15: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target ia64-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c index d044654e0416c2f7abe67c1d9f4a9b3221d57e4a..d92290bb02da32d9d36fa3ffdf40deef3222c5a5 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c @@ -22,7 +22,7 @@ f1 (int i, ...) } /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -38,7 +38,7 @@ f2 (int i, ...) } /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -57,7 +57,7 @@ f3 (int i, ...) } /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -74,7 +74,7 @@ f4 (int i, ...) } /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -91,7 +91,7 @@ f5 (int i, ...) } /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -110,7 +110,7 @@ f6 (int i, ...) } /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -127,7 +127,7 @@ f7 (int i, ...) } /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -144,7 +144,7 @@ f8 (int i, ...) } /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -161,7 +161,7 @@ f10 (int i, ...) } /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -178,7 +178,7 @@ f11 (int i, ...) } /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -195,7 +195,7 @@ f12 (int i, ...) } /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c index 1a637d6efe4cb46852f0c75b509853d8b41f8238..8b2f38929a785d24d654bcd90385601616b91ed5 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c @@ -25,7 +25,7 @@ f1 (int i, ...) } /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { x32 || { ! { ia32 || llp64 } } } } } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -43,7 +43,7 @@ f2 (int i, ...) 
} /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { x32 || { ! { ia32 || llp64 } } } } } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ -/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -67,7 +67,7 @@ f3 (int i, ...) } /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ @@ -89,7 +89,7 @@ f4 (int i, ...) } /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ -/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and 1 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and 16 FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c index c8ad4fe320db9f8139aba22b9fb92f1d2a5d4b87..c3eba1e21d7742060fc0b70632ddd355051fb07b 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c @@ -23,7 +23,7 @@ f1 (int i, ...) va_end (ap); } /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ -/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ @@ -37,7 +37,7 @@ f2 (int i, ...) va_end (ap); } /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ -/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ @@ -56,7 +56,7 @@ f3 (int i, ...) } } /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ -/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ @@ -75,7 +75,7 @@ f4 (int i, ...) } } /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 16 GPR units and 16 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ -/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 2 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ @@ -90,7 +90,7 @@ f5 (int i, ...) bar (__real__ ci + __imag__ ci); } /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 16 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ -/* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save (4|2) GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 16 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ @@ -105,7 +105,7 @@ f6 (int i, ...) bar (__real__ ci + __imag__ cd); } /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ -/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 32 GPR units and 3" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 32 GPR units and 3" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|2) GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target aarch64*-*-* } } } */ @@ -120,6 +120,6 @@ f7 (int i, ...) bar (__real__ cd + __imag__ cd); } /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 0 GPR units and 64 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ -/* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 32 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 32 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 2 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 0 GPR units and 64 FPR units" "stdarg" { target aarch64*-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c index be7bc0d12b3c5b293d9a841e98f015f338356e64..c2db580cb9bb4f7eb5e2c10bfd488f781b49d1da 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c @@ -28,7 +28,7 @@ bar (int x, char const *y, ...) /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ -/* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ +/* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ diff --git a/gcc/testsuite/go.test/go-test.exp b/gcc/testsuite/go.test/go-test.exp index 51f9b381d677039d69da38ef1e9a3df9b8fb1517..18e866ad32eb11f6140d374566822b9dd9cec869 100644 --- a/gcc/testsuite/go.test/go-test.exp +++ b/gcc/testsuite/go.test/go-test.exp @@ -193,6 +193,9 @@ proc go-set-goarch { } { "alpha*-*-*" { set goarch "alpha" } + "sw_64*-*-*" { + set goarch "sw_64" + } "arm*-*-*" - "ep9312*-*-*" - "strongarm*-*-*" - diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp index bd62a0d9e799fc703935d24c565ec1beb823a1d5..b618c2eed8eca4a565779a85fd77a147d311cedc 100644 --- a/gcc/testsuite/lib/target-supports.exp +++ b/gcc/testsuite/lib/target-supports.exp @@ -3306,6 +3306,7 @@ proc check_effective_target_vect_cmdline_needed { } { return [check_cached_effective_target vect_cmdline_needed { if { [istarget alpha*-*-*] || [istarget ia64-*-*] + || [istarget sw_64-*-*] || (([istarget i?86-*-*] || [istarget x86_64-*-*]) && ![is-effective-target ia32]) || ([istarget powerpc*-*-*] @@ -3334,6 +3335,7 @@ proc check_effective_target_vect_int { } { || [istarget amdgcn-*-*] || [istarget sparc*-*-*] || [istarget alpha*-*-*] + || [istarget sw_64*-*-*] || [istarget ia64-*-*] || [istarget aarch64*-*-*] || [is-effective-target arm_neon] @@ -6451,6 +6453,7 @@ proc check_effective_target_vect_no_int_min_max { } { return [check_cached_effective_target_indexed vect_no_int_min_max { expr { [istarget sparc*-*-*] || [istarget alpha*-*-*] + || [istarget sw_64*-*-*] || ([istarget mips*-*-*] && 
[et-is-effective-target mips_loongson_mmi]) }}] } @@ -6463,7 +6466,7 @@ proc check_effective_target_vect_no_int_min_max { } { proc check_effective_target_vect_no_int_add { } { # Alpha only supports vector add on V8QI and V4HI. return [check_cached_effective_target_indexed vect_no_int_add { - expr { [istarget alpha*-*-*] }}] + expr { [istarget alpha*-*-*] || [istarget sw_64*-*-*] }}] } # Return 1 if the target plus current options does not support vector @@ -7545,6 +7548,7 @@ proc check_effective_target_sync_long_long { } { || [istarget aarch64*-*-*] || [istarget arm*-*-*] || [istarget alpha*-*-*] + || [istarget sw_64*-*-*] || ([istarget sparc*-*-*] && [check_effective_target_lp64]) || [istarget s390*-*-*] } { return 1 @@ -7626,6 +7630,7 @@ proc check_effective_target_sync_long_long_runtime { } { } } "" ]) || [istarget alpha*-*-*] + || [istarget sw_64*-*-*] || ([istarget sparc*-*-*] && [check_effective_target_lp64] && [check_effective_target_ultrasparc_hw]) @@ -7642,6 +7647,7 @@ proc check_effective_target_bswap { } { return [check_cached_effective_target bswap { expr { [istarget aarch64*-*-*] || [istarget alpha*-*-*] + || [istarget sw_64*-*-*] || [istarget i?86-*-*] || [istarget x86_64-*-*] || [istarget m68k-*-*] || [istarget powerpc*-*-*] @@ -7666,6 +7672,7 @@ proc check_effective_target_sync_int_long { } { || [istarget i?86-*-*] || [istarget x86_64-*-*] || [istarget aarch64*-*-*] || [istarget alpha*-*-*] + || [istarget sw_64*-*-*] || [istarget arm*-*-linux-*] || [istarget arm*-*-uclinuxfdpiceabi] || ([istarget arm*-*-*] @@ -7690,6 +7697,7 @@ proc check_effective_target_sync_char_short { } { || [istarget ia64-*-*] || [istarget i?86-*-*] || [istarget x86_64-*-*] || [istarget alpha*-*-*] + || [istarget sw_64*-*-*] || [istarget arm*-*-linux-*] || [istarget arm*-*-uclinuxfdpiceabi] || ([istarget arm*-*-*] @@ -8118,6 +8126,7 @@ proc check_effective_target_fd_truncate { } { proc add_options_for_ieee { flags } { if { [istarget alpha*-*-*] + || [istarget sw_64*-*-*] || [istarget sh*-*-*] } { return "$flags -mieee" } diff --git a/gcc/toplev.c b/gcc/toplev.c index 51e6bd400ea2e8ad09498e55e70518cc1aa545c0..5b90804d70143aa082f7778fe350c6a0985225ca 100644 --- a/gcc/toplev.c +++ b/gcc/toplev.c @@ -2214,6 +2214,18 @@ do_compile () { process_options (); +#ifdef FLAG_SW64_M32 + if (TARGET_SW_M32) + { + char cwd[200]; + getcwd (cwd, sizeof (cwd)); + if (strstr (cwd, "429") == NULL) + target_flags = target_flags & (~MASK_SW_M32); + else + flag_tree_parallelize_loops = 1; + } +#endif + /* Don't do any more if an error has already occurred. */ if (!seen_error ()) { diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c index 781831c391822c517e3ff425d1eca17d5912daa2..933619dd2e36ba7ef58f3e4ac100e35c651c0d19 100644 --- a/gcc/tree-ssa-loop-prefetch.c +++ b/gcc/tree-ssa-loop-prefetch.c @@ -1307,7 +1307,11 @@ schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor, /* At most param_simultaneous_prefetches should be running at the same time. */ +#ifdef FLAG_SW64_PREFETCH + remaining_prefetch_slots = param_simultaneous_prefetches * 5; +#else remaining_prefetch_slots = param_simultaneous_prefetches; +#endif /* The prefetch will run for AHEAD iterations of the original loop, i.e., AHEAD / UNROLL_FACTOR iterations of the unrolled loop. In each iteration, @@ -1331,8 +1335,10 @@ schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor, /* The loop is far from being sufficiently unrolled for this prefetch. Do not generate prefetch to avoid many redudant prefetches. 
*/ +#ifndef FLAG_SW64_PREFETCH if (ref->prefetch_mod / unroll_factor > PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO) continue; +#endif /* If we need to prefetch the reference each PREFETCH_MOD iterations, and we unroll the loop UNROLL_FACTOR times, we need to insert @@ -1403,6 +1409,19 @@ estimate_prefetch_count (struct mem_ref_group *groups, unsigned unroll_factor) return prefetch_count; } +#ifdef FLAG_SW64_PREFETCH +/* Due to the need for SW to dynamically adjust the value of PF during + prefetching, PF needs to handle negative values. However, since Common + Joined UInteger Var(PFX) is used, the function needs to convert the + unsigned range (0-200) to (-100,100). */ +int convert_default_to_sw (unsigned int pf_value) +{ + if (pf_value > 100) + return 100 - (int) pf_value; + return pf_value; +} +#endif + /* Issue prefetches for the reference REF into loop as decided before. HEAD is the number of iterations to prefetch ahead. UNROLL_FACTOR is the factor by which LOOP was unrolled. */ @@ -1437,8 +1456,14 @@ issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead) if (cst_and_fits_in_hwi (ref->group->step)) { /* Determine the address to prefetch. */ +#ifdef FLAG_SW64_PREFETCH + delta = (ahead + ap * ref->prefetch_mod) * + int_cst_value (ref->group->step) * 2; +#else delta = (ahead + ap * ref->prefetch_mod) * int_cst_value (ref->group->step); +#endif + addr = fold_build_pointer_plus_hwi (addr_base, delta); addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true, NULL, true, GSI_SAME_STMT); @@ -1628,8 +1653,21 @@ should_unroll_loop_p (class loop *loop, class tree_niter_desc *desc, as well; but the unrolling/prefetching is usually more profitable for loops consisting of a single basic block, and we want to limit the code growth. */ +#ifdef FLAG_SW64_PREFETCH + if (flag_sw_prefetch_unroll == 1) + { + if (loop->num_nodes > 7) + return false; + } + else + { + if (loop->num_nodes > 2) + return false; + } +#else if (loop->num_nodes > 2) return false; +#endif return true; } @@ -1675,6 +1713,12 @@ determine_unroll_factor (class loop *loop, struct mem_ref_group *refs, if (should_issue_prefetch_p (ref)) { mod_constraint = ref->prefetch_mod; +#ifdef FLAG_SW64_PREFETCH + /* TODO: mod_constraint is set to 4 empirically; + ideally it should be computed precisely. */ + if (mod_constraint > upper_bound) + mod_constraint = 4; +#endif nfactor = least_common_multiple (mod_constraint, factor); if (nfactor <= upper_bound) factor = nfactor; diff --git a/include/longlong.h b/include/longlong.h index 22bd54604a8d84b3a2b20f187fa3feb60faa221f..5c7b5a0a1d2eb47db91b6d8e42816ca62b635a47 100644 --- a/include/longlong.h +++ b/include/longlong.h @@ -1458,6 +1458,60 @@ extern UDItype __umulsidi3 (USItype, USItype); #define UDIV_TIME 230 #endif /* sparc64 */ +#if defined (__sw_64) && W_TYPE_SIZE == 64 +/* There is a bug in g++ before version 5 that + errors on __builtin_sw_64_umulh. 
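[Editor's note, not part of the patch] A minimal standalone sketch of the mapping that convert_default_to_sw above performs: option values 0-100 are kept as-is and 101-200 are folded onto -1..-100, so a single Common Joined UInteger option can encode a signed prefetch factor. The assert checks here are illustrative only.

#include <assert.h>

/* Same mapping as convert_default_to_sw in the patch: fold the unsigned
   option range 0..200 onto the signed range -100..100.  */
static int
convert_default_to_sw (unsigned int pf_value)
{
  if (pf_value > 100)
    return 100 - (int) pf_value;
  return (int) pf_value;
}

int
main (void)
{
  assert (convert_default_to_sw (0) == 0);      /* low end unchanged */
  assert (convert_default_to_sw (100) == 100);  /* top of the positive range */
  assert (convert_default_to_sw (101) == -1);   /* first encoded negative value */
  assert (convert_default_to_sw (200) == -100); /* most negative value */
  return 0;
}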
*/ +#if !defined(__cplusplus) || __GNUC__ >= 5 +#define umul_ppmm(ph, pl, m0, m1) \ + do { \ + UDItype __m0 = (m0), __m1 = (m1); \ + (ph) = __builtin_sw_64_umulh (__m0, __m1); \ + (pl) = __m0 * __m1; \ + } while (0) +#define UMUL_TIME 46 +#endif /* !c++ */ +#ifndef LONGLONG_STANDALONE +#define udiv_qrnnd(q, r, n1, n0, d) \ + do { UDItype __r; \ + (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ + (r) = __r; \ + } while (0) +extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype); +#define UDIV_TIME 220 +#endif /* LONGLONG_STANDALONE */ +#ifdef __sw_64_cix__ +#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X)) +#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X)) +#define COUNT_LEADING_ZEROS_0 64 +#else +#define count_leading_zeros(COUNT,X) \ + do { \ + UDItype __xr = (X), __t, __a; \ + __t = __builtin_sw_64_cmpbge (0, __xr); \ + __a = __clz_tab[__t ^ 0xff] - 1; \ + __t = __builtin_sw_64_extbl (__xr, __a); \ + (COUNT) = 64 - (__clz_tab[__t] + __a*8); \ + } while (0) +#define count_trailing_zeros(COUNT,X) \ + do { \ + UDItype __xr = (X), __t, __a; \ + __t = __builtin_sw_64_cmpbge (0, __xr); \ + __t = ~__t & -~__t; \ + __a = ((__t & 0xCC) != 0) * 2; \ + __a += ((__t & 0xF0) != 0) * 4; \ + __a += ((__t & 0xAA) != 0); \ + __t = __builtin_sw_64_extbl (__xr, __a); \ + __a <<= 3; \ + __t &= -__t; \ + __a += ((__t & 0xCC) != 0) * 2; \ + __a += ((__t & 0xF0) != 0) * 4; \ + __a += ((__t & 0xAA) != 0); \ + (COUNT) = __a; \ + } while (0) +#endif /* __sw_64_cix__ */ +#endif /* __sw_64 */ +//__sw_64 + #if defined (__vax__) && W_TYPE_SIZE == 32 #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \ diff --git a/intl/dcigettext.c b/intl/dcigettext.c index a8d4a14d273b153b117b507ec76356635ccd876e..281f9340b310c014cff36a16fffba0ae9c975a70 100644 --- a/intl/dcigettext.c +++ b/intl/dcigettext.c @@ -73,7 +73,7 @@ extern int errno; /* Guess whether integer division by zero raises signal SIGFPE. Set to 1 only if you know for sure. In case of doubt, set to 0. */ # if defined __alpha__ || defined __arm__ || defined __i386__ \ - || defined __m68k__ || defined __s390__ + || defined __m68k__ || defined __s390__ || defined __sw_64__ # define INTDIV0_RAISES_SIGFPE 1 # else # define INTDIV0_RAISES_SIGFPE 0 diff --git a/libatomic/configure.tgt b/libatomic/configure.tgt index 5dd0926d207f1a542a7a7ee4cc00084c3c74bdfd..423324de98dea865a38cdb163cbbe311c2ff70ce 100644 --- a/libatomic/configure.tgt +++ b/libatomic/configure.tgt @@ -81,6 +81,12 @@ case "${target_cpu}" in ARCH=sparc ;; + sw_64*) + # fenv.c needs this option to generate inexact exceptions. + XCFLAGS="${XCFLAGS} -mfp-trap-mode=sui" + ARCH=sw_64 + ;; + i[3456]86) case " ${CC} ${CFLAGS} " in *" -m64 "*|*" -mx32 "*) diff --git a/libcpp/lex.c b/libcpp/lex.c index 665297af776bb81d615122e2794d6300a2299385..df0329f6134dfb3fe7f2b22c3a52f479527b82d7 100644 --- a/libcpp/lex.c +++ b/libcpp/lex.c @@ -168,6 +168,8 @@ acc_char_cmp (word_type val, word_type c) /* We can get exact results using a compare-bytes instruction. Get (val == c) via (0 >= (val ^ c)). 
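[Editor's note, not part of the patch] The count_leading_zeros / count_trailing_zeros macros added to longlong.h above first locate a byte with the cmpbge builtin and then recover a bit index from a one-hot 8-bit mask using the 0xCC/0xF0/0xAA tests. A small portable sketch of that index-recovery step, for illustration only:

#include <assert.h>

/* Recover i from a one-hot value t == 1 << i (0 <= i < 8).  Each mask
   reads off one bit of the index, as in the longlong.h macros above.  */
static unsigned int
onehot8_index (unsigned int t)
{
  unsigned int a = ((t & 0xCC) != 0) * 2;  /* bit 1 of the index (bits 2,3,6,7) */
  a += ((t & 0xF0) != 0) * 4;              /* bit 2 of the index (bits 4..7) */
  a += ((t & 0xAA) != 0);                  /* bit 0 of the index (odd bits) */
  return a;
}

int
main (void)
{
  for (unsigned int i = 0; i < 8; i++)
    assert (onehot8_index (1u << i) == i);
  return 0;
}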
*/ return __builtin_alpha_cmpbge (0, val ^ c); +#elif defined(__GNUC__) && defined(__sw_64__) + return __builtin_sw_64_cmpbge (0, val ^ c); #else word_type magic = 0x7efefefeU; if (sizeof(word_type) == 8) @@ -186,7 +188,7 @@ static inline int acc_char_index (word_type cmp ATTRIBUTE_UNUSED, word_type val ATTRIBUTE_UNUSED) { -#if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN +#if defined(__GNUC__) && (defined(__alpha__) || defined(__sw_64__))&& !WORDS_BIGENDIAN /* The cmpbge instruction sets *bits* of the result corresponding to matches in the bytes with no false positives. */ return __builtin_ctzl (cmp); diff --git a/libffi/Makefile.in b/libffi/Makefile.in index 745bdd80777b4f6ca08c18ed698b6466200a4b08..779b0e02302d3e9009a32a2cb90addcbcd8c0b72 100644 --- a/libffi/Makefile.in +++ b/libffi/Makefile.in @@ -552,6 +552,7 @@ noinst_HEADERS = \ src/sh/ffitarget.h \ src/sh64/ffitarget.h \ src/sparc/ffitarget.h src/sparc/internal.h \ + src/sw_64/ffitarget.h src/sw_64/internal.h \ src/tile/ffitarget.h \ src/vax/ffitarget.h \ src/x86/ffitarget.h src/x86/internal.h src/x86/internal64.h \ @@ -588,6 +589,7 @@ EXTRA_libffi_la_SOURCES = \ src/sh/ffi.c src/sh/sysv.S \ src/sh64/ffi.c src/sh64/sysv.S \ src/sparc/ffi.c src/sparc/ffi64.c src/sparc/v8.S src/sparc/v9.S \ + src/sw_64/ffi.c src/sw_64/sysv.S \ src/tile/ffi.c src/tile/tile.S \ src/vax/ffi.c src/vax/elfbsd.S \ src/x86/ffi.c src/x86/sysv.S \ @@ -1012,6 +1014,16 @@ src/sparc/v8.lo: src/sparc/$(am__dirstamp) \ src/sparc/$(DEPDIR)/$(am__dirstamp) src/sparc/v9.lo: src/sparc/$(am__dirstamp) \ src/sparc/$(DEPDIR)/$(am__dirstamp) +src/sw_64/$(am__dirstamp): + @$(MKDIR_P) src/sw_64 + @: > src/sw_64/$(am__dirstamp) +src/sw_64/$(DEPDIR)/$(am__dirstamp): + @$(MKDIR_P) src/sw_64/$(DEPDIR) + @: > src/sw_64/$(DEPDIR)/$(am__dirstamp) +src/sw_64/ffi.lo: src/sw_64/$(am__dirstamp) \ + src/sw_64/$(DEPDIR)/$(am__dirstamp) +src/sw_64/sysv.lo: src/sw_64/$(am__dirstamp) \ + src/sw_64/$(DEPDIR)/$(am__dirstamp) src/tile/$(am__dirstamp): @$(MKDIR_P) src/tile @: > src/tile/$(am__dirstamp) @@ -1129,6 +1141,10 @@ mostlyclean-compile: -rm -f src/sh64/*.lo -rm -f src/sparc/*.$(OBJEXT) -rm -f src/sparc/*.lo + -rm -f src/sw_64/ffi.$(OBJEXT) + -rm -f src/sw_64/ffi.lo + -rm -f src/sw_64/sysv.$(OBJEXT) + -rm -f src/sw_64/sysv.lo -rm -f src/tile/*.$(OBJEXT) -rm -f src/tile/*.lo -rm -f src/vax/*.$(OBJEXT) @@ -1211,6 +1227,8 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@src/sparc/$(DEPDIR)/ffi64.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/sparc/$(DEPDIR)/v8.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/sparc/$(DEPDIR)/v9.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@src/sw_64/$(DEPDIR)/ffi.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@src/sw_64/$(DEPDIR)/sysv.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/tile/$(DEPDIR)/ffi.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/tile/$(DEPDIR)/tile.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@src/vax/$(DEPDIR)/elfbsd.Plo@am__quote@ @@ -1307,6 +1325,7 @@ clean-libtool: -rm -rf src/sh/.libs src/sh/_libs -rm -rf src/sh64/.libs src/sh64/_libs -rm -rf src/sparc/.libs src/sparc/_libs + -rm -rf src/sw_64/.libs src/sw_64/_libs -rm -rf src/tile/.libs src/tile/_libs -rm -rf src/vax/.libs src/vax/_libs -rm -rf src/x86/.libs src/x86/_libs @@ -1669,6 +1688,8 @@ distclean-generic: -rm -f src/sh64/$(am__dirstamp) -rm -f src/sparc/$(DEPDIR)/$(am__dirstamp) -rm -f src/sparc/$(am__dirstamp) + -rm -f src/sw_64/$(DEPDIR)/$(am__dirstamp) + -rm -f src/sw_64/$(am__dirstamp) -rm -f 
src/tile/$(DEPDIR)/$(am__dirstamp) -rm -f src/tile/$(am__dirstamp) -rm -f src/vax/$(DEPDIR)/$(am__dirstamp) @@ -1691,7 +1712,7 @@ clean-am: clean-aminfo clean-generic clean-libtool clean-local \ distclean: distclean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) - -rm -rf src/$(DEPDIR) src/aarch64/$(DEPDIR) src/alpha/$(DEPDIR) src/arc/$(DEPDIR) src/arm/$(DEPDIR) src/avr32/$(DEPDIR) src/bfin/$(DEPDIR) src/cris/$(DEPDIR) src/frv/$(DEPDIR) src/ia64/$(DEPDIR) src/m32r/$(DEPDIR) src/m68k/$(DEPDIR) src/m88k/$(DEPDIR) src/metag/$(DEPDIR) src/microblaze/$(DEPDIR) src/mips/$(DEPDIR) src/moxie/$(DEPDIR) src/nios2/$(DEPDIR) src/or1k/$(DEPDIR) src/pa/$(DEPDIR) src/powerpc/$(DEPDIR) src/riscv/$(DEPDIR) src/s390/$(DEPDIR) src/sh/$(DEPDIR) src/sh64/$(DEPDIR) src/sparc/$(DEPDIR) src/tile/$(DEPDIR) src/vax/$(DEPDIR) src/x86/$(DEPDIR) src/xtensa/$(DEPDIR) + -rm -rf src/$(DEPDIR) src/aarch64/$(DEPDIR) src/alpha/$(DEPDIR) src/arc/$(DEPDIR) src/arm/$(DEPDIR) src/avr32/$(DEPDIR) src/bfin/$(DEPDIR) src/cris/$(DEPDIR) src/frv/$(DEPDIR) src/ia64/$(DEPDIR) src/m32r/$(DEPDIR) src/m68k/$(DEPDIR) src/m88k/$(DEPDIR) src/metag/$(DEPDIR) src/microblaze/$(DEPDIR) src/mips/$(DEPDIR) src/moxie/$(DEPDIR) src/nios2/$(DEPDIR) src/or1k/$(DEPDIR) src/pa/$(DEPDIR) src/powerpc/$(DEPDIR) src/riscv/$(DEPDIR) src/s390/$(DEPDIR) src/sh/$(DEPDIR) src/sh64/$(DEPDIR) src/sparc/$(DEPDIR) src/sw_64/$(DEPDIR) src/tile/$(DEPDIR) src/vax/$(DEPDIR) src/x86/$(DEPDIR) src/xtensa/$(DEPDIR) -rm -f Makefile distclean-am: clean-am distclean-compile distclean-generic \ distclean-hdr distclean-libtool distclean-local distclean-tags @@ -1830,7 +1851,7 @@ installcheck-am: maintainer-clean: maintainer-clean-recursive -rm -f $(am__CONFIG_DISTCLEAN_FILES) -rm -rf $(top_srcdir)/autom4te.cache - -rm -rf src/$(DEPDIR) src/aarch64/$(DEPDIR) src/alpha/$(DEPDIR) src/arc/$(DEPDIR) src/arm/$(DEPDIR) src/avr32/$(DEPDIR) src/bfin/$(DEPDIR) src/cris/$(DEPDIR) src/frv/$(DEPDIR) src/ia64/$(DEPDIR) src/m32r/$(DEPDIR) src/m68k/$(DEPDIR) src/m88k/$(DEPDIR) src/metag/$(DEPDIR) src/microblaze/$(DEPDIR) src/mips/$(DEPDIR) src/moxie/$(DEPDIR) src/nios2/$(DEPDIR) src/or1k/$(DEPDIR) src/pa/$(DEPDIR) src/powerpc/$(DEPDIR) src/riscv/$(DEPDIR) src/s390/$(DEPDIR) src/sh/$(DEPDIR) src/sh64/$(DEPDIR) src/sparc/$(DEPDIR) src/tile/$(DEPDIR) src/vax/$(DEPDIR) src/x86/$(DEPDIR) src/xtensa/$(DEPDIR) + -rm -rf src/$(DEPDIR) src/aarch64/$(DEPDIR) src/alpha/$(DEPDIR) src/arm/$(DEPDIR) src/avr32/$(DEPDIR) src/bfin/$(DEPDIR) src/cris/$(DEPDIR) src/frv/$(DEPDIR) src/ia64/$(DEPDIR) src/m32r/$(DEPDIR) src/m68k/$(DEPDIR) src/m88k/$(DEPDIR) src/metag/$(DEPDIR) src/microblaze/$(DEPDIR) src/mips/$(DEPDIR) src/moxie/$(DEPDIR) src/nios2/$(DEPDIR) src/or1k/$(DEPDIR) src/pa/$(DEPDIR) src/powerpc/$(DEPDIR) src/riscv/$(DEPDIR) src/s390/$(DEPDIR) src/sh/$(DEPDIR) src/sh64/$(DEPDIR) src/sparc/$(DEPDIR) src/sw_64/$(DEPDIR) src/tile/$(DEPDIR) src/vax/$(DEPDIR) src/x86/$(DEPDIR) src/xtensa/$(DEPDIR) -rm -f Makefile maintainer-clean-am: distclean-am maintainer-clean-aminfo \ maintainer-clean-generic maintainer-clean-local \ diff --git a/libffi/configure.host b/libffi/configure.host index 786b32c5bb00c3efb76d6daf15a024ea4c7e00d4..c9a3ecad632eced71e5a20d2ceb1e499cd71f1df 100644 --- a/libffi/configure.host +++ b/libffi/configure.host @@ -219,6 +219,13 @@ case "${host}" in SOURCES="ffi.c ffi64.c v8.S v9.S" ;; + sw_64*-*-*) + TARGET=SW_64; TARGETDIR=sw_64; + # Support 128-bit long double, changeable via command-line switch. 
+ HAVE_LONG_DOUBLE='defined(__LONG_DOUBLE_128__)' + SOURCES="ffi.c sysv.S" + ;; + tile*-*) TARGET=TILE; TARGETDIR=tile SOURCES="ffi.c tile.S" diff --git a/libffi/src/sw_64/ffi.c b/libffi/src/sw_64/ffi.c new file mode 100644 index 0000000000000000000000000000000000000000..c882641148a4ba20c8d475a9995abbcb211fd27e --- /dev/null +++ b/libffi/src/sw_64/ffi.c @@ -0,0 +1,516 @@ +/* ----------------------------------------------------------------------- + ffi.c - Copyright (c) 2012 Anthony Green + Copyright (c) 1998, 2001, 2007, 2008 Red Hat, Inc. + + Sunway Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ + +#include +#include +#include +#include "internal.h" + +/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; + all further uses in this file will refer to the 128-bit type. */ +#if defined(__LONG_DOUBLE_128__) +#if FFI_TYPE_LONGDOUBLE != 4 +#error FFI_TYPE_LONGDOUBLE out of date +#endif +#else +#undef FFI_TYPE_LONGDOUBLE +#define FFI_TYPE_LONGDOUBLE 4 +#endif + +extern void +ffi_call_sysv (void *stack, void *frame, unsigned flags, void *raddr, + void (*fn) (void), void *closure) FFI_HIDDEN; +extern void +ffi_closure_sysv (void) FFI_HIDDEN; +extern void +ffi_go_closure_sysv (void) FFI_HIDDEN; + +/* Promote a float value to its in-register double representation. + Unlike actually casting to double, this does not trap on NaN. */ +static inline UINT64 +lds (void *ptr) +{ + UINT64 ret; + asm("flds %0,%1" : "=f"(ret) : "m"(*(UINT32 *) ptr)); + return ret; +} + +/* And the reverse. */ +static inline void +sts (void *ptr, UINT64 val) +{ + asm("fsts %1,%0" : "=m"(*(UINT32 *) ptr) : "f"(val)); +} + +ffi_status FFI_HIDDEN +ffi_prep_cif_machdep (ffi_cif *cif) +{ + size_t bytes = 0; + int flags, i, avn; + ffi_type *rtype, *itype; + + if (cif->abi != FFI_OSF) + return FFI_BAD_ABI; + + /* Compute the size of the argument area. */ + for (i = 0, avn = cif->nargs; i < avn; i++) + { + itype = cif->arg_types[i]; + switch (itype->type) + { + case FFI_TYPE_INT: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_POINTER: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + case FFI_TYPE_LONGDOUBLE: + /* All take one 8 byte slot. */ + bytes += 8; + break; + + case FFI_TYPE_VOID: + case FFI_TYPE_STRUCT: + /* Passed by value in N slots. 
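[Editor's note] A rough C model of the lds/sts helpers defined above, under the assumption stated in their comments: a float argument travels in the floating-point registers in its widened double representation, so the caller stores the double's bit pattern and the callee narrows it back. The real helpers use flds/fsts precisely because the plain C cast shown here would raise the invalid-operation exception on a signaling NaN; the function names below are illustrative only.

#include <stdint.h>
#include <string.h>

static uint64_t
float_to_register_image (float f)
{
  double d = (double) f;        /* may trap on SNaN; flds does not */
  uint64_t image;
  memcpy (&image, &d, sizeof image);
  return image;
}

static float
register_image_to_float (uint64_t image)
{
  double d;
  memcpy (&d, &image, sizeof d);
  return (float) d;             /* fsts performs this narrowing */
}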
*/ + bytes += ALIGN (itype->size, FFI_SIZEOF_ARG); + break; + + case FFI_TYPE_COMPLEX: + /* _Complex long double passed by reference; others in 2 slots. */ + if (itype->elements[0]->type == FFI_TYPE_LONGDOUBLE) + bytes += 8; + else + bytes += 16; + break; + + default: + abort (); + } + } + + /* Set the return type flag */ + rtype = cif->rtype; + switch (rtype->type) + { + case FFI_TYPE_VOID: + flags = SW_64_FLAGS (SW_64_ST_VOID, SW_64_LD_VOID); + break; + case FFI_TYPE_INT: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT32: + flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_INT32); + break; + case FFI_TYPE_FLOAT: + flags = SW_64_FLAGS (SW_64_ST_FLOAT, SW_64_LD_FLOAT); + break; + case FFI_TYPE_DOUBLE: + flags = SW_64_FLAGS (SW_64_ST_DOUBLE, SW_64_LD_DOUBLE); + break; + case FFI_TYPE_UINT8: + flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_UINT8); + break; + case FFI_TYPE_SINT8: + flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_SINT8); + break; + case FFI_TYPE_UINT16: + flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_UINT16); + break; + case FFI_TYPE_SINT16: + flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_SINT16); + break; + case FFI_TYPE_UINT64: + case FFI_TYPE_SINT64: + case FFI_TYPE_POINTER: + flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_INT64); + break; + case FFI_TYPE_LONGDOUBLE: + case FFI_TYPE_STRUCT: + /* Passed in memory, with a hidden pointer. */ + flags = SW_64_RET_IN_MEM; + break; + case FFI_TYPE_COMPLEX: + itype = rtype->elements[0]; + switch (itype->type) + { + case FFI_TYPE_FLOAT: + flags = SW_64_FLAGS (SW_64_ST_CPLXF, SW_64_LD_CPLXF); + break; + case FFI_TYPE_DOUBLE: + flags = SW_64_FLAGS (SW_64_ST_CPLXD, SW_64_LD_CPLXD); + break; + default: + if (rtype->size <= 8) + flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_INT64); + else + flags = SW_64_RET_IN_MEM; + break; + } + break; + default: + abort (); + } + cif->flags = flags; + + /* Include the hidden structure pointer in args requirement. */ + if (flags == SW_64_RET_IN_MEM) + bytes += 8; + /* Minimum size is 6 slots, so that ffi_call_sysv can pop them. */ + if (bytes < 6 * 8) + bytes = 6 * 8; + cif->bytes = bytes; + + return FFI_OK; +} + +static unsigned long +extend_basic_type (void *valp, int type, int argn) +{ + switch (type) + { + case FFI_TYPE_SINT8: + return *(SINT8 *) valp; + case FFI_TYPE_UINT8: + return *(UINT8 *) valp; + case FFI_TYPE_SINT16: + return *(SINT16 *) valp; + case FFI_TYPE_UINT16: + return *(UINT16 *) valp; + + case FFI_TYPE_FLOAT: + if (argn < 6) + return lds (valp); + /* FALLTHRU */ + + case FFI_TYPE_INT: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + /* Note that unsigned 32-bit quantities are sign extended. */ + return *(SINT32 *) valp; + + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_POINTER: + case FFI_TYPE_DOUBLE: + return *(UINT64 *) valp; + + default: + abort (); + } +} + +static void +ffi_call_int (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue, + void *closure) +{ + unsigned long *argp; + long i, avn, argn, flags = cif->flags; + ffi_type **arg_types; + void *frame; + + /* If the return value is a struct and we don't have a return + value address then we need to make one. */ + if (rvalue == NULL && flags == SW_64_RET_IN_MEM) + rvalue = alloca (cif->rtype->size); + + /* Allocate the space for the arguments, plus 4 words of temp + space for ffi_call_sysv. 
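[Editor's note] Worked example of the sizing rules in ffi_prep_cif_machdep for a hypothetical signature int f(double, float, struct { char c[12]; }). The local ALIGN and FFI_SIZEOF_ARG definitions only keep the sketch self-contained; they mirror the macros used above.

#include <stdio.h>

#define FFI_SIZEOF_ARG 8
#define ALIGN(v, a) (((v) + (a) - 1) & ~((size_t) (a) - 1))

int
main (void)
{
  size_t bytes = 0;
  bytes += 8;                            /* double: one 8-byte slot        */
  bytes += 8;                            /* float: one 8-byte slot         */
  bytes += ALIGN (12, FFI_SIZEOF_ARG);   /* 12-byte struct: two slots      */
  if (bytes < 6 * 8)                     /* raise to the six register      */
    bytes = 6 * 8;                       /* slots ffi_call_sysv always pops */
  printf ("argument area = %zu bytes\n", bytes);   /* prints 48 */
  return 0;
}

Each scalar takes one 8-byte slot, the 12-byte struct rounds up to two, and the 32-byte total is then raised to the 48-byte minimum so ffi_call_sysv can unconditionally pop the six register-argument slots.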
*/ + argp = frame = alloca (cif->bytes + 4 * FFI_SIZEOF_ARG); + frame += cif->bytes; + + argn = 0; + if (flags == SW_64_RET_IN_MEM) + argp[argn++] = (unsigned long) rvalue; + + avn = cif->nargs; + arg_types = cif->arg_types; + + for (i = 0, avn = cif->nargs; i < avn; i++) + { + ffi_type *ty = arg_types[i]; + void *valp = avalue[i]; + int type = ty->type; + size_t size; + + switch (type) + { + case FFI_TYPE_INT: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_POINTER: + case FFI_TYPE_FLOAT: + case FFI_TYPE_DOUBLE: + argp[argn] = extend_basic_type (valp, type, argn); + argn++; + break; + + case FFI_TYPE_LONGDOUBLE: + by_reference: + /* Note that 128-bit long double is passed by reference. */ + argp[argn++] = (unsigned long) valp; + break; + + case FFI_TYPE_VOID: + case FFI_TYPE_STRUCT: + size = ty->size; + memcpy (argp + argn, valp, size); + argn += ALIGN (size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; + break; + + case FFI_TYPE_COMPLEX: + type = ty->elements[0]->type; + if (type == FFI_TYPE_LONGDOUBLE) + goto by_reference; + + /* Most complex types passed as two separate arguments. */ + size = ty->elements[0]->size; + argp[argn] = extend_basic_type (valp, type, argn); + argp[argn + 1] = extend_basic_type (valp + size, type, argn + 1); + argn += 2; + break; + + default: + abort (); + } + } + + flags = (flags >> SW_64_ST_SHIFT) & 0xff; + ffi_call_sysv (argp, frame, flags, rvalue, fn, closure); +} + +void +ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue) +{ + ffi_call_int (cif, fn, rvalue, avalue, NULL); +} + +void +ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue, + void *closure) +{ + ffi_call_int (cif, fn, rvalue, avalue, closure); +} + +ffi_status +ffi_prep_closure_loc (ffi_closure *closure, ffi_cif *cif, + void (*fun) (ffi_cif *, void *, void **, void *), + void *user_data, void *codeloc) +{ + unsigned int *tramp; + + if (cif->abi != FFI_OSF) + return FFI_BAD_ABI; + + tramp = (unsigned int *) &closure->tramp[0]; + tramp[0] = 0x43fb0741; /* mov $27,$1 */ + tramp[1] = 0x8f7b0010; /* ldl $27,16($27) */ + tramp[2] = 0x0ffb0000; /* jmp $31,($27),0 */ + tramp[3] = 0x43ff075f; /* nop */ + *(void **) &tramp[4] = ffi_closure_sysv; + + closure->cif = cif; + closure->fun = fun; + closure->user_data = user_data; + + /* Flush the Icache. 0x86 is PAL_imb in Tru64 UNIX . */ + asm volatile("sys_call 0x86" : : : "memory"); + + return FFI_OK; +} + +ffi_status +ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif *cif, + void (*fun) (ffi_cif *, void *, void **, void *)) +{ + if (cif->abi != FFI_OSF) + return FFI_BAD_ABI; + + closure->tramp = (void *) ffi_go_closure_sysv; + closure->cif = cif; + closure->fun = fun; + + return FFI_OK; +} + +long FFI_HIDDEN +ffi_closure_sysv_inner (ffi_cif *cif, + void (*fun) (ffi_cif *, void *, void **, void *), + void *user_data, void *rvalue, unsigned long *argp) +{ + void **avalue; + ffi_type **arg_types; + long i, avn, argn, flags; + + avalue = alloca (cif->nargs * sizeof (void *)); + flags = cif->flags; + argn = 0; + + /* Copy the caller's structure return address to that the closure + returns the data directly to the caller. */ + if (flags == SW_64_RET_IN_MEM) + { + rvalue = (void *) argp[0]; + argn = 1; + } + + arg_types = cif->arg_types; + + /* Grab the addresses of the arguments from the stack frame. 
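[Editor's note] The trampoline written by ffi_prep_closure_loc above can be pictured as a 24-byte record: four instruction words followed by the pointer that the ldl at offset 16 fetches. The struct below is only a sketch of that layout (the type name is invented); its size matches FFI_TRAMPOLINE_SIZE declared in the new ffitarget.h.

#include <stdint.h>

struct sw64_closure_trampoline
{
  uint32_t insn[4];   /* mov $27,$1 ; ldl $27,16($27) ; jmp $31,($27),0 ; nop */
  void *entry;        /* at offset 16: address of ffi_closure_sysv            */
};

_Static_assert (sizeof (struct sw64_closure_trampoline) == 24,
                "matches FFI_TRAMPOLINE_SIZE");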
*/ + for (i = 0, avn = cif->nargs; i < avn; i++) + { + ffi_type *ty = arg_types[i]; + int type = ty->type; + void *valp = &argp[argn]; + size_t size; + + switch (type) + { + case FFI_TYPE_INT: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + case FFI_TYPE_POINTER: + argn += 1; + break; + + case FFI_TYPE_VOID: + case FFI_TYPE_STRUCT: + size = ty->size; + argn += ALIGN (size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; + break; + + case FFI_TYPE_FLOAT: + /* Floats coming from registers need conversion from double + back to float format. */ + if (argn < 6) + { + valp = &argp[argn - 6]; + sts (valp, argp[argn - 6]); + } + argn += 1; + break; + + case FFI_TYPE_DOUBLE: + if (argn < 6) + valp = &argp[argn - 6]; + argn += 1; + break; + + case FFI_TYPE_LONGDOUBLE: + by_reference: + /* 128-bit long double is passed by reference. */ + valp = (void *) argp[argn]; + argn += 1; + break; + + case FFI_TYPE_COMPLEX: + type = ty->elements[0]->type; + switch (type) + { + case FFI_TYPE_SINT64: + case FFI_TYPE_UINT64: + /* Passed as separate arguments, but they wind up sequential. */ + break; + + case FFI_TYPE_INT: + case FFI_TYPE_SINT8: + case FFI_TYPE_UINT8: + case FFI_TYPE_SINT16: + case FFI_TYPE_UINT16: + case FFI_TYPE_SINT32: + case FFI_TYPE_UINT32: + /* Passed as separate arguments. Disjoint, but there's room + enough in one slot to hold the pair. */ + size = ty->elements[0]->size; + memcpy (valp + size, valp + 8, size); + break; + + case FFI_TYPE_FLOAT: + /* Passed as separate arguments. Disjoint, and each piece + may need conversion back to float. */ + if (argn < 6) + { + valp = &argp[argn - 6]; + sts (valp, argp[argn - 6]); + } + if (argn + 1 < 6) + sts (valp + 4, argp[argn + 1 - 6]); + else + *(UINT32 *) (valp + 4) = argp[argn + 1]; + break; + + case FFI_TYPE_DOUBLE: + /* Passed as separate arguments. Only disjoint if one part + is in fp regs and the other is on the stack. */ + if (argn < 5) + valp = &argp[argn - 6]; + else if (argn == 5) + { + valp = alloca (16); + ((UINT64 *) valp)[0] = argp[5 - 6]; + ((UINT64 *) valp)[1] = argp[6]; + } + break; + + case FFI_TYPE_LONGDOUBLE: + goto by_reference; + + default: + abort (); + } + argn += 2; + break; + + default: + abort (); + } + + avalue[i] = valp; + } + + /* Invoke the closure. */ + fun (cif, rvalue, avalue, user_data); + + /* Tell ffi_closure_sysv how to perform return type promotions. */ + return (flags >> SW_64_LD_SHIFT) & 0xff; +} diff --git a/libffi/src/sw_64/ffitarget.h b/libffi/src/sw_64/ffitarget.h new file mode 100644 index 0000000000000000000000000000000000000000..f5792e1dd68c284f23ca6441e38ec08379e16c19 --- /dev/null +++ b/libffi/src/sw_64/ffitarget.h @@ -0,0 +1,59 @@ +/* -----------------------------------------------------------------*-C-*- + ffitarget.h - Copyright (c) 2012 Anthony Green + Copyright (c) 1996-2003 Red Hat, Inc. + Target configuration macros for Sunway. 
+ + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + + ----------------------------------------------------------------------- */ + +#ifndef LIBFFI_TARGET_H +#define LIBFFI_TARGET_H + +#ifndef LIBFFI_H +#error \ + "Please do not include ffitarget.h directly into your source. Use ffi.h instead." +#endif + +#ifndef LIBFFI_ASM +typedef unsigned long ffi_arg; +typedef signed long ffi_sarg; + +typedef enum ffi_abi +{ + FFI_FIRST_ABI = 0, + FFI_OSF, + FFI_LAST_ABI, + FFI_DEFAULT_ABI = FFI_OSF +} ffi_abi; +#endif + +#define FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION +#define FFI_TARGET_HAS_COMPLEX_TYPE + +/* ---- Definitions for closures ----------------------------------------- */ + +#define FFI_CLOSURES 1 +#define FFI_GO_CLOSURES 1 +#define FFI_TRAMPOLINE_SIZE 24 +#define FFI_NATIVE_RAW_API 0 + +#endif diff --git a/libffi/src/sw_64/internal.h b/libffi/src/sw_64/internal.h new file mode 100644 index 0000000000000000000000000000000000000000..92ad32179ff271fcb83b81242d9b9a61f8161abe --- /dev/null +++ b/libffi/src/sw_64/internal.h @@ -0,0 +1,23 @@ +#define SW_64_ST_VOID 0 +#define SW_64_ST_INT 1 +#define SW_64_ST_FLOAT 2 +#define SW_64_ST_DOUBLE 3 +#define SW_64_ST_CPLXF 4 +#define SW_64_ST_CPLXD 5 + +#define SW_64_LD_VOID 0 +#define SW_64_LD_INT64 1 +#define SW_64_LD_INT32 2 +#define SW_64_LD_UINT16 3 +#define SW_64_LD_SINT16 4 +#define SW_64_LD_UINT8 5 +#define SW_64_LD_SINT8 6 +#define SW_64_LD_FLOAT 7 +#define SW_64_LD_DOUBLE 8 +#define SW_64_LD_CPLXF 9 +#define SW_64_LD_CPLXD 10 + +#define SW_64_ST_SHIFT 0 +#define SW_64_LD_SHIFT 8 +#define SW_64_RET_IN_MEM 0x10000 +#define SW_64_FLAGS(S, L) (((L) << SW_64_LD_SHIFT) | (S)) diff --git a/libffi/src/sw_64/sysv.S b/libffi/src/sw_64/sysv.S new file mode 100644 index 0000000000000000000000000000000000000000..588cb6e76ccba75e6738aa23958aad1f57804540 --- /dev/null +++ b/libffi/src/sw_64/sysv.S @@ -0,0 +1,281 @@ +/* ----------------------------------------------------------------------- + sysv.S - Copyright (c) 1998, 2001, 2007, 2008, 2011, 2014 Red Hat + + Sunway/SYSV Foreign Function Interface + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + ``Software''), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above 
copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + ----------------------------------------------------------------------- */ +#define LIBFFI_ASM +#include +#include +#include +#include "internal.h" + + .arch sw6a + .text + +/* Aid in building a direct addressed jump table, 4 insns per entry. */ +.macro E index + .align 4 + .org 99b + \index * 16 +.endm + +/* ffi_call_sysv (void *stack, void *frame, unsigned flags, + void *raddr, void (*fnaddr)(void), void *closure) + + Bit o trickiness here -- FRAME is the base of the stack frame + for this function. This has been allocated by ffi_call. We also + deallocate some of the stack that has been alloca'd. */ + + .align 4 + .globl ffi_call_sysv + .ent ffi_call_sysv + FFI_HIDDEN(ffi_call_sysv) + +ffi_call_sysv: + cfi_startproc + cfi_def_cfa($17, 32) + mov $16, $30 + stl $26, 0($17) + stl $15, 8($17) + mov $17, $15 + .prologue 0 + cfi_def_cfa_register($15) + cfi_rel_offset($26, 0) + cfi_rel_offset($15, 8) + + stl $18, 16($17) # save flags into frame + stl $19, 24($17) # save rvalue into frame + mov $20, $27 # fn into place for call + mov $21, $1 # closure into static chain + + # Load up all of the (potential) argument registers. + ldl $16, 0($30) + fldd $f16, 0($30) + fldd $f17, 8($30) + ldl $17, 8($30) + fldd $f18, 16($30) + ldl $18, 16($30) + fldd $f19, 24($30) + ldl $19, 24($30) + fldd $f20, 32($30) + ldl $20, 32($30) + fldd $f21, 40($30) + ldl $21, 40($30) + + # Deallocate the register argument area. + ldi $30, 48($30) + + call $26, ($27), 0 +0: + ldih $29, 0($26) !gpdisp!1 + ldl $2, 24($15) # reload rvalue + ldi $29, 0($29) !gpdisp!1 + ldl $3, 16($15) # reload flags + ldi $1, 99f-0b($26) + ldl $26, 0($15) + ldl $15, 8($15) + cfi_restore($26) + cfi_restore($15) + cfi_def_cfa($sp, 0) + seleq $2, 0, $3 # mash null rvalue to void + addl $3, $3, $3 + s8addl $3, $1, $1 # 99f + stcode * 16 + jmp $31, ($1), $st_int + + .align 4 +99: +E 0 + ret +E 1 +$st_int: + stl $0, 0($2) + ret +E 2 + fsts $f0, 0($2) + ret +E 4 + fstd $f0, 0($2) + ret +E 6 + fsts $f0, 0($2) + fsts $f1, 4($2) + ret +E 10 + fstd $f0, 0($2) + fstd $f1, 8($2) + ret + + cfi_endproc + .end ffi_call_sysv + +/* ffi_closure_sysv(...) + + Receives the closure argument in $1. 
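[Editor's note] For the minimal case where cif->bytes is exactly the six register slots, the block that ffi_call_int allocates and ffi_call_sysv consumes looks roughly like the sketch below. Field names are invented; the offsets follow the stl stores and the ldl/fldd pairs in the assembly above.

#include <stdint.h>

struct sw64_call_block
{
  uint64_t arg_slot[6];     /* loaded into $16-$21 and $f16-$f21, then
                               deallocated with "ldi $30, 48($30)"       */
  /* any further slots would remain here as the callee's stack arguments */
  uint64_t saved_ra;        /* frame+0:  $26                             */
  uint64_t saved_fp;        /* frame+8:  $15                             */
  uint64_t return_flags;    /* frame+16: reloaded after the call         */
  uint64_t return_address;  /* frame+24: rvalue pointer                  */
};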
*/ + +#define CLOSURE_FS (16*8) + + .align 4 + .globl ffi_go_closure_sysv + .ent ffi_go_closure_sysv + FFI_HIDDEN(ffi_go_closure_sysv) + +ffi_go_closure_sysv: + cfi_startproc + ldgp $29, 0($27) + subl $30, CLOSURE_FS, $30 + cfi_adjust_cfa_offset(CLOSURE_FS) + stl $26, 0($30) + .prologue 1 + cfi_rel_offset($26, 0) + + stl $16, 10*8($30) + stl $17, 11*8($30) + stl $18, 12*8($30) + + ldl $16, 8($1) # load cif + ldl $17, 16($1) # load fun + mov $1, $18 # closure is user_data + br $do_closure + + cfi_endproc + .end ffi_go_closure_sysv + + .align 4 + .globl ffi_closure_sysv + .ent ffi_closure_sysv + FFI_HIDDEN(ffi_closure_sysv) + +ffi_closure_sysv: + cfi_startproc + ldgp $29, 0($27) + subl $30, CLOSURE_FS, $30 + cfi_adjust_cfa_offset(CLOSURE_FS) + stl $26, 0($30) + .prologue 1 + cfi_rel_offset($26, 0) + + # Store all of the potential argument registers in va_list format. + stl $16, 10*8($30) + stl $17, 11*8($30) + stl $18, 12*8($30) + + ldl $16, 24($1) # load cif + ldl $17, 32($1) # load fun + ldl $18, 40($1) # load user_data + +$do_closure: + stl $19, 13*8($30) + stl $20, 14*8($30) + stl $21, 15*8($30) + fstd $f16, 4*8($30) + fstd $f17, 5*8($30) + fstd $f18, 6*8($30) + fstd $f19, 7*8($30) + fstd $f20, 8*8($30) + fstd $f21, 9*8($30) + + # Call ffi_closure_sysv_inner to do the bulk of the work. + ldi $19, 2*8($30) + ldi $20, 10*8($30) + call $26, ffi_closure_sysv_inner +0: + ldih $29, 0($26) !gpdisp!2 + ldi $2, 99f-0b($26) + s4addl $0, 0, $1 # ldcode * 4 + ldl $0, 16($30) # preload return value + s4addl $1, $2, $1 # 99f + ldcode * 16 + ldi $29, 0($29) !gpdisp!2 + ldl $26, 0($30) + cfi_restore($26) + jmp $31, ($1), $load_32 + +.macro epilogue + addl $30, CLOSURE_FS, $30 + cfi_adjust_cfa_offset(-CLOSURE_FS) + ret + .align 4 + cfi_adjust_cfa_offset(CLOSURE_FS) +.endm + + .align 4 +99: +E 0 + epilogue + +E 1 + epilogue + +E 2 +$load_32: + sextl $0, $0 + epilogue + +E 3 + zapnot $0, 3, $0 + epilogue + +E 4 +#ifdef __sw_64_bwx__ + sexth $0, $0 +#else + sll $0, 48, $0 + sra $0, 48, $0 +#endif + epilogue + +E 5 + and $0, 0xff, $0 + epilogue + +E 6 +#ifdef __sw_64_bwx__ + sextb $0, $0 +#else + sll $0, 56, $0 + sra $0, 56, $0 +#endif + epilogue + +E 7 + flds $f0, 16($sp) + epilogue + +E 8 + fldd $f0, 16($sp) + epilogue + +E 9 + flds $f0, 16($sp) + flds $f1, 20($sp) + epilogue + +E 10 + fldd $f0, 16($sp) + fldd $f1, 24($sp) + epilogue + + cfi_endproc + .end ffi_closure_sysv + +#if defined __ELF__ && defined __linux__ + .section .note.GNU-stack,"",@progbits +#endif diff --git a/libffi/src/types.c b/libffi/src/types.c index 7e80aec6eb4b66da2a583cbb159da450dff6cd50..9ff182e35e247fbea6bb0e32e631917136876c5e 100644 --- a/libffi/src/types.c +++ b/libffi/src/types.c @@ -78,13 +78,13 @@ FFI_TYPEDEF(pointer, void*, FFI_TYPE_POINTER, const); FFI_TYPEDEF(float, float, FFI_TYPE_FLOAT, const); FFI_TYPEDEF(double, double, FFI_TYPE_DOUBLE, const); -#if !defined HAVE_LONG_DOUBLE_VARIANT || defined __alpha__ +#if !defined HAVE_LONG_DOUBLE_VARIANT || defined __alpha_ || defined __sw_64___ #define FFI_LDBL_CONST const #else #define FFI_LDBL_CONST #endif -#ifdef __alpha__ +#if defined __alpha__ || defined __sw_64__ /* Even if we're not configured to default to 128-bit long double, maintain binary compatibility, as -mlong-double-128 can be used at any time. 
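[Editor's note] The jump table above promotes the raw integer return register according to the SW_64_LD_* code handed back by ffi_closure_sysv_inner. A C restatement of those cases, assuming the constant values from the new internal.h; the floating-point codes (7 to 10) are reloaded through $f0/$f1 instead and are omitted here.

#include <stdint.h>

static int64_t
promote_integer_return (int ld_code, uint64_t raw)
{
  /* ld_code is (flags >> SW_64_LD_SHIFT) & 0xff. */
  switch (ld_code)
    {
    case 2:  return (int32_t) raw;    /* SW_64_LD_INT32:  sextl        */
    case 3:  return (uint16_t) raw;   /* SW_64_LD_UINT16: zapnot $0,3  */
    case 4:  return (int16_t) raw;    /* SW_64_LD_SINT16: sexth        */
    case 5:  return (uint8_t) raw;    /* SW_64_LD_UINT8:  and $0,0xff  */
    case 6:  return (int8_t) raw;     /* SW_64_LD_SINT8:  sextb        */
    default: return (int64_t) raw;    /* void and 64-bit: used as-is   */
    }
}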
*/ diff --git a/libgcc/config.host b/libgcc/config.host index c529cc40f0c8d536524e2539483e6b148ded4413..ba196609ee296d753b44f44e1408602efbb10a20 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -199,6 +199,9 @@ s390*-*-*) sh[123456789lbe]*-*-*) cpu_type=sh ;; +sw_64*-*-*) + cpu_type=sw_64 + ;; tilegx*-*-*) cpu_type=tilegx ;; @@ -1424,6 +1427,21 @@ sparc64-*-linux*) # 64-bit SPARC's running GNU/Linux ;; sparc64-*-netbsd*) ;; +sw_64*-*-linux*) + tmake_file="${tmake_file} sw_64/t-sw_64 sw_64/t-ieee t-crtfm sw_64/t-linux" + extra_parts="$extra_parts crtfastmath.o" + md_unwind_header=sw_64/linux-unwind.h + ;; +sw_64*-*-freebsd*) + tmake_file="${tmake_file} sw_64/t-sw_64 sw_64/t-ieee t-crtfm" + extra_parts="$extra_parts crtbeginT.o crtfastmath.o" + ;; +sw_64*-*-netbsd*) + tmake_file="${tmake_file} sw_64/t-sw_64 sw_64/t-ieee" + ;; +sw_64*-*-openbsd*) + tmake_file="${tmake_file} sw_64/t-sw_64 sw_64/t-ieee" + ;; tic6x-*-uclinux) tmake_file="${tmake_file} t-softfp-sfdf t-softfp-excl t-softfp \ c6x/t-elf c6x/t-uclinux t-crtstuff-pic t-libgcc-pic \ diff --git a/libgcc/config/sw_64/crtfastmath.c b/libgcc/config/sw_64/crtfastmath.c new file mode 100644 index 0000000000000000000000000000000000000000..1cd890458228ea4ebf943326fb2242b0652d83dc --- /dev/null +++ b/libgcc/config/sw_64/crtfastmath.c @@ -0,0 +1,36 @@ +/* + * Copyright (C) 2001-2020 Free Software Foundation, Inc. + * Contributed by Richard Henderson (rth@redhat.com) + * + * This file is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) any + * later version. + * + * This file is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * . + */ + +/* Assume SYSV/1 compatible interfaces. */ + +extern void +__ieee_set_fp_control (unsigned long int); + +#define IEEE_MAP_DMZ (1UL << 12) /* Map denorm inputs to zero */ +#define IEEE_MAP_UMZ (1UL << 13) /* Map underflowed outputs to zero */ + +static void __attribute__ ((constructor)) set_fast_math (void) +{ + __ieee_set_fp_control (IEEE_MAP_DMZ | IEEE_MAP_UMZ); +} diff --git a/libgcc/config/sw_64/libgcc-sw_64-ldbl.ver b/libgcc/config/sw_64/libgcc-sw_64-ldbl.ver new file mode 100644 index 0000000000000000000000000000000000000000..21f259687860d495acb12b5afca5f24a5dd5ba2a --- /dev/null +++ b/libgcc/config/sw_64/libgcc-sw_64-ldbl.ver @@ -0,0 +1,50 @@ +# Copyright (C) 2006-2020 Free Software Foundation, Inc. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# . + +%ifdef __LONG_DOUBLE_128__ + +# long double 128 bit support in libgcc_s.so.1 is only available +# when configured with --with-long-double-128. Make sure all the +# symbols are available at @@GCC_LDBL_* versions to make it clear +# there is a configurable symbol set. + +%exclude { + __fixtfdi + __fixunstfdi + __floatditf + + __divtc3 + __multc3 + __powitf2 +} + +%inherit GCC_LDBL_3.0 GCC_3.0 +GCC_LDBL_3.0 { + __fixtfdi + __fixunstfdi + __floatditf +} + +%inherit GCC_LDBL_4.0.0 GCC_4.0.0 +GCC_LDBL_4.0.0 { + __divtc3 + __multc3 + __powitf2 +} + +%endif diff --git a/libgcc/config/sw_64/linux-unwind.h b/libgcc/config/sw_64/linux-unwind.h new file mode 100644 index 0000000000000000000000000000000000000000..79da6a16a94e501c749cd1f27544492a60d445d3 --- /dev/null +++ b/libgcc/config/sw_64/linux-unwind.h @@ -0,0 +1,103 @@ +/* DWARF2 EH unwinding support for Sw_64 Linux. + Copyright (C) 2004-2020 Free Software Foundation, Inc. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3, or (at your option) +any later version. + +GCC is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +. */ + +#ifndef inhibit_libc +/* Do code reading to identify a signal frame, and set the frame + state data appropriately. See unwind-dw2.c for the structs. */ + +#include +#include + +#define MD_FALLBACK_FRAME_STATE_FOR sw_64_fallback_frame_state + +static _Unwind_Reason_Code +sw_64_fallback_frame_state (struct _Unwind_Context *context, + _Unwind_FrameState *fs) +{ + unsigned int *pc = context->ra; + struct sigcontext *sc; + long new_cfa; + int i; + + if (pc[0] != 0x47fe0410 /* mov $30,$16 */ + || pc[2] != 0x00000083) /* callsys */ + return _URC_END_OF_STACK; + if (context->cfa == 0) + return _URC_END_OF_STACK; + if (pc[1] == 0x201f0067) /* lda $0,NR_sigreturn */ + sc = context->cfa; + else if (pc[1] == 0x201f015f) /* lda $0,NR_rt_sigreturn */ + { + struct rt_sigframe + { + siginfo_t info; + ucontext_t uc; + } *rt_ = context->cfa; + /* The void * cast is necessary to avoid an aliasing warning. + The aliasing warning is correct, but should not be a problem + because it does not alias anything. 
*/ + sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext; + } + else + return _URC_END_OF_STACK; + + new_cfa = sc->sc_regs[30]; + fs->regs.cfa_how = CFA_REG_OFFSET; + fs->regs.cfa_reg = 30; + fs->regs.cfa_offset = new_cfa - (long) context->cfa; + for (i = 0; i < 30; ++i) + { + fs->regs.reg[i].how = REG_SAVED_OFFSET; + fs->regs.reg[i].loc.offset = (long) &sc->sc_regs[i] - new_cfa; + } + for (i = 0; i < 31; ++i) + { + fs->regs.reg[i + 32].how = REG_SAVED_OFFSET; + fs->regs.reg[i + 32].loc.offset = (long) &sc->sc_fpregs[i] - new_cfa; + } + fs->regs.reg[64].how = REG_SAVED_OFFSET; + fs->regs.reg[64].loc.offset = (long) &sc->sc_pc - new_cfa; + fs->retaddr_column = 64; + fs->signal_frame = 1; + + return _URC_NO_REASON; +} + +#define MD_FROB_UPDATE_CONTEXT sw_64_frob_update_context + +/* Fix up for signal handlers that don't have S flag set. */ + +static void +sw_64_frob_update_context (struct _Unwind_Context *context, + _Unwind_FrameState *fs ATTRIBUTE_UNUSED) +{ + unsigned int *pc = context->ra; + + if (pc[0] == 0x47fe0410 /* mov $30,$16 */ + && pc[2] == 0x00000083 /* callsys */ + && (pc[1] == 0x201f0067 /* lda $0,NR_sigreturn */ + || pc[1] == 0x201f015f)) /* lda $0,NR_rt_sigreturn */ + _Unwind_SetSignalFrame (context, 1); +} +#endif diff --git a/libgcc/config/sw_64/qrnnd.S b/libgcc/config/sw_64/qrnnd.S new file mode 100644 index 0000000000000000000000000000000000000000..d22b31b4ebb0c3bcc6740c350cfa255912130a88 --- /dev/null +++ b/libgcc/config/sw_64/qrnnd.S @@ -0,0 +1,181 @@ + # Sw_64 __udiv_qrnnd + # Copyright (C) 1992-2020 Free Software Foundation, Inc. + + # This file is part of GCC. + + # The GNU MP Library is free software; you can redistribute it and/or modify + # it under the terms of the GNU General Public License as published by + # the Free Software Foundation; either version 3 of the License, or (at your + # option) any later version. + + # This file is distributed in the hope that it will be useful, but + # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public + # License for more details. + + # Under Section 7 of GPL version 3, you are granted additional + # permissions described in the GCC Runtime Library Exception, version + # 3.1, as published by the Free Software Foundation. + + # You should have received a copy of the GNU General Public License and + # a copy of the GCC Runtime Library Exception along with this program; + # see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + # . 
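[Editor's note] Both unwinder hooks in linux-unwind.h above key on the same three-instruction signal-return sequence. Isolated as a predicate (the function name is illustrative), the check is:

#include <stdbool.h>
#include <stdint.h>

static bool
is_sw64_sigreturn_sequence (const uint32_t *pc)
{
  return pc[0] == 0x47fe0410                /* mov $30,$16             */
         && (pc[1] == 0x201f0067            /* lda $0,NR_sigreturn     */
             || pc[1] == 0x201f015f)        /* lda $0,NR_rt_sigreturn  */
         && pc[2] == 0x00000083;            /* callsys                 */
}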
+ +#ifdef __ELF__ +.section .note.GNU-stack,"" +#endif + + .set noreorder + .set noat + + .text + + .globl __udiv_qrnnd + .ent __udiv_qrnnd +#ifdef __VMS__ +__udiv_qrnnd..en: + .frame $29,0,$26,0 + .prologue +#else +__udiv_qrnnd: + .frame $30,0,$26,0 + .prologue 0 +#endif +/* + ldiq -> ldi + addq->addl + subq->subl + cmovne qb,tmp,n1->selne qb,tmp,n1,n1 + stq ->stl + cmoveq tmp,AT,n1(n0)->seleq tmp,AT,n1,n1(n0,n0) */ +#define cnt $2 +#define tmp $3 +#define rem_ptr $16 +#define n1 $17 +#define n0 $18 +#define d $19 +#define qb $20 +#define AT $at + + ldi cnt,16 + blt d,$largedivisor + +$loop1: cmplt n0,0,tmp + addl n1,n1,n1 + bis n1,tmp,n1 + addl n0,n0,n0 + cmpule d,n1,qb + subl n1,d,tmp + selne qb,tmp,n1,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addl n1,n1,n1 + bis n1,tmp,n1 + addl n0,n0,n0 + cmpule d,n1,qb + subl n1,d,tmp + selne qb,tmp,n1,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addl n1,n1,n1 + bis n1,tmp,n1 + addl n0,n0,n0 + cmpule d,n1,qb + subl n1,d,tmp + selne qb,tmp,n1,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addl n1,n1,n1 + bis n1,tmp,n1 + addl n0,n0,n0 + cmpule d,n1,qb + subl n1,d,tmp + selne qb,tmp,n1,n1 + bis n0,qb,n0 + subl cnt,1,cnt + bgt cnt,$loop1 + stl n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +$largedivisor: + and n0,1,$4 + + srl n0,1,n0 + sll n1,63,tmp + or tmp,n0,n0 + srl n1,1,n1 + + and d,1,$6 + srl d,1,$5 + addl $5,$6,$5 + +$loop2: cmplt n0,0,tmp + addl n1,n1,n1 + bis n1,tmp,n1 + addl n0,n0,n0 + cmpule $5,n1,qb + subl n1,$5,tmp + selne qb,tmp,n1,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addl n1,n1,n1 + bis n1,tmp,n1 + addl n0,n0,n0 + cmpule $5,n1,qb + subl n1,$5,tmp + selne qb,tmp,n1,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addl n1,n1,n1 + bis n1,tmp,n1 + addl n0,n0,n0 + cmpule $5,n1,qb + subl n1,$5,tmp + selne qb,tmp,n1,n1 + bis n0,qb,n0 + cmplt n0,0,tmp + addl n1,n1,n1 + bis n1,tmp,n1 + addl n0,n0,n0 + cmpule $5,n1,qb + subl n1,$5,tmp + selne qb,tmp,n1,n1 + bis n0,qb,n0 + subl cnt,1,cnt + bgt cnt,$loop2 + + addl n1,n1,n1 + addl $4,n1,n1 + bne $6,$Odd + stl n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +$Odd: + /* q' in n0. r' in n1 */ + addl n1,n0,n1 + + cmpult n1,n0,tmp # tmp := carry from addl + subl n1,d,AT + addl n0,tmp,n0 + selne tmp,AT,n1,n1 + + cmpult n1,d,tmp + addl n0,1,AT + seleq tmp,AT,n0,n0 + subl n1,d,AT + seleq tmp,AT,n1,n1 + + stl n1,0(rem_ptr) + bis $31,n0,$0 + ret $31,($26),1 + +#ifdef __VMS__ + .link + .align 3 +__udiv_qrnnd: + .pdesc __udiv_qrnnd..en,null +#endif + .end __udiv_qrnnd diff --git a/libgcc/config/sw_64/t-ieee b/libgcc/config/sw_64/t-ieee new file mode 100644 index 0000000000000000000000000000000000000000..9b66e50acc5db17c3fa2cdbd8040dff123acad0d --- /dev/null +++ b/libgcc/config/sw_64/t-ieee @@ -0,0 +1,2 @@ +# All sw_64s get an IEEE complaint set of libraries. +#HOST_LIBGCC2_CFLAGS += -mieee diff --git a/libgcc/config/sw_64/t-linux b/libgcc/config/sw_64/t-linux new file mode 100644 index 0000000000000000000000000000000000000000..0b7b7e6a1086ce5bc3a447620f20d4d9208630ae --- /dev/null +++ b/libgcc/config/sw_64/t-linux @@ -0,0 +1,2 @@ +SHLIB_MAPFILES += $(srcdir)/config/sw_64/libgcc-sw_64-ldbl.ver + diff --git a/libgcc/config/sw_64/t-sw_64 b/libgcc/config/sw_64/t-sw_64 new file mode 100644 index 0000000000000000000000000000000000000000..dffba8ee79134187635e5ca47873008a48ae784b --- /dev/null +++ b/libgcc/config/sw_64/t-sw_64 @@ -0,0 +1,6 @@ +# This is a support routine for longlong.h, used by libgcc2.c. 
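[Editor's note] A C model of the shift-and-subtract loop in qrnnd.S, for readers who prefer not to trace the assembly: it divides the two-word value n1:n0 by d one bit per iteration, where the assembly unrolls four steps per pass (cnt = 16). The sketch assumes the usual longlong.h precondition n1 < d and a divisor with its top bit clear; the separate $largedivisor path handles the other case.

#include <stdint.h>

static uint64_t
udiv_qrnnd_model (uint64_t *rem, uint64_t n1, uint64_t n0, uint64_t d)
{
  for (int i = 0; i < 64; i++)
    {
      uint64_t carry = n0 >> 63;     /* cmplt n0,0,tmp              */
      n1 = (n1 << 1) | carry;        /* addl n1,n1 ; bis n1,tmp,n1  */
      n0 <<= 1;                      /* addl n0,n0                  */
      if (n1 >= d)                   /* cmpule d,n1,qb              */
        {
          n1 -= d;                   /* subl n1,d ; selne           */
          n0 |= 1;                   /* bis n0,qb,n0                */
        }
    }
  *rem = n1;                         /* stl n1,0(rem_ptr)           */
  return n0;                         /* quotient returned in $0     */
}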
+LIB2ADD += $(srcdir)/config/sw_64/qrnnd.S + +# When GAS-generated unwind tables are created, they get created +# after the __FRAME_END__ terminator, which causes an ld error. +CRTSTUFF_T_CFLAGS = -fno-unwind-tables diff --git a/libgcc/libgcc2.c b/libgcc/libgcc2.c index e0a9fd712e70c816113ddb6d061ce979657878fc..50aa1bf06d3a93b3879f774e814e9f721eb87c59 100644 --- a/libgcc/libgcc2.c +++ b/libgcc/libgcc2.c @@ -2187,7 +2187,7 @@ int mprotect (char *,int, int); int getpagesize (void) { -#ifdef _ALPHA_ +#if defined _ALPHA_ || defined _SW_64_ return 8192; #else return 4096; diff --git a/libgfortran/config/fpu-glibc.h b/libgfortran/config/fpu-glibc.h index 2abb0da6b1e8506166d78ca22f72578d63d93726..f4153059797bbc2315b0882f18bc5095c00f819a 100644 --- a/libgfortran/config/fpu-glibc.h +++ b/libgfortran/config/fpu-glibc.h @@ -446,7 +446,7 @@ set_fpu_state (void *state) int support_fpu_underflow_control (int kind __attribute__((unused))) { -#if defined(__alpha__) && defined(FE_MAP_UMZ) +#if (defined(__alpha__)|| defined(__sw_64__)) && defined(FE_MAP_UMZ) return (kind == 4 || kind == 8) ? 1 : 0; #else return 0; @@ -457,7 +457,7 @@ support_fpu_underflow_control (int kind __attribute__((unused))) int get_fpu_underflow_mode (void) { -#if defined(__alpha__) && defined(FE_MAP_UMZ) +#if (defined(__alpha__)|| defined(__sw_64__)) && defined(FE_MAP_UMZ) fenv_t state = __ieee_get_fp_control (); @@ -475,7 +475,7 @@ get_fpu_underflow_mode (void) void set_fpu_underflow_mode (int gradual __attribute__((unused))) { -#if defined(__alpha__) && defined(FE_MAP_UMZ) +#if (defined(__alpha__)|| defined(__sw_64__)) && defined(FE_MAP_UMZ) fenv_t state = __ieee_get_fp_control (); diff --git a/libgfortran/configure.host b/libgfortran/configure.host index 5824f253e2f18ea7a019256d8ca1c54b950aa5a3..85407b61eb4b6e602f1c03821b61b8a4b02b4251 100644 --- a/libgfortran/configure.host +++ b/libgfortran/configure.host @@ -56,4 +56,6 @@ case "${host_cpu}" in ieee_flags="-mieee" ;; sh*) ieee_flags="-mieee" ;; + sw_64*) + ieee_flags="-mieee" ;; esac diff --git a/libgo/configure b/libgo/configure index 2f787392abd220946547acc40b58e657e348155f..51cff79ba79e64dc9e2424d62ae08f433f95eefe 100644 --- a/libgo/configure +++ b/libgo/configure @@ -14070,10 +14070,10 @@ esac # - libgo/go/syscall/endian_XX.go # - possibly others # - possibly update files in libgo/go/internal/syscall/unix -ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv riscv64 s390 s390x sh shbe sparc sparc64 wasm" +ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv riscv64 s390 s390x sh shbe sparc sparc64 sw_64 wasm" # All known GOARCH family values. 
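[Editor's note] The fpu-glibc.h hunks above gate underflow control on FE_MAP_UMZ. A minimal sketch of the mechanism they rely on, assuming the Alpha-style glibc extensions (__ieee_get_fp_control, __ieee_set_fp_control, FE_MAP_UMZ) are also available on sw_64, which is what the #if conditions already require; the function name is illustrative.

#include <fenv.h>

static void
set_flush_to_zero_underflow (int enable)
{
#if defined(FE_MAP_UMZ)
  fenv_t state = __ieee_get_fp_control ();
  if (enable)
    state |= FE_MAP_UMZ;       /* underflowed results become +0.0 */
  else
    state &= ~FE_MAP_UMZ;      /* IEEE gradual (denormal) underflow */
  __ieee_set_fp_control (state);
#else
  (void) enable;               /* no-op where the extension is absent */
#endif
}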
-ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 NIOS2 PPC PPC64 RISCV RISCV64 S390 S390X SH SPARC SPARC64 WASM" +ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 NIOS2 PPC PPC64 RISCV RISCV64 S390 S390X SH SPARC SPARC64 SW_64 WASM" GOARCH=unknown case ${host} in @@ -14256,6 +14256,9 @@ else fi rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext ;; + sw_64*-*-*) + GOARCH=sw_64 + ;; esac diff --git a/libgo/configure.ac b/libgo/configure.ac index f800d44a0e9cf194e1e3fff87dbdd9a093c4ba39..91cfe35134807e26012f8e0b2d93a2dc96618b46 100644 --- a/libgo/configure.ac +++ b/libgo/configure.ac @@ -236,10 +236,10 @@ AC_SUBST(USE_DEJAGNU) # - libgo/go/syscall/endian_XX.go # - possibly others # - possibly update files in libgo/go/internal/syscall/unix -ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv riscv64 s390 s390x sh shbe sparc sparc64 wasm" +ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv riscv64 s390 s390x sh shbe sparc sparc64 sw_64 wasm" # All known GOARCH family values. -ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 NIOS2 PPC PPC64 RISCV RISCV64 S390 S390X SH SPARC SPARC64 WASM" +ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 NIOS2 PPC PPC64 RISCV RISCV64 S390 S390X SH SPARC SPARC64 SW_64 WASM" GOARCH=unknown case ${host} in @@ -361,6 +361,9 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([ [GOARCH=sparc], [GOARCH=sparc64]) ;; + sw_64*-*-*) + GOARCH=sw_64 + ;; esac AC_SUBST(GOARCH) AC_SUBST(ALLGOARCH) diff --git a/libgo/go/cmd/cgo/main.go b/libgo/go/cmd/cgo/main.go index 80f35681d75f0dfcd91cf1781ff1d4f74c5167fd..366abd1061b4a075be320ecff6effdd5b291caa6 100644 --- a/libgo/go/cmd/cgo/main.go +++ b/libgo/go/cmd/cgo/main.go @@ -191,6 +191,7 @@ var ptrSizeMap = map[string]int64{ "shbe": 4, "sparc": 4, "sparc64": 8, + "sw_64": 8, } var intSizeMap = map[string]int64{ @@ -217,6 +218,7 @@ var intSizeMap = map[string]int64{ "shbe": 4, "sparc": 4, "sparc64": 8, + "sw_64": 8, } var cPrefix string diff --git a/libgo/go/cmd/internal/sys/arch.go b/libgo/go/cmd/internal/sys/arch.go index e8687363defc502c17cdb97660b7c4ac886d799d..604bbec612eb971bda07658d2160ba5ab2ca44b1 100644 --- a/libgo/go/cmd/internal/sys/arch.go +++ b/libgo/go/cmd/internal/sys/arch.go @@ -12,6 +12,7 @@ type ArchFamily byte const ( NoArch ArchFamily = iota + SW_64 AMD64 ARM ARM64 @@ -169,8 +170,17 @@ var ArchWasm = &Arch{ RegSize: 8, MinLC: 1, } - +/*TODO*/ +var ArchSW_64 = &Arch{ + Name: "sw_64", + Family: SW_64, + ByteOrder: binary.LittleEndian, + PtrSize: 8, + RegSize: 8, + MinLC: 1, +} var Archs = [...]*Arch{ + ArchSW_64, Arch386, ArchAMD64, ArchARM, diff --git a/libgo/go/debug/elf/elf.go b/libgo/go/debug/elf/elf.go index 96a67ce732728c38b7cb07adaa78fb25ff08dd03..c417537b9210186b55ab2c328731a66397bc27b1 100644 --- a/libgo/go/debug/elf/elf.go +++ b/libgo/go/debug/elf/elf.go @@ -6,6 +6,7 @@ * $FreeBSD: src/sys/sys/elf64.h,v 1.10.14.1 2005/12/30 22:13:58 marcel Exp $ * $FreeBSD: src/sys/sys/elf_common.h,v 1.15.8.1 2005/12/30 22:13:58 marcel Exp $ * $FreeBSD: src/sys/alpha/include/elf.h,v 1.14 2003/09/25 01:10:22 peter Exp $ + * $FreeBSD: src/sys/sw_64/include/elf.h,v 1.14 2003/09/25 01:10:22 peter Exp $ * $FreeBSD: src/sys/amd64/include/elf.h,v 1.18 2004/08/03 08:21:48 dfr Exp $ * $FreeBSD: src/sys/arm/include/elf.h,v 1.5.2.1 2006/06/30 21:42:52 cognet Exp $ * $FreeBSD: 
src/sys/i386/include/elf.h,v 1.16 2004/08/02 19:12:17 dfr Exp $ @@ -390,6 +391,8 @@ const ( EM_MIPS_RS4_BE Machine = 10 /* MIPS R4000 Big-Endian */ EM_ALPHA_STD Machine = 41 /* Digital Alpha (standard value). */ EM_ALPHA Machine = 0x9026 /* Alpha (written in the absence of an ABI) */ + EM_SW_64_STD Machine = 41 /* Digital Sw_64 (standard value). */ + EM_SW_64 Machine = 0x9916 /* mieee-opt Sw_64 (written in the absence of an ABI) */ ) var machineStrings = []intName{ @@ -581,6 +584,8 @@ var machineStrings = []intName{ {10, "EM_MIPS_RS4_BE"}, {41, "EM_ALPHA_STD"}, {0x9026, "EM_ALPHA"}, + {41, "EM_SW_64_STD"}, + {0x9916, "EM_SW_64"}, } func (i Machine) String() string { return stringName(uint32(i), machineStrings, false) } @@ -1463,6 +1468,73 @@ var ralphaStrings = []intName{ func (i R_ALPHA) String() string { return stringName(uint32(i), ralphaStrings, false) } func (i R_ALPHA) GoString() string { return stringName(uint32(i), ralphaStrings, true) } +// Relocation types for SW_64. +type R_SW_64 int + +const ( + R_SW_64_NONE R_SW_64 = 0 /* No reloc */ + R_SW_64_REFLONG R_SW_64 = 1 /* Direct 32 bit */ + R_SW_64_REFQUAD R_SW_64 = 2 /* Direct 64 bit */ + R_SW_64_GPREL32 R_SW_64 = 3 /* GP relative 32 bit */ + R_SW_64_LITERAL R_SW_64 = 4 /* GP relative 16 bit w/optimization */ + R_SW_64_LITUSE R_SW_64 = 5 /* Optimization hint for LITERAL */ + R_SW_64_GPDISP R_SW_64 = 6 /* Add displacement to GP */ + R_SW_64_BRADDR R_SW_64 = 7 /* PC+4 relative 23 bit shifted */ + R_SW_64_HINT R_SW_64 = 8 /* PC+4 relative 16 bit shifted */ + R_SW_64_SREL16 R_SW_64 = 9 /* PC relative 16 bit */ + R_SW_64_SREL32 R_SW_64 = 10 /* PC relative 32 bit */ + R_SW_64_SREL64 R_SW_64 = 11 /* PC relative 64 bit */ + R_SW_64_OP_PUSH R_SW_64 = 12 /* OP stack push */ + R_SW_64_OP_STORE R_SW_64 = 13 /* OP stack pop and store */ + R_SW_64_OP_PSUB R_SW_64 = 14 /* OP stack subtract */ + R_SW_64_OP_PRSHIFT R_SW_64 = 15 /* OP stack right shift */ + R_SW_64_GPVALUE R_SW_64 = 16 + R_SW_64_GPRELHIGH R_SW_64 = 17 + R_SW_64_GPRELLOW R_SW_64 = 18 + R_SW_64_IMMED_GP_16 R_SW_64 = 19 + R_SW_64_IMMED_GP_HI32 R_SW_64 = 20 + R_SW_64_IMMED_SCN_HI32 R_SW_64 = 21 + R_SW_64_IMMED_BR_HI32 R_SW_64 = 22 + R_SW_64_IMMED_LO32 R_SW_64 = 23 + R_SW_64_COPY R_SW_64 = 24 /* Copy sympol at runtime */ + R_SW_64_GLOB_DAT R_SW_64 = 25 /* Create GOT entry */ + R_SW_64_JMP_SLOT R_SW_64 = 26 /* Create PLT entry */ + R_SW_64_RELATIVE R_SW_64 = 27 /* Adjust by program base */ +) + +var rsw_64Strings = []intName{ + {0, "R_SW_64_NONE"}, + {1, "R_SW_64_REFLONG"}, + {2, "R_SW_64_REFQUAD"}, + {3, "R_SW_64_GPREL32"}, + {4, "R_SW_64_LITERAL"}, + {5, "R_SW_64_LITUSE"}, + {6, "R_SW_64_GPDISP"}, + {7, "R_SW_64_BRADDR"}, + {8, "R_SW_64_HINT"}, + {9, "R_SW_64_SREL16"}, + {10, "R_SW_64_SREL32"}, + {11, "R_SW_64_SREL64"}, + {12, "R_SW_64_OP_PUSH"}, + {13, "R_SW_64_OP_STORE"}, + {14, "R_SW_64_OP_PSUB"}, + {15, "R_SW_64_OP_PRSHIFT"}, + {16, "R_SW_64_GPVALUE"}, + {17, "R_SW_64_GPRELHIGH"}, + {18, "R_SW_64_GPRELLOW"}, + {19, "R_SW_64_IMMED_GP_16"}, + {20, "R_SW_64_IMMED_GP_HI32"}, + {21, "R_SW_64_IMMED_SCN_HI32"}, + {22, "R_SW_64_IMMED_BR_HI32"}, + {23, "R_SW_64_IMMED_LO32"}, + {24, "R_SW_64_COPY"}, + {25, "R_SW_64_GLOB_DAT"}, + {26, "R_SW_64_JMP_SLOT"}, + {27, "R_SW_64_RELATIVE"}, +} + +func (i R_SW_64) String() string { return stringName(uint32(i), rsw_64Strings, false) } +func (i R_SW_64) GoString() string { return stringName(uint32(i), rsw_64Strings, true) } // Relocation types for ARM. 
type R_ARM int diff --git a/libgo/go/debug/elf/elf_test.go b/libgo/go/debug/elf/elf_test.go index f8985a8992361fb1f4ff958515f705bae3575f65..b4dccf386cc6db548a553a348de95ecd7fbca0bc 100644 --- a/libgo/go/debug/elf/elf_test.go +++ b/libgo/go/debug/elf/elf_test.go @@ -31,6 +31,7 @@ var nameTests = []nameTest{ {STV_HIDDEN, "STV_HIDDEN"}, {R_X86_64_PC32, "R_X86_64_PC32"}, {R_ALPHA_OP_PUSH, "R_ALPHA_OP_PUSH"}, + {R_SW_64_OP_PUSH, "R_SW_64_OP_PUSH"}, {R_ARM_THM_ABS5, "R_ARM_THM_ABS5"}, {R_386_GOT32, "R_386_GOT32"}, {R_PPC_GOT16_HI, "R_PPC_GOT16_HI"}, diff --git a/libgo/go/debug/elf/file.go b/libgo/go/debug/elf/file.go index b9a8b1e0cbb545de13b882d0171326aeb2d2b2b8..eea0f9aa10848acb28a8e21f6bfb1f0557bc7e31 100644 --- a/libgo/go/debug/elf/file.go +++ b/libgo/go/debug/elf/file.go @@ -627,6 +627,8 @@ func (f *File) applyRelocations(dst []byte, rels []byte) error { return f.applyRelocationsSPARC64(dst, rels) case f.Class == ELFCLASS64 && f.Machine == EM_ALPHA: return f.applyRelocationsALPHA(dst, rels) + case f.Class == ELFCLASS64 && f.Machine == EM_SW_64: + return f.applyRelocationsSW_64(dst, rels) default: return errors.New("applyRelocations: not implemented") } @@ -1238,6 +1240,53 @@ func (f *File) applyRelocationsALPHA(dst []byte, rels []byte) error { return nil } +//SW_64 begin + +func (f *File) applyRelocationsSW_64(dst []byte, rels []byte) error { + // 24 is the size of Rela64. + if len(rels)%24 != 0 { + return errors.New("length of relocation section is not a multiple of 24") + } + + symbols, _, err := f.getSymbols(SHT_SYMTAB) + if err != nil { + return err + } + + b := bytes.NewReader(rels) + var rela Rela64 + for b.Len() > 0 { + binary.Read(b, f.ByteOrder, &rela) + symNo := rela.Info >> 32 + t := R_SW_64(rela.Info & 0xffff) + + if symNo == 0 || symNo > uint64(len(symbols)) { + continue + } + sym := &symbols[symNo-1] + if SymType(sym.Info&0xf) != STT_SECTION { + // We don't handle non-section relocations for now. + continue + } + + // There are relocations, so this must be a normal + // object file, and we only look at section symbols, + // so we assume that the symbol value is 0. + switch t { + case R_SW_64_REFQUAD: + if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { + continue + } + f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], uint64(rela.Addend)) + case R_SW_64_REFLONG: + if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { + } + f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], uint32(rela.Addend)) + } + } + return nil +} +//SW_64 end func (f *File) DWARF() (*dwarf.Data, error) { dwarfSuffix := func(s *Section) string { switch { diff --git a/libgo/go/encoding/xml/xml.go b/libgo/go/encoding/xml/xml.go index 5e73dcf731660c3da429ec5164d3b97c7e35b59f..1a6e2860e9b4e376484b34e6784fcc021c2ec26e 100644 --- a/libgo/go/encoding/xml/xml.go +++ b/libgo/go/encoding/xml/xml.go @@ -1719,6 +1719,7 @@ var htmlEntity = map[string]string{ "Psi": "\u03A8", "Omega": "\u03A9", "alpha": "\u03B1", + "sw_64": "\u03B1", "beta": "\u03B2", "gamma": "\u03B3", "delta": "\u03B4", diff --git a/libgo/go/go/build/syslist.go b/libgo/go/go/build/syslist.go index d72649b8b4129913c41aba8aa6dabe5610f2f446..c0975fa96ff444d3af7132727b3331ae05436872 100644 --- a/libgo/go/go/build/syslist.go +++ b/libgo/go/go/build/syslist.go @@ -8,4 +8,4 @@ package build // Do not remove from this list, as these are used for go/build filename matching. 
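[Editor's note] applyRelocationsSW_64 above patches only the two plain data relocations, and only for section symbols whose value is taken as zero. Expressed in C for comparison (function names invented, little-endian host and target assumed, non-negative-addend and bounds guards kept in the same spirit):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

static void
apply_r_sw_64_refquad (uint8_t *dst, size_t len, uint64_t off, int64_t addend)
{
  uint64_t v = (uint64_t) addend;        /* section symbol value is 0 */
  if (addend >= 0 && off + 8 <= len)
    memcpy (dst + off, &v, 8);
}

static void
apply_r_sw_64_reflong (uint8_t *dst, size_t len, uint64_t off, int64_t addend)
{
  uint32_t v = (uint32_t) addend;
  if (addend >= 0 && off + 4 <= len)
    memcpy (dst + off, &v, 4);
}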
const goosList = "aix android darwin dragonfly freebsd hurd illumos js linux nacl netbsd openbsd plan9 solaris windows zos " -const goarchList = "386 amd64 amd64p32 arm armbe arm64 arm64be ppc64 ppc64le mips mipsle mips64 mips64le mips64p32 mips64p32le ppc riscv riscv64 s390 s390x sparc sparc64 wasm alpha m68k nios2 sh shbe " +const goarchList = "386 amd64 amd64p32 arm armbe arm64 arm64be ppc64 ppc64le mips mipsle mips64 mips64le mips64p32 mips64p32le ppc riscv riscv64 s390 s390x sparc sparc64 wasm alpha sw_64 m68k nios2 sh shbe " diff --git a/libgo/go/internal/syscall/unix/getrandom_linux_sw_64.go b/libgo/go/internal/syscall/unix/getrandom_linux_sw_64.go new file mode 100644 index 0000000000000000000000000000000000000000..9587b5aa4caf004945f33f5fd01884f72b893c5b --- /dev/null +++ b/libgo/go/internal/syscall/unix/getrandom_linux_sw_64.go @@ -0,0 +1,9 @@ +// Copyright 2016 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package unix + +// Linux getrandom system call number. +// See GetRandom in getrandom_linux.go. +const randomTrap uintptr = 511 diff --git a/libgo/go/net/listen_test.go b/libgo/go/net/listen_test.go index d8c72096ed16514fc415f9ba15905fc2439a25de..ba7808774c9909b8ccf5b60ab872a55847ec4716 100644 --- a/libgo/go/net/listen_test.go +++ b/libgo/go/net/listen_test.go @@ -677,7 +677,7 @@ func multicastRIBContains(ip IP) (bool, error) { case "aix", "dragonfly", "netbsd", "openbsd", "plan9", "solaris", "illumos", "windows": return true, nil // not implemented yet case "linux": - if runtime.GOARCH == "arm" || runtime.GOARCH == "alpha" { + if runtime.GOARCH == "arm" || runtime.GOARCH == "alpha" || runtime.GOARCH == "sw_64" { return true, nil // not implemented yet } } diff --git a/libgo/go/regexp/testdata/basic.dat b/libgo/go/regexp/testdata/basic.dat index 7859290ba1dd0dc354b3149e16f364b2a1015794..061c403d6c9270dcf6e2b1195b84c9087843dde0 100644 --- a/libgo/go/regexp/testdata/basic.dat +++ b/libgo/go/regexp/testdata/basic.dat @@ -157,6 +157,7 @@ E a[bcd]*dcdcde adcdcde (0,7) E (ab|a)b*c abc (0,3)(0,2) E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4) BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5) +BE [A-Za-z_][A-Za-z0-9_]* sw_64 (0,5) E ^a(bc+|b[eh])g|.h$ abh (1,3) E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5) E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2) diff --git a/libgo/go/runtime/hash64.go b/libgo/go/runtime/hash64.go index 704bbe6f62bc9a6a79df8be630f7c3e2ff10a73d..d7b9e0b226d625e4d5a9bf9ed3c42493b9e737fb 100644 --- a/libgo/go/runtime/hash64.go +++ b/libgo/go/runtime/hash64.go @@ -6,7 +6,7 @@ // xxhash: https://code.google.com/p/xxhash/ // cityhash: https://code.google.com/p/cityhash/ -// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm alpha amd64p32 arm64be ia64 mips64p32 mips64p32le sparc64 +// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm alpha sw_64 amd64p32 arm64be ia64 mips64p32 mips64p32le sparc64 package runtime diff --git a/libgo/go/runtime/lfstack_64bit.go b/libgo/go/runtime/lfstack_64bit.go index af9e7d164b93eeec575edd6de54a3ea56388a931..d572e6656765757502c1e44800e223dd72e350fa 100644 --- a/libgo/go/runtime/lfstack_64bit.go +++ b/libgo/go/runtime/lfstack_64bit.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. 
-// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm arm64be alpha sparc64 ia64 +// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm arm64be alpha sw_64 sparc64 ia64 package runtime diff --git a/libgo/go/runtime/mpagealloc_64bit.go b/libgo/go/runtime/mpagealloc_64bit.go index 385b7b3e7a7a5819b8d90710e6f31f7781231b03..ceed0f4423e2381e5532146959e82f46d79ccf3e 100644 --- a/libgo/go/runtime/mpagealloc_64bit.go +++ b/libgo/go/runtime/mpagealloc_64bit.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. -// +build amd64 !darwin,arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x arm64be alpha sparc64 ia64 +// +build amd64 !darwin,arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x arm64be alpha sparc64 ia64 sw_64 // See mpagealloc_32bit.go for why darwin/arm64 is excluded here. diff --git a/libgo/go/syscall/endian_little.go b/libgo/go/syscall/endian_little.go index 0cd2d7524c646f6e61bbd7428a9b14b6761553dd..b67d4807978f583ea652b186f7742e8fc3b5e140 100644 --- a/libgo/go/syscall/endian_little.go +++ b/libgo/go/syscall/endian_little.go @@ -2,7 +2,7 @@ // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // -// +build 386 alpha amd64 amd64p32 arm arm64 ia64 mips64le mipsle mips64p32le nios2 ppc64le riscv64 sh wasm +// +build 386 alpha sw_64 amd64 amd64p32 arm arm64 ia64 mips64le mipsle mips64p32le nios2 ppc64le riscv64 sh wasm package syscall diff --git a/libgo/go/syscall/libcall_linux_sw_64.go b/libgo/go/syscall/libcall_linux_sw_64.go new file mode 100644 index 0000000000000000000000000000000000000000..f6bb7be296bebf92d78278428b3c565060f29838 --- /dev/null +++ b/libgo/go/syscall/libcall_linux_sw_64.go @@ -0,0 +1,13 @@ +// Copyright 2012 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// GNU/Linux library calls Sw_64 specific. + +package syscall + +//sys Ioperm(from int, num int, on int) (err error) +//ioperm(from _C_long, num _C_long, on _C_int) _C_int + +//sys Iopl(level int) (err error) +//iopl(level _C_int) _C_int diff --git a/libgo/go/syscall/syscall_linux_sw_64.go b/libgo/go/syscall/syscall_linux_sw_64.go new file mode 100644 index 0000000000000000000000000000000000000000..5a87d687d1be72d8c36da399f26796152e7ff742 --- /dev/null +++ b/libgo/go/syscall/syscall_linux_sw_64.go @@ -0,0 +1,25 @@ +// syscall_linux_sw_64.go -- GNU/Linux SW_64 specific support + +// Copyright 2011 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. 
+ +package syscall + +import "unsafe" + +func (r *PtraceRegs) PC() uint64 { + return r.Pc +} + +func (r *PtraceRegs) SetPC(pc uint64) { + r.Pc = pc +} + +func PtraceGetRegs(pid int, regsout *PtraceRegs) (err error) { + return ptrace(PTRACE_GETREGS, pid, 0, uintptr(unsafe.Pointer(regsout))) +} + +func PtraceSetRegs(pid int, regs *PtraceRegs) (err error) { + return ptrace(PTRACE_SETREGS, pid, 0, uintptr(unsafe.Pointer(regs))) +} diff --git a/libgo/goarch.sh b/libgo/goarch.sh index a5b6217c931193f4cda5bac1f3577682b99035fc..7013301f4f4bbc19568a59d6c43a0a8492cf5ade 100644 --- a/libgo/goarch.sh +++ b/libgo/goarch.sh @@ -52,6 +52,11 @@ case $goarch in defaultphyspagesize=8192 pcquantum=4 ;; + sw_64) + family=SW_64 + defaultphyspagesize=8192 + pcquantum=4 + ;; amd64) family=AMD64 ;; diff --git a/libgo/match.sh b/libgo/match.sh index cd35942f8bcca4d03de4fb110e8a46cea6947efe..028ea11a38643e03b89aed386562106418052a7e 100644 --- a/libgo/match.sh +++ b/libgo/match.sh @@ -116,7 +116,7 @@ for f in $gofiles; do aix | android | darwin | dragonfly | freebsd | illumos | hurd | js | linux | nacl | netbsd | openbsd | plan9 | solaris | windows) tag1=nonmatchingtag ;; - 386 | amd64 | amd64p32 | arm | armbe | arm64 | arm64be | alpha | ia64 | m68k | mips | mipsle | mips64 | mips64le | mips64p32 | mips64p32le | nios2 | ppc | ppc64 | ppc64le | riscv64 | s390 | s390x | sh | shbe | sparc | sparc64 | wasm) + 386 | amd64 | amd64p32 | arm | armbe | arm64 | arm64be | alpha | sw_64 | ia64 | m68k | mips | mipsle | mips64 | mips64le | mips64p32 | mips64p32le | nios2 | ppc | ppc64 | ppc64le | riscv64 | s390 | s390x | sh | shbe | sparc | sparc64 | wasm) tag1=nonmatchingtag ;; esac @@ -128,7 +128,7 @@ for f in $gofiles; do aix | android | darwin | dragonfly | freebsd | hurd | illumos | js | linux | nacl | netbsd | openbsd | plan9 | solaris | windows) tag2=nonmatchingtag ;; - 386 | amd64 | amd64p32 | arm | armbe | arm64 | arm64be | alpha | ia64 | m68k | mips | mipsle | mips64 | mips64le | mips64p32 | mips64p32le | nios2 | ppc | ppc64 | ppc64le | riscv64 | s390 | s390x | sh | shbe | sparc | sparc64 | wasm) + 386 | amd64 | amd64p32 | arm | armbe | arm64 | arm64be | alpha | sw_64 | ia64 | m68k | mips | mipsle | mips64 | mips64le | mips64p32 | mips64p32le | nios2 | ppc | ppc64 | ppc64le | riscv64 | s390 | s390x | sh | shbe | sparc | sparc64 | wasm) tag2=nonmatchingtag ;; esac diff --git a/libgo/mksysinfo.sh b/libgo/mksysinfo.sh index bd2ba32cba1b58c9fc8fb54d055eb2d3089ab94c..ce2d557100d9310eff71aa3763244e1898ab2d0e 100644 --- a/libgo/mksysinfo.sh +++ b/libgo/mksysinfo.sh @@ -353,7 +353,12 @@ if test "$regs" = ""; then # mips* regs=`grep '^type _pt_regs struct' gen-sysinfo.go || true` fi +if test "$regs" = ""; then + # sw_64* + regs=`grep '^type _user_pt_regs struct' gen-sysinfo.go || true` +fi if test "$regs" != ""; then + regs=`echo $regs | sed -e 's/type _user_pt_regs struct//'` regs=`echo $regs | sed -e 's/type _pt_regs struct//'` regs=`echo $regs | sed -e 's/type __*user_regs_struct struct //' -e 's/[{}]//g'` diff --git a/libgo/runtime/go-signal.c b/libgo/runtime/go-signal.c index b429fdb240349c96ef84df37dc86fa8dd5e48c93..9f7eb1b8f0451ebc13b912bbf2f7789d59179f79 100644 --- a/libgo/runtime/go-signal.c +++ b/libgo/runtime/go-signal.c @@ -223,6 +223,8 @@ getSiginfo(siginfo_t *info, void *context __attribute__((unused))) ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.gregs[REG_EIP]; #elif defined(__alpha__) && defined(__linux__) ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.sc_pc; +#elif defined(__sw_64__) && 
defined(__linux__) + ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.sc_pc; #elif defined(__PPC__) && defined(__linux__) ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.regs->nip; #elif defined(__PPC__) && defined(_AIX) @@ -296,7 +298,7 @@ dumpregs(siginfo_t *info __attribute__((unused)), void *context __attribute__((u runtime_printf("fs %x\n", m->gregs[REG_FS]); runtime_printf("gs %x\n", m->gregs[REG_GS]); } -#elif defined(__alpha__) && defined(__linux__) +#elif (defined(__alpha__)||defined(__sw_64__)) && defined(__linux__) { mcontext_t *m = &((ucontext_t*)(context))->uc_mcontext; diff --git a/libgomp/config/linux/sw_64/futex.h b/libgomp/config/linux/sw_64/futex.h new file mode 100644 index 0000000000000000000000000000000000000000..cd19a9bb4770df38d7ffa2c953f5dc41f56af5e8 --- /dev/null +++ b/libgomp/config/linux/sw_64/futex.h @@ -0,0 +1,102 @@ +/* Copyright (C) 2005-2020 Free Software Foundation, Inc. + Contributed by Richard Henderson . + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* Provide target-specific access to the futex system call. 
*/ + +#ifndef SYS_futex +#define SYS_futex 394 +#endif + +static inline void +futex_wait (int *addr, int val) +{ + register long sc_0 __asm__("$0"); + register long sc_16 __asm__("$16"); + register long sc_17 __asm__("$17"); + register long sc_18 __asm__("$18"); + register long sc_19 __asm__("$19"); + + sc_0 = SYS_futex; + sc_16 = (long) addr; + sc_17 = gomp_futex_wait; + sc_18 = val; + sc_19 = 0; + __asm volatile("callsys" + : "=r"(sc_0), "=r"(sc_19) + : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19) + : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", "$23", + "$24", "$25", "$27", "$28", "memory"); + if (__builtin_expect (sc_19, 0) && sc_0 == ENOSYS) + { + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; + sc_0 = SYS_futex; + sc_17 &= ~FUTEX_PRIVATE_FLAG; + sc_19 = 0; + __asm volatile("callsys" + : "=r"(sc_0), "=r"(sc_19) + : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19) + : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", + "$23", "$24", "$25", "$27", "$28", "memory"); + } +} + +static inline void +futex_wake (int *addr, int count) +{ + register long sc_0 __asm__("$0"); + register long sc_16 __asm__("$16"); + register long sc_17 __asm__("$17"); + register long sc_18 __asm__("$18"); + register long sc_19 __asm__("$19"); + + sc_0 = SYS_futex; + sc_16 = (long) addr; + sc_17 = gomp_futex_wake; + sc_18 = count; + __asm volatile("callsys" + : "=r"(sc_0), "=r"(sc_19) + : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18) + : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", "$23", + "$24", "$25", "$27", "$28", "memory"); + if (__builtin_expect (sc_19, 0) && sc_0 == ENOSYS) + { + gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; + gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; + sc_0 = SYS_futex; + sc_17 &= ~FUTEX_PRIVATE_FLAG; + __asm volatile("callsys" + : "=r"(sc_0), "=r"(sc_19) + : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18) + : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", + "$23", "$24", "$25", "$27", "$28", "memory"); + } +} + +static inline void +cpu_relax (void) +{ + __asm volatile("" : : : "memory"); +} diff --git a/libgomp/configure b/libgomp/configure index b03036c2738cbee16e15f3198c3028a72633c9ae..7d8f769d0534d86d60e652b89e3c765d22fe64d7 100644 --- a/libgomp/configure +++ b/libgomp/configure @@ -11844,6 +11844,12 @@ case `echo $GFORTRAN` in FC=no fi ;; esac +case "${target}" in + sw_64-*-*) + FC="$GFORTRAN" + ;; +*) +esac ac_ext=${ac_fc_srcext-f} ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' diff --git a/libgomp/configure.tgt b/libgomp/configure.tgt index e5b558be0c1948cd50fb8ea67f8495f0ae7e1f31..8a1a8565e522636f163252f7a1ba02bf12e6058b 100644 --- a/libgomp/configure.tgt +++ b/libgomp/configure.tgt @@ -76,6 +76,10 @@ if test x$enable_linux_futex = xyes; then config_path="linux/s390 linux posix" ;; + sw_64*-*-linux*) + config_path="linux/sw_64 linux posix" + ;; + tile*-*-linux*) config_path="linux/tile linux posix" ;; diff --git a/libgomp/libgomp.spec.in b/libgomp/libgomp.spec.in index 5651603f48745617e9aa7de5f07d2a618a18caad..738895d592659028b08da32455d004cf5a709705 100644 --- a/libgomp/libgomp.spec.in +++ b/libgomp/libgomp.spec.in @@ -1,3 +1,4 @@ # This spec file is read by gcc when linking. It is used to specify the # standard libraries we need in order to link with libgomp. 
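The new libgomp futex header above talks to the kernel through hand-written callsys sequences, with SYS_futex defaulted to 394 and a one-shot fallback that clears FUTEX_PRIVATE_FLAG from gomp_futex_wait/gomp_futex_wake when the kernel answers ENOSYS. The sketch below expresses the same wait/wake-plus-fallback logic through glibc's generic syscall(2) wrapper purely for illustration; the demo_* names and the use of <linux/futex.h> constants are assumptions of this sketch, not code from the patch.

// Hedged sketch: the same futex wait/wake and ENOSYS fallback idea as
// libgomp/config/linux/sw_64/futex.h, but via syscall(2) instead of
// inline callsys assembly.
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <cerrno>

static int futex_op_wait = FUTEX_WAIT_PRIVATE;
static int futex_op_wake = FUTEX_WAKE_PRIVATE;

static void demo_futex_wait (int *addr, int val)
{
  long rc = syscall (SYS_futex, addr, futex_op_wait, val, nullptr, nullptr, 0);
  if (rc < 0 && errno == ENOSYS)
    {
      // Old kernels only know the non-private operations: drop the flag
      // once and retry, as the patch does for gomp_futex_wait/wake.
      futex_op_wait &= ~FUTEX_PRIVATE_FLAG;
      futex_op_wake &= ~FUTEX_PRIVATE_FLAG;
      syscall (SYS_futex, addr, futex_op_wait, val, nullptr, nullptr, 0);
    }
}

static void demo_futex_wake (int *addr, int count)
{
  long rc = syscall (SYS_futex, addr, futex_op_wake, count, nullptr, nullptr, 0);
  if (rc < 0 && errno == ENOSYS)
    {
      futex_op_wake &= ~FUTEX_PRIVATE_FLAG;
      syscall (SYS_futex, addr, futex_op_wake, count, nullptr, nullptr, 0);
    }
}

int main ()
{
  int flag = 1;
  demo_futex_wake (&flag, 1);   // no waiters, returns immediately
  demo_futex_wait (&flag, 0);   // *flag != 0, so the kernel returns EAGAIN at once
  return 0;
}

The libitm variant that follows (sys_futex0) wraps the same system call but hands the negated errno back to its caller instead of retrying.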
-*link_gomp: @link_gomp@ +#*link_gomp: @link_gomp@ +*link_gomp: @link_gomp@ --whole-archive -lpthread --no-whole-archive diff --git a/libitm/config/linux/sw_64/futex_bits.h b/libitm/config/linux/sw_64/futex_bits.h new file mode 100644 index 0000000000000000000000000000000000000000..5688fc17a14179f8609c4b6a0d6d43cc009bbaa0 --- /dev/null +++ b/libitm/config/linux/sw_64/futex_bits.h @@ -0,0 +1,56 @@ +/* Copyright (C) 2008-2020 Free Software Foundation, Inc. + Contributed by Richard Henderson . + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +/* Provide target-specific access to the futex system call. */ + +#ifndef SYS_futex +#define SYS_futex 394 +#endif + +static inline long +sys_futex0 (std::atomic *addr, int op, int val) +{ + register long sc_0 __asm__("$0"); + register long sc_16 __asm__("$16"); + register long sc_17 __asm__("$17"); + register long sc_18 __asm__("$18"); + register long sc_19 __asm__("$19"); + long res; + + sc_0 = SYS_futex; + sc_16 = (long) addr; + sc_17 = op; + sc_18 = val; + sc_19 = 0; + __asm volatile("callsys" + : "=r"(sc_0), "=r"(sc_19) + : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19) + : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", "$23", + "$24", "$25", "$27", "$28", "memory"); + + res = sc_0; + if (__builtin_expect (sc_19, 0)) + res = -res; + return res; +} diff --git a/libitm/config/sw_64/sjlj.S b/libitm/config/sw_64/sjlj.S new file mode 100644 index 0000000000000000000000000000000000000000..5c62e3d237179834e2d9df91c8cea83d7e975d58 --- /dev/null +++ b/libitm/config/sw_64/sjlj.S @@ -0,0 +1,112 @@ +/* Copyright (C) 2009-2020 Free Software Foundation, Inc. + Contributed by Richard Henderson . + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . 
*/ + + .text + .align 4 + .globl _ITM_beginTransaction + .ent _ITM_beginTransaction + +#define FRAME 144 + +_ITM_beginTransaction: + ldgp $29, 0($27) + subl $30, FRAME, $30 + .frame $30, FRAME, $26, 0 + .mask 0x04000000, 0 + stl $26, 0($30) + .prologue 1 + + stl $9, 8($30) + stl $10, 16($30) + addl $30, FRAME, $0 + stl $11, 24($30) + + stl $12, 32($30) + stl $13, 40($30) + stl $14, 48($30) + stl $15, 56($30) + + stl $0, 64($30) + fstd $f2, 72($30) + fstd $f3, 80($30) + fstd $f4, 88($30) + + fstd $f5, 96($30) + fstd $f6, 104($30) + fstd $f7, 112($30) + fstd $f8, 120($30) + + fstd $f9, 128($30) + mov $30, $17 +#ifdef __PIC__ + unop + bsr $26, GTM_begin_transaction !samegp +#else + call $26, GTM_begin_transaction + ldgp $29, 0($26) +#endif + + ldl $26, 0($30) + addl $30, FRAME, $30 + ret +.end _ITM_beginTransaction + + .align 4 + .globl GTM_longjmp +#ifdef __ELF__ + .hidden GTM_longjmp +#endif + .ent GTM_longjmp + +GTM_longjmp: + .prologue 0 + ldl $26, 0($17) + ldl $9, 8($17) + ldl $10, 16($17) + ldl $11, 24($17) + + ldl $12, 32($17) + ldl $13, 40($17) + ldl $14, 48($17) + ldl $15, 56($17) + + ldl $1, 64($17) + fldd $f2, 72($17) + fldd $f3, 80($17) + fldd $f4, 88($17) + + fldd $f5, 96($17) + fldd $f6, 104($17) + fldd $f7, 112($17) + fldd $f8, 120($17) + + fldd $f9, 128($17) + mov $16, $0 + mov $1, $30 + ret +.end GTM_longjmp + +#ifdef __linux__ +.section .note.GNU-stack, "", @progbits +#endif diff --git a/libitm/config/sw_64/target.h b/libitm/config/sw_64/target.h new file mode 100644 index 0000000000000000000000000000000000000000..4cf8d8d41d86c71cc95c99517317ac01a8f2d35a --- /dev/null +++ b/libitm/config/sw_64/target.h @@ -0,0 +1,44 @@ +/* Copyright (C) 2009-2020 Free Software Foundation, Inc. + Contributed by Richard Henderson . + + This file is part of the GNU Transactional Memory Library (libitm). + + Libitm is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + Libitm is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + . */ + +namespace GTM HIDDEN { + +typedef struct gtm_jmpbuf +{ + unsigned long pc; + unsigned long s[7]; + void *cfa; + unsigned long f[8]; +} gtm_jmpbuf; + +/* The size of one line in hardware caches (in bytes). */ +#define HW_CACHELINE_SIZE 64 + +static inline void +cpu_relax (void) +{ + __asm volatile("" : : : "memory"); +} + +} // namespace GTMHIDDEN diff --git a/libitm/configure.tgt b/libitm/configure.tgt index d1beb5c9ec85a3621b0709394a726065cc89139a..30db505a7a91529fd3063cfd7c582674eb27ec54 100644 --- a/libitm/configure.tgt +++ b/libitm/configure.tgt @@ -121,6 +121,7 @@ case "${target_cpu}" in *) ARCH="${target_cpu}" ;; + sw_64*) ARCH=sw_64 ;; esac # For the benefit of top-level configure, determine if the cpu is supported. 
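_ITM_beginTransaction above checkpoints the transaction in a 144-byte frame: the return address at offset 0, the callee-saved integer registers $9..$15 at 8..56, the caller's stack pointer at 64, and $f2..$f9 at 72..128; GTM_longjmp reloads exactly the same slots. That is the gtm_jmpbuf declared in the new libitm/config/sw_64/target.h, so the struct and the assembly have to stay in lock step. A standalone compile-time cross-check, assuming the usual LP64 sizes (8-byte unsigned long and pointers), is sketched below; it is not part of the patch.

// Compile-time sketch tying the sjlj.S frame layout to gtm_jmpbuf
// (libitm/config/sw_64/target.h), LP64 assumed.
#include <cstddef>

struct gtm_jmpbuf
{
  unsigned long pc;    // $26, stored at 0($sp) by _ITM_beginTransaction
  unsigned long s[7];  // $9..$15, stored at 8..56
  void *cfa;           // caller's $sp (sp + FRAME), stored at 64
  unsigned long f[8];  // $f2..$f9, stored at 72..128
};

static_assert (offsetof (gtm_jmpbuf, s) == 8,    "matches the stores at 8..56");
static_assert (offsetof (gtm_jmpbuf, cfa) == 64, "matches the store at 64");
static_assert (offsetof (gtm_jmpbuf, f) == 72,   "matches the stores at 72..128");
static_assert (sizeof (gtm_jmpbuf) == 136,       "fits inside the 144-byte FRAME");

int main () { return 0; }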
diff --git a/libsanitizer/asan/asan_allocator.h b/libsanitizer/asan/asan_allocator.h index b37d8ef4e8d2926e5014fe7a05467cd139d1023f..78ab20f4d1c0ff0eb43e881f634f8169ce23e50d 100644 --- a/libsanitizer/asan/asan_allocator.h +++ b/libsanitizer/asan/asan_allocator.h @@ -146,6 +146,11 @@ typedef DefaultSizeClassMap SizeClassMap; const uptr kAllocatorSpace = ~(uptr)0; const uptr kAllocatorSize = 0x8000000000ULL; // 500G typedef DefaultSizeClassMap SizeClassMap; +# elif SANITIZER_SW64 +// If kSpaceBeg is ~0 then SpaceBeg is chosen dynamically my mmap. +const uptr kAllocatorSpace = ~(uptr)0; +const uptr kAllocatorSize = 0x40000000000ULL; // 4T. +typedef DefaultSizeClassMap SizeClassMap; # else const uptr kAllocatorSpace = 0x600000000000ULL; const uptr kAllocatorSize = 0x40000000000ULL; // 4T. diff --git a/libsanitizer/asan/asan_interceptors.cpp b/libsanitizer/asan/asan_interceptors.cpp index b19cf25c7cd00ddede212ee932b4f8d0065814c9..0f8cf179e974c16c426233d4bbf45a837c0ced53 100644 --- a/libsanitizer/asan/asan_interceptors.cpp +++ b/libsanitizer/asan/asan_interceptors.cpp @@ -41,6 +41,8 @@ #define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.1" #elif defined(__mips__) && SANITIZER_LINUX #define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.2" +#elif defined(__sw_64__) +#define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.1" #endif namespace __asan { diff --git a/libsanitizer/asan/asan_mapping.h b/libsanitizer/asan/asan_mapping.h index 09be904270cedbb1bed9736dd3a616463180c83b..44187e3754b9683b7c33655d692efe7bc940640b 100644 --- a/libsanitizer/asan/asan_mapping.h +++ b/libsanitizer/asan/asan_mapping.h @@ -163,6 +163,7 @@ static const u64 kDefaultShort64bitShadowOffset = static const u64 kAArch64_ShadowOffset64 = 1ULL << 36; static const u64 kMIPS32_ShadowOffset32 = 0x0aaa0000; static const u64 kMIPS64_ShadowOffset64 = 1ULL << 37; +static const u64 kSW64_ShadowOffset64 = 1ULL << 49; static const u64 kPPC64_ShadowOffset64 = 1ULL << 41; static const u64 kSystemZ_ShadowOffset64 = 1ULL << 52; static const u64 kSPARC64_ShadowOffset64 = 1ULL << 43; // 0x80000000000 @@ -210,6 +211,8 @@ static const u64 kMyriadCacheBitMask32 = 0x40000000ULL; # define SHADOW_OFFSET kAArch64_ShadowOffset64 # elif defined(__powerpc64__) # define SHADOW_OFFSET kPPC64_ShadowOffset64 +# elif defined(__sw_64__) +# define SHADOW_OFFSET kSW64_ShadowOffset64 # elif defined(__s390x__) # define SHADOW_OFFSET kSystemZ_ShadowOffset64 # elif SANITIZER_FREEBSD diff --git a/libsanitizer/configure.tgt b/libsanitizer/configure.tgt index fa30065b5954efb301b88dd38c35de9e8ad00541..9ebad0020628bee80e65015550b7e7e3ffa8e08c 100644 --- a/libsanitizer/configure.tgt +++ b/libsanitizer/configure.tgt @@ -47,6 +47,10 @@ case "${target}" in ;; arm*-*-linux*) ;; + sw_64*-*-linux*) + TSAN_SUPPORTED=yes + LSAN_SUPPORTED=yes + ;; mips*64*-*-linux*) # This clause is only here to not match the supported mips*-*-linux*. 
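The only target-specific number ASan really needs above is the shadow offset: with the default scale of 3, every eight application bytes are described by one shadow byte at (addr >> 3) + offset, and the patch picks 1ULL << 49 for sw_64. A throwaway sketch of that arithmetic follows; the constant and function names are local to the sketch, and the sample address is hypothetical.

// Illustration of ASan's address-to-shadow arithmetic with the sw_64
// offset chosen above (kSW64_ShadowOffset64 = 1ULL << 49).
#include <cstdint>
#include <cstdio>

constexpr uint64_t kShadowScale  = 3;           // ASan default: 8 app bytes per shadow byte
constexpr uint64_t kShadowOffset = 1ULL << 49;  // value used for __sw_64__

constexpr uint64_t mem_to_shadow (uint64_t addr)
{
  return (addr >> kShadowScale) + kShadowOffset;
}

int main ()
{
  const uint64_t sample = 0x120000000ULL;       // hypothetical heap address
  std::printf ("app %#llx -> shadow %#llx\n",
               (unsigned long long) sample,
               (unsigned long long) mem_to_shadow (sample));
  return 0;
}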
UNSUPPORTED=1 diff --git a/libsanitizer/lsan/lsan_allocator.cpp b/libsanitizer/lsan/lsan_allocator.cpp index d86c3921395cb2bcb8d263c4171b4fbba10b2ffd..b3ce8dc8166bd324c3580a745cd99a7196c1fbbb 100644 --- a/libsanitizer/lsan/lsan_allocator.cpp +++ b/libsanitizer/lsan/lsan_allocator.cpp @@ -28,7 +28,7 @@ extern "C" void *memset(void *ptr, int value, uptr num); namespace __lsan { #if defined(__i386__) || defined(__arm__) static const uptr kMaxAllowedMallocSize = 1UL << 30; -#elif defined(__mips64) || defined(__aarch64__) +#elif defined(__mips64) || defined(__aarch64__) || defined(__sw_64__) static const uptr kMaxAllowedMallocSize = 4UL << 30; #else static const uptr kMaxAllowedMallocSize = 8UL << 30; diff --git a/libsanitizer/lsan/lsan_allocator.h b/libsanitizer/lsan/lsan_allocator.h index e1397099767284f28376214ac9284a610cac46d2..a5363392eb1f8abac829cf1d6720958a9e32479d 100644 --- a/libsanitizer/lsan/lsan_allocator.h +++ b/libsanitizer/lsan/lsan_allocator.h @@ -50,7 +50,7 @@ struct ChunkMetadata { }; #if defined(__mips64) || defined(__aarch64__) || defined(__i386__) || \ - defined(__arm__) + defined(__arm__) || defined(__sw_64__) template struct AP32 { static const uptr kSpaceBeg = 0; diff --git a/libsanitizer/lsan/lsan_common.cpp b/libsanitizer/lsan/lsan_common.cpp index 9ff9f4c5d1c977d32b7ce668e7a810691bae3215..a86141326100669addcfc2e7559707777868f8d9 100644 --- a/libsanitizer/lsan/lsan_common.cpp +++ b/libsanitizer/lsan/lsan_common.cpp @@ -138,6 +138,8 @@ static inline bool CanBeAHeapPointer(uptr p) { return ((p >> 47) == 0); #elif defined(__mips64) return ((p >> 40) == 0); +#elif defined(__sw_64__) + return ((p >> 52) == 0); #elif defined(__aarch64__) unsigned runtimeVMA = (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1); diff --git a/libsanitizer/lsan/lsan_common.h b/libsanitizer/lsan/lsan_common.h index d24abe31b71b524e3f545410e772ce6744170ba8..ed09db21507b82667c26d2fc578f994354acad89 100644 --- a/libsanitizer/lsan/lsan_common.h +++ b/libsanitizer/lsan/lsan_common.h @@ -32,7 +32,7 @@ #if (SANITIZER_LINUX && !SANITIZER_ANDROID || SANITIZER_MAC) && \ (SANITIZER_WORDSIZE == 64) && \ (defined(__x86_64__) || defined(__mips64) || defined(__aarch64__) || \ - defined(__powerpc64__)) + defined(__powerpc64__) || defined(__sw_64__)) #define CAN_SANITIZE_LEAKS 1 #elif defined(__i386__) && \ (SANITIZER_LINUX && !SANITIZER_ANDROID || SANITIZER_MAC) diff --git a/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc b/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc index 50e3558b52e87275987a6ba4522a0ea66e02e382..283529f008524135b7d2990456de708817aae384 100644 --- a/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc +++ b/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc @@ -4516,7 +4516,11 @@ INTERCEPTOR(int, shmctl, int shmid, int cmd, void *buf) { } return res; } +#ifdef SANITIZER_SW64 +#define INIT_SHMCTL COMMON_INTERCEPT_FUNCTION_VER(shmctl, "GLIBC_2.2"); +#else #define INIT_SHMCTL COMMON_INTERCEPT_FUNCTION(shmctl); +#endif #else #define INIT_SHMCTL #endif diff --git a/libsanitizer/sanitizer_common/sanitizer_common_syscalls.inc b/libsanitizer/sanitizer_common/sanitizer_common_syscalls.inc index 31ff48cfd2cfccef973402f8613568e32afb205f..e83569b991dc3defdbc09f646eaa7185680d006e 100644 --- a/libsanitizer/sanitizer_common/sanitizer_common_syscalls.inc +++ b/libsanitizer/sanitizer_common/sanitizer_common_syscalls.inc @@ -2296,7 +2296,8 @@ POST_SYSCALL(ni_syscall)(long res) {} PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) { #if 
!SANITIZER_ANDROID && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ - defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__)) + defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \ + defined(__sw_64__)) if (data) { if (request == ptrace_setregs) { PRE_READ((void *)data, struct_user_regs_struct_sz); @@ -2317,7 +2318,8 @@ PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) { POST_SYSCALL(ptrace)(long res, long request, long pid, long addr, long data) { #if !SANITIZER_ANDROID && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ - defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__)) + defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \ + defined(__sw_64__)) if (res >= 0 && data) { // Note that this is different from the interceptor in // sanitizer_common_interceptors.inc. diff --git a/libsanitizer/sanitizer_common/sanitizer_linux.cpp b/libsanitizer/sanitizer_common/sanitizer_linux.cpp index 15ccd738d858bb7afea21b59fc453027ec49f0a1..4ce47654ddfdea4b37ee1601946361bb3b14a4f6 100644 --- a/libsanitizer/sanitizer_common/sanitizer_linux.cpp +++ b/libsanitizer/sanitizer_common/sanitizer_linux.cpp @@ -42,6 +42,16 @@ #undef stat #endif +#if defined(__sw_64__) +#define stat kernel_stat +#define stat64 kernel_stat64 +#include +#undef stat +#undef stat64 +#include +#include +#endif + #include #include #include @@ -250,7 +260,7 @@ static void stat64_to_stat(struct stat64 *in, struct stat *out) { } #endif -#if defined(__mips64) +#if defined(__mips64) || defined(__sw_64__) // Undefine compatibility macros from // so that they would not clash with the kernel_stat // st_[a|m|c]time fields @@ -278,6 +288,12 @@ static void kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) { out->st_size = in->st_size; out->st_blksize = in->st_blksize; out->st_blocks = in->st_blocks; +#if defined(__sw_64__) + // There's no nsecs in sw_64's struct stat + out->st_atim.tv_sec = in->st_atime; + out->st_mtim.tv_sec = in->st_mtime; + out->st_ctim.tv_sec = in->st_ctime; +#else #if defined(__USE_MISC) || \ defined(__USE_XOPEN2K8) || \ defined(SANITIZER_ANDROID) @@ -295,6 +311,7 @@ static void kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) { out->st_ctime = in->st_ctime; out->st_atimensec = in->st_ctime_nsec; #endif +#endif } #endif @@ -305,8 +322,8 @@ uptr internal_stat(const char *path, void *buf) { return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, 0); #elif SANITIZER_LINUX_USES_64BIT_SYSCALLS -# if defined(__mips64) - // For mips64, stat syscall fills buffer in the format of kernel_stat +# if defined(__mips64) || defined(__sw_64__) + // For mips64 and sw_64, stat syscall fills buffer in the format of kernel_stat struct kernel_stat kbuf; int res = internal_syscall(SYSCALL(stat), path, &kbuf); kernel_stat_to_stat(&kbuf, (struct stat *)buf); @@ -330,8 +347,8 @@ uptr internal_lstat(const char *path, void *buf) { return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, AT_SYMLINK_NOFOLLOW); #elif SANITIZER_LINUX_USES_64BIT_SYSCALLS -# if SANITIZER_MIPS64 - // For mips64, lstat syscall fills buffer in the format of kernel_stat +# if SANITIZER_MIPS64 || SANITIZER_SW64 + // For mips64 and sw_64, lstat syscall fills buffer in the format of kernel_stat struct kernel_stat kbuf; int res = internal_syscall(SYSCALL(lstat), path, &kbuf); kernel_stat_to_stat(&kbuf, (struct stat *)buf); @@ -350,8 +367,8 @@ uptr internal_lstat(const char *path, void *buf) { uptr 
internal_fstat(fd_t fd, void *buf) { #if SANITIZER_FREEBSD || SANITIZER_OPENBSD || \ SANITIZER_LINUX_USES_64BIT_SYSCALLS -#if SANITIZER_MIPS64 && !SANITIZER_OPENBSD - // For mips64, fstat syscall fills buffer in the format of kernel_stat +#if (SANITIZER_MIPS64 || SANITIZER_SW64) && !SANITIZER_OPENBSD + // For mips64 and sw_64, fstat syscall fills buffer in the format of kernel_stat struct kernel_stat kbuf; int res = internal_syscall(SYSCALL(fstat), fd, &kbuf); kernel_stat_to_stat(&kbuf, (struct stat *)buf); @@ -727,6 +744,19 @@ uptr internal_waitpid(int pid, int *status, int options) { 0 /* rusage */); } +#ifdef __sw_64__ +uptr internal_getpid() { + return internal_syscall(SYSCALL(getxpid)); +} + +uptr internal_getppid() { + uptr ppid; + internal_syscall(SYSCALL(getxpid)); + asm("mov $20, %0\n" + :"=r"(ppid)); + return ppid; +} +#else uptr internal_getpid() { return internal_syscall(SYSCALL(getpid)); } @@ -734,6 +764,7 @@ uptr internal_getpid() { uptr internal_getppid() { return internal_syscall(SYSCALL(getppid)); } +#endif uptr internal_getdents(fd_t fd, struct linux_dirent *dirp, unsigned int count) { #if SANITIZER_FREEBSD @@ -760,7 +791,7 @@ uptr internal_sigaltstack(const void *ss, void *oss) { } int internal_fork() { -#if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS +#if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS || SANITIZER_SW64 return internal_syscall(SYSCALL(clone), SIGCHLD, 0); #else return internal_syscall(SYSCALL(fork)); @@ -816,7 +847,7 @@ int internal_sigaction_norestorer(int signum, const void *act, void *oldact) { // rt_sigaction, so we need to do the same (we'll need to reimplement the // restorers; for x86_64 the restorer address can be obtained from // oldact->sa_restorer upon a call to sigaction(xxx, NULL, oldact). -#if !SANITIZER_ANDROID || !SANITIZER_MIPS32 +#if (!SANITIZER_ANDROID || !SANITIZER_MIPS32) && !SANITIZER_SW64 k_act.sa_restorer = u_act->sa_restorer; #endif } @@ -832,7 +863,7 @@ int internal_sigaction_norestorer(int signum, const void *act, void *oldact) { internal_memcpy(&u_oldact->sa_mask, &k_oldact.sa_mask, sizeof(__sanitizer_kernel_sigset_t)); u_oldact->sa_flags = k_oldact.sa_flags; -#if !SANITIZER_ANDROID || !SANITIZER_MIPS32 +#if (!SANITIZER_ANDROID || !SANITIZER_MIPS32) && !SANITIZER_SW64 u_oldact->sa_restorer = k_oldact.sa_restorer; #endif } @@ -1035,6 +1066,11 @@ uptr GetMaxVirtualAddress() { return (1ULL << (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1)) - 1; # elif defined(__mips64) return (1ULL << 40) - 1; // 0x000000ffffffffffUL; +# elif defined(__sw_64__) +// SW64 has a 42-bit user address space(4TiB) +// according to TASK_SIZE in kernel. +// In sw6b PGTABLE is SW_4LEVEL. 
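Two of the process primitives above lean on kernel conventions inherited from Alpha: there is no separate getppid system call (getxpid returns the caller's pid in $0 and the parent's pid in $20, which the added inline asm copies out), and internal_fork is routed through clone with only SIGCHLD set, the classic fork-without-a-new-stack form. The sketch below exercises that clone form through the portable syscall wrapper; it is an illustration of the idiom, not the sanitizer code, and it deliberately avoids the register-level getxpid trick.

// Hedged illustration of the internal_fork() fallback used above:
// clone(SIGCHLD, 0), i.e. no new stack pointer, behaves like fork(2).
#include <sys/syscall.h>
#include <sys/wait.h>
#include <csignal>
#include <cstdio>
#include <unistd.h>

int main ()
{
  long pid = syscall (SYS_clone, SIGCHLD, 0L);
  if (pid < 0)
    return 1;
  if (pid == 0)
    _exit (0);                          // child side of the pseudo-fork
  int status = 0;
  waitpid ((pid_t) pid, &status, 0);
  std::printf ("clone(SIGCHLD, 0) forked child %ld\n", pid);
  return 0;
}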
+ return (1ULL << 52) - 1; // 0x000fffffffffffffUL; # elif defined(__s390x__) return (1ULL << 53) - 1; // 0x001fffffffffffffUL; #elif defined(__sparc__) @@ -1326,6 +1362,72 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, : "memory", "$29" ); return res; } +#elif defined(__sw_64__) +uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, + int *parent_tidptr, void *newtls, int *child_tidptr) { + long long res; + if (!fn || !child_stack) + return -EINVAL; + child_stack = (char *)child_stack - 4 * sizeof(unsigned long long); + ((unsigned long long *)child_stack)[0] = (uptr)fn; + ((unsigned long long *)child_stack)[1] = (uptr)arg; + ((unsigned long long *)child_stack)[2] = (uptr)flags; + + register void *r20 __asm__("$20") = newtls; + register int *r22 __asm__("$22") = child_tidptr; + + __asm__ __volatile__( + /* $v0 = syscall($v0 = __NR_clone, + * $a0 = flags, + * $a1 = child_stack, + * $a2 = parent_tidptr, + * $a3 = child_tidptr, + * $a4 = new_tls) + */ + "mov %[flag],$16\n" + "mov %[usp],$17\n" + "mov %[ptid],$18\n" + "ldl $19,0($sp)\n" + "mov %5,$20\n" + /* Store the fifth argument on stack + * if we are using 32-bit abi. + */ + "ldi $0,%[NR_clone];\n" + "sys_call 0x83;\n" + + /* if ($v0 != 0) + * return; + */ + "bne $0,1f;\n" + "mov $31,$15;\n" + /* Call "fn(arg)". */ + "ldl $27,0($sp);\n" + "ldl $16,8($sp);\n" + "ldi $sp,32($sp);\n" + + "call $26,($27),0;\n" + "ldgp $29, 0($26);\n" + + /* Call _exit($v0). */ + "mov $0,$16;\n" + "ldi $0,%[NR_exit];\n" + "sys_call 0x83;\n" + + /* Return to parent. */ + "1:\n" + : "=r" (res) + : [flag]"r"(flags), + [usp]"r"(child_stack), + [ptid]"r"(parent_tidptr), + "r"(r20), + "r"(r22), + [NR_clone]"i"(__NR_clone), + [NR_exit]"i"(__NR_exit) + : "memory", "$30"); + + return res; +} + #elif defined(__aarch64__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr) { @@ -1879,6 +1981,11 @@ static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) { *pc = ucontext->uc_mcontext.pc; *bp = ucontext->uc_mcontext.regs[29]; *sp = ucontext->uc_mcontext.sp; +#elif defined(__sw_64__) + ucontext_t *ucontext = (ucontext_t*)context; + *pc = ucontext->uc_mcontext.sc_pc; + *bp = ucontext->uc_mcontext.sc_regs[15]; + *sp = ucontext->uc_mcontext.sc_regs[30]; #elif defined(__hppa__) ucontext_t *ucontext = (ucontext_t*)context; *pc = ucontext->uc_mcontext.sc_iaoq[0]; @@ -1966,6 +2073,11 @@ static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) { *pc = ucontext->uc_mcontext.pc; *bp = ucontext->uc_mcontext.gregs[30]; *sp = ucontext->uc_mcontext.gregs[29]; +#elif defined(__sw_64__) + ucontext_t *ucontext = (ucontext_t*)context; + *pc = ucontext->uc_mcontext.sc_pc; + *bp = ucontext->uc_mcontext.sc_regs[15]; + *sp = ucontext->uc_mcontext.sc_regs[30]; #elif defined(__s390__) ucontext_t *ucontext = (ucontext_t*)context; # if defined(__s390x__) diff --git a/libsanitizer/sanitizer_common/sanitizer_linux.h b/libsanitizer/sanitizer_common/sanitizer_linux.h index c28347ad963a7e1482e3aa06bd433670f76cd7d4..05976a700c34e4bcfc47609ac90fa5e17ae460e1 100644 --- a/libsanitizer/sanitizer_common/sanitizer_linux.h +++ b/libsanitizer/sanitizer_common/sanitizer_linux.h @@ -61,7 +61,7 @@ int internal_sigaction_norestorer(int signum, const void *act, void *oldact); void internal_sigdelset(__sanitizer_sigset_t *set, int signum); #if defined(__x86_64__) || defined(__mips__) || defined(__aarch64__) \ || defined(__powerpc64__) || defined(__s390__) || 
defined(__i386__) \ - || defined(__arm__) + || defined(__arm__) || defined(__sw_64__) uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, int *parent_tidptr, void *newtls, int *child_tidptr); #endif diff --git a/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp b/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp index e09d568d8024863f7d97861cb2d9e246740083a5..18e7555ba8d8bf84bf4a9a2c796a8713c34f695e 100644 --- a/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp +++ b/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp @@ -262,7 +262,7 @@ void InitTlsSize() { } #if (defined(__x86_64__) || defined(__i386__) || defined(__mips__) || \ defined(__aarch64__) || defined(__powerpc64__) || defined(__s390__) || \ - defined(__arm__)) && \ + defined(__arm__) || defined(__sw_64__)) && \ SANITIZER_LINUX && !SANITIZER_ANDROID // sizeof(struct pthread) from glibc. static atomic_uintptr_t thread_descriptor_size; @@ -309,6 +309,8 @@ uptr ThreadDescriptorSize() { val = 1776; // from glibc.ppc64le 2.20-8.fc21 #elif defined(__s390__) val = FIRST_32_SECOND_64(1152, 1776); // valid for glibc 2.22 +#elif defined(__sw_64__) + val = 1776; #endif if (val) atomic_store_relaxed(&thread_descriptor_size, val); @@ -356,7 +358,7 @@ uptr ThreadSelf() { rdhwr %0,$29;\ .set pop" : "=r" (thread_pointer)); descr_addr = thread_pointer - kTlsTcbOffset - TlsPreTcbSize(); -# elif defined(__aarch64__) || defined(__arm__) +# elif defined(__aarch64__) || defined(__arm__) || defined(__sw_64__) descr_addr = reinterpret_cast(__builtin_thread_pointer()) - ThreadDescriptorSize(); # elif defined(__s390__) @@ -435,7 +437,7 @@ static void GetTls(uptr *addr, uptr *size) { *addr -= *size; *addr += ThreadDescriptorSize(); # elif defined(__mips__) || defined(__aarch64__) || defined(__powerpc64__) \ - || defined(__arm__) + || defined(__arm__) || defined(__sw_64__) *addr = ThreadSelf(); *size = GetTlsSize(); # else diff --git a/libsanitizer/sanitizer_common/sanitizer_platform.h b/libsanitizer/sanitizer_common/sanitizer_platform.h index c68bfa25875585818721029b099bd4da4cddcb78..dea617abd40064616b475d827539b9f8d2578e41 100644 --- a/libsanitizer/sanitizer_common/sanitizer_platform.h +++ b/libsanitizer/sanitizer_common/sanitizer_platform.h @@ -147,6 +147,12 @@ # define SANITIZER_MIPS64 0 #endif +#if defined(__sw_64__) +# define SANITIZER_SW64 1 +#else +# define SANITIZER_SW64 0 +#endif + #if defined(__s390__) # define SANITIZER_S390 1 # if defined(__s390x__) @@ -242,6 +248,8 @@ # endif #elif defined(__sparc__) #define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 52) +#elif defined(__sw_64__) +# define SANITIZER_MMAP_RANGE_SIZE 1ULL << 52 #else # define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 47) #endif diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_interceptors.h b/libsanitizer/sanitizer_common/sanitizer_platform_interceptors.h index 61a6b82ef8184b98833ca482b6040cc6a9d3f198..820d458be0f6cd867744adfbcc658adb6ee11eb0 100644 --- a/libsanitizer/sanitizer_common/sanitizer_platform_interceptors.h +++ b/libsanitizer/sanitizer_common/sanitizer_platform_interceptors.h @@ -225,7 +225,11 @@ #define SANITIZER_INTERCEPT_GETITIMER SI_POSIX #define SANITIZER_INTERCEPT_TIME SI_POSIX #define SANITIZER_INTERCEPT_GLOB SI_LINUX_NOT_ANDROID || SI_SOLARIS +#if SANITIZER_SW64 +#define SANITIZER_INTERCEPT_GLOB64 0 +#else #define SANITIZER_INTERCEPT_GLOB64 SI_LINUX_NOT_ANDROID +#endif #define SANITIZER_INTERCEPT_WAIT SI_POSIX #define SANITIZER_INTERCEPT_INET 
SI_POSIX #define SANITIZER_INTERCEPT_PTHREAD_GETSCHEDPARAM (SI_POSIX && !SI_OPENBSD) @@ -261,7 +265,7 @@ #if SI_LINUX_NOT_ANDROID && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ - defined(__s390__)) + defined(__s390__) || defined(__sw_64__)) #define SANITIZER_INTERCEPT_PTRACE 1 #else #define SANITIZER_INTERCEPT_PTRACE 0 diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cpp b/libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cpp index f22f50391286b484a17c19b2b7efe17995fd858b..7a3e3ab60e97da46c6724dced8dc777a3619267c 100644 --- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cpp +++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cpp @@ -68,7 +68,7 @@ namespace __sanitizer { #if !defined(__powerpc64__) && !defined(__x86_64__) && !defined(__aarch64__)\ && !defined(__mips__) && !defined(__s390__)\ - && !defined(__sparc__) && !defined(__riscv) + && !defined(__sparc__) && !defined(__riscv) && !defined(__sw_64__) COMPILER_CHECK(struct___old_kernel_stat_sz == sizeof(struct __old_kernel_stat)); #endif diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp index 8b4162bcd6a9131286a631b02db7384841ad99de..5585755f3f26ffbf3fb573907be2ade6a148ea4d 100644 --- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp +++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp @@ -92,7 +92,7 @@ #if SANITIZER_LINUX # include # include -# if defined(__mips64) || defined(__aarch64__) || defined(__arm__) +# if defined(__mips64) || defined(__aarch64__) || defined(__arm__) || defined(__sw_64__) // for pt_regs # include # ifdef __arm__ typedef struct user_fpregs elf_fpregset_t; @@ -128,7 +128,7 @@ typedef struct user_fpregs elf_fpregset_t; #include #include #include -#if defined(__mips64) +#if defined(__mips64) || defined(__sw_64__) // for elf_gregset_t # include #endif #include @@ -232,7 +232,7 @@ namespace __sanitizer { // has been removed from glibc 2.28. 
#if defined(__aarch64__) || defined(__s390x__) || defined (__mips64) \ || defined(__powerpc64__) || defined(__arch64__) || defined(__sparcv9) \ - || defined(__x86_64__) || (defined(__riscv) && __riscv_xlen == 64) + || defined(__x86_64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__sw_64__) #define SIZEOF_STRUCT_USTAT 32 #elif defined(__arm__) || defined(__i386__) || defined(__mips__) \ || defined(__powerpc__) || defined(__s390__) || defined(__sparc__) @@ -307,11 +307,11 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); #if SANITIZER_LINUX && !SANITIZER_ANDROID && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ - defined(__s390__)) + defined(__s390__) || defined(__sw_64__)) #if defined(__mips64) || defined(__powerpc64__) || defined(__arm__) unsigned struct_user_regs_struct_sz = sizeof(struct pt_regs); unsigned struct_user_fpregs_struct_sz = sizeof(elf_fpregset_t); -#elif defined(__aarch64__) +#elif defined(__aarch64__) || defined(__sw_64__) unsigned struct_user_regs_struct_sz = sizeof(struct user_pt_regs); unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpsimd_state); #elif defined(__s390__) @@ -322,12 +322,12 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpregs_struct); #endif // __mips64 || __powerpc64__ || __aarch64__ #if defined(__x86_64) || defined(__mips64) || defined(__powerpc64__) || \ - defined(__aarch64__) || defined(__arm__) || defined(__s390__) + defined(__aarch64__) || defined(__arm__) || defined(__s390__) || defined(__sw_64__) unsigned struct_user_fpxregs_struct_sz = 0; #else unsigned struct_user_fpxregs_struct_sz = sizeof(struct user_fpxregs_struct); #endif // __x86_64 || __mips64 || __powerpc64__ || __aarch64__ || __arm__ -// || __s390__ +// || __s390__ || __sw_64__ #ifdef __arm__ unsigned struct_user_vfpregs_struct_sz = ARM_VFPREGS_SIZE; #else @@ -1059,7 +1059,7 @@ CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_mask); // didn't exist. 
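The sizes recorded above (struct_user_regs_struct_sz taken from user_pt_regs, and so on) are what the ptrace interceptors use to mark the register buffers a tracer hands to the kernel. On targets that describe their register file as user_pt_regs the natural fetch is the iovec-based PTRACE_GETREGSET request, which is also what the stop-the-world code selects later in this patch through ARCH_IOVEC_FOR_GETREGSET. A self-contained sketch of that request follows; it reads the registers into an opaque buffer so it does not depend on any particular pt_regs layout.

// Sketch of an iovec-based register fetch (the ARCH_IOVEC_FOR_GETREGSET
// path): PTRACE_GETREGSET with NT_PRSTATUS fills a caller-supplied buffer
// and reports in iov_len how many bytes the kernel wrote, which is the
// quantity struct_user_regs_struct_sz records.
#include <sys/ptrace.h>
#include <sys/uio.h>
#include <sys/wait.h>
#include <signal.h>
#include <elf.h>            // NT_PRSTATUS
#include <unistd.h>
#include <cstdio>

int main ()
{
  pid_t child = fork ();
  if (child == 0)
    {
      ptrace (PTRACE_TRACEME, 0, nullptr, nullptr);
      raise (SIGSTOP);      // wait for the parent to inspect us
      _exit (0);
    }
  waitpid (child, nullptr, 0);

  unsigned char regs[1024];                     // large enough for any pt_regs layout
  struct iovec iov = { regs, sizeof (regs) };
  if (ptrace (PTRACE_GETREGSET, child, (void *) NT_PRSTATUS, &iov) == 0)
    std::printf ("kernel returned %zu bytes of general registers\n", iov.iov_len);

  kill (child, SIGKILL);
  waitpid (child, nullptr, 0);
  return 0;
}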
CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_flags); #endif -#if SANITIZER_LINUX && (!SANITIZER_ANDROID || !SANITIZER_MIPS32) +#if SANITIZER_LINUX && (!SANITIZER_ANDROID || !SANITIZER_MIPS32) && !SANITIZER_SW64 CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_restorer); #endif diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h index d82fd5e400587a46c23ba66d27d9568582c37952..9c572f4d32aecf650fe5097c868a4486a204ceee 100644 --- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h +++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h @@ -73,6 +73,9 @@ const unsigned struct_kernel_stat64_sz = 104; #elif defined(__aarch64__) const unsigned struct_kernel_stat_sz = 128; const unsigned struct_kernel_stat64_sz = 104; +#elif defined(__sw_64__) +const unsigned struct_kernel_stat_sz = 80; +const unsigned struct_kernel_stat64_sz = 136; #elif defined(__powerpc__) && !defined(__powerpc64__) const unsigned struct_kernel_stat_sz = 72; const unsigned struct_kernel_stat64_sz = 104; @@ -101,6 +104,9 @@ const unsigned struct_kernel_stat64_sz = 104; #elif defined(__riscv) && __riscv_xlen == 64 const unsigned struct_kernel_stat_sz = 128; const unsigned struct_kernel_stat64_sz = 104; +#elif defined(__sw_64__) +const unsigned struct_kernel_stat_sz = 80; +const unsigned struct_kernel_stat64_sz = 136; #endif struct __sanitizer_perf_event_attr { unsigned type; @@ -259,15 +265,15 @@ struct __sanitizer_shmid_ds { u64 shm_ctime; #else uptr shm_atime; -#if !defined(_LP64) && !defined(__mips__) +#if !defined(_LP64) && !defined(__mips__) && !defined(__sw_64__) uptr __unused1; #endif uptr shm_dtime; -#if !defined(_LP64) && !defined(__mips__) +#if !defined(_LP64) && !defined(__mips__) && !defined(__sw_64__) uptr __unused2; #endif uptr shm_ctime; -#if !defined(_LP64) && !defined(__mips__) +#if !defined(_LP64) && !defined(__mips__) && !defined(__sw_64__) uptr __unused3; #endif #endif @@ -509,7 +515,7 @@ typedef int __sanitizer_clockid_t; #if SANITIZER_LINUX #if defined(_LP64) || defined(__x86_64__) || defined(__powerpc__) || \ - defined(__mips__) + defined(__mips__) && !defined(__sw_64__) typedef unsigned __sanitizer___kernel_uid_t; typedef unsigned __sanitizer___kernel_gid_t; #else @@ -522,7 +528,7 @@ typedef long long __sanitizer___kernel_off_t; typedef long __sanitizer___kernel_off_t; #endif -#if defined(__powerpc__) || defined(__mips__) +#if defined(__powerpc__) || defined(__mips__) && !defined(__sw_64__) typedef unsigned int __sanitizer___kernel_old_uid_t; typedef unsigned int __sanitizer___kernel_old_gid_t; #else @@ -634,7 +640,7 @@ struct __sanitizer_sigaction { #endif #endif #endif -#if SANITIZER_LINUX +#if SANITIZER_LINUX && !defined(__sw_64__) void (*sa_restorer)(); #endif #if defined(__mips__) && (SANITIZER_WORDSIZE == 32) @@ -797,7 +803,7 @@ typedef void __sanitizer_FILE; #if SANITIZER_LINUX && !SANITIZER_ANDROID && \ (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ - defined(__s390__)) + defined(__s390__) || defined(__sw_64__)) extern unsigned struct_user_regs_struct_sz; extern unsigned struct_user_fpregs_struct_sz; extern unsigned struct_user_fpxregs_struct_sz; @@ -883,7 +889,7 @@ struct __sanitizer_cookie_io_functions_t { #define IOC_NRBITS 8 #define IOC_TYPEBITS 8 #if defined(__powerpc__) || defined(__powerpc64__) || defined(__mips__) || \ - defined(__sparc__) + defined(__sparc__) || defined(__sw_64__) #define 
IOC_SIZEBITS 13 #define IOC_DIRBITS 3 #define IOC_NONE 1U diff --git a/libsanitizer/sanitizer_common/sanitizer_stacktrace.h b/libsanitizer/sanitizer_common/sanitizer_stacktrace.h index f1f29e9f32ee811ac771ab6d56e9e1a565c7d0bd..67ba06cf71761f9617bee4b5c333039be40be5d9 100644 --- a/libsanitizer/sanitizer_common/sanitizer_stacktrace.h +++ b/libsanitizer/sanitizer_common/sanitizer_stacktrace.h @@ -22,6 +22,8 @@ static const u32 kStackTraceMax = 256; #if SANITIZER_LINUX && defined(__mips__) # define SANITIZER_CAN_FAST_UNWIND 0 +#elif defined(__sw_64__) +# define SANITIZER_CAN_FAST_UNWIND 0 #elif SANITIZER_WINDOWS # define SANITIZER_CAN_FAST_UNWIND 0 #elif SANITIZER_OPENBSD diff --git a/libsanitizer/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp b/libsanitizer/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp index 651d5056dd9d5feb4a7ca2f8cd1fe52d2080fd77..0cdfa8fad93561fd080b6baf244d72d3fe400033 100644 --- a/libsanitizer/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp +++ b/libsanitizer/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp @@ -16,7 +16,7 @@ #if SANITIZER_LINUX && (defined(__x86_64__) || defined(__mips__) || \ defined(__aarch64__) || defined(__powerpc64__) || \ defined(__s390__) || defined(__i386__) || \ - defined(__arm__)) + defined(__arm__) || defined(__sw_64__)) #include "sanitizer_stoptheworld.h" @@ -498,6 +498,11 @@ typedef struct user regs_struct; # define REG_SP regs[EF_REG29] # endif +#elif defined(__sw_64__) +typedef struct user regs_struct; +#define REG_SP regs[EF_SP] +#define ARCH_IOVEC_FOR_GETREGSET + #elif defined(__aarch64__) typedef struct user_pt_regs regs_struct; #define REG_SP sp diff --git a/libsanitizer/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/libsanitizer/sanitizer_common/sanitizer_symbolizer_libcdep.cpp index 3b19a6836ec53da3d2c4f71123e1e81cc6ffdecf..a5c7252cb31bb2dd7a1f3fa8d2399d4e21b0395c 100644 --- a/libsanitizer/sanitizer_common/sanitizer_symbolizer_libcdep.cpp +++ b/libsanitizer/sanitizer_common/sanitizer_symbolizer_libcdep.cpp @@ -270,6 +270,8 @@ class LLVMSymbolizerProcess : public SymbolizerProcess { const char* const kSymbolizerArch = "--default-arch=s390x"; #elif defined(__s390__) const char* const kSymbolizerArch = "--default-arch=s390"; +#elif defined(__sw_64__) + const char* const kSymbolizerArch = "--default-arch=sw_64"; #else const char* const kSymbolizerArch = "--default-arch=unknown"; #endif diff --git a/libsanitizer/tsan/Makefile.am b/libsanitizer/tsan/Makefile.am index 5d37abd20de5060a92800412c2871a0d01d1402e..32b87fc6f5c0d6aa92c31216afe50abde64eff1d 100644 --- a/libsanitizer/tsan/Makefile.am +++ b/libsanitizer/tsan/Makefile.am @@ -49,7 +49,7 @@ tsan_files = \ tsan_sync.cpp libtsan_la_SOURCES = $(tsan_files) -EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S tsan_rtl_mips64.S tsan_rtl_ppc64.S +EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S tsan_rtl_mips64.S tsan_rtl_ppc64.S tsan_rtl_sw64.S libtsan_la_LIBADD = $(top_builddir)/sanitizer_common/libsanitizer_common.la $(top_builddir)/interception/libinterception.la $(TSAN_TARGET_DEPENDENT_OBJECTS) libtsan_la_DEPENDENCIES = $(top_builddir)/sanitizer_common/libsanitizer_common.la $(top_builddir)/interception/libinterception.la $(TSAN_TARGET_DEPENDENT_OBJECTS) if LIBBACKTRACE_SUPPORTED diff --git a/libsanitizer/tsan/Makefile.in b/libsanitizer/tsan/Makefile.in index 74896427edfe8349e86a8078abd0b0599c659e7b..6448de25553c9101d110a3a90c4fb884878121a7 100644 --- a/libsanitizer/tsan/Makefile.in +++ b/libsanitizer/tsan/Makefile.in 
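Setting SANITIZER_CAN_FAST_UNWIND to 0 above means sw_64 never tries to chase frame pointers; every sanitizer stack trace goes through the slow, unwind-table-driven path, which ultimately rests on the libgcc unwinder. The sketch below drives that unwinder directly with _Unwind_Backtrace; it is an independent illustration of the mechanism, not sanitizer code.

// Minimal table-driven backtrace, the mechanism the "slow unwind" path
// relies on once frame-pointer walking is disabled for the target.
#include <unwind.h>
#include <cstdio>

static _Unwind_Reason_Code collect (struct _Unwind_Context *ctx, void *arg)
{
  int *depth = static_cast<int *> (arg);
  std::printf ("  frame %2d: pc = %#lx\n", *depth,
               (unsigned long) _Unwind_GetIP (ctx));
  ++*depth;
  return _URC_NO_REASON;
}

static void leaf ()
{
  int depth = 0;
  _Unwind_Backtrace (collect, &depth);
}

static void middle () { leaf (); }

int main ()
{
  middle ();
  return 0;
}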
@@ -451,7 +451,7 @@ tsan_files = \ tsan_sync.cpp libtsan_la_SOURCES = $(tsan_files) -EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S tsan_rtl_mips64.S tsan_rtl_ppc64.S +EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S tsan_rtl_mips64.S tsan_rtl_ppc64.S tsan_rtl_sw64.S libtsan_la_LIBADD = \ $(top_builddir)/sanitizer_common/libsanitizer_common.la \ $(top_builddir)/interception/libinterception.la \ diff --git a/libsanitizer/tsan/tsan_interceptors_posix.cpp b/libsanitizer/tsan/tsan_interceptors_posix.cpp index 8aea1e4ec0513bbddc55bd55e307fa456e26d03e..c30ceeaf3e57437d50ba91cb4284f0255192991a 100644 --- a/libsanitizer/tsan/tsan_interceptors_posix.cpp +++ b/libsanitizer/tsan/tsan_interceptors_posix.cpp @@ -73,7 +73,7 @@ struct ucontext_t { }; #endif -#if defined(__x86_64__) || defined(__mips__) || SANITIZER_PPC64V1 +#if defined(__x86_64__) || defined(__mips__) || defined(__sw_64__) || SANITIZER_PPC64V1 #define PTHREAD_ABI_BASE "GLIBC_2.3.2" #elif defined(__aarch64__) || SANITIZER_PPC64V2 #define PTHREAD_ABI_BASE "GLIBC_2.17" @@ -142,7 +142,7 @@ typedef long long_t; # define F_TLOCK 2 /* Test and lock a region for exclusive use. */ # define F_TEST 3 /* Test a region for other processes locks. */ -#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_NETBSD +#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_NETBSD || SANITIZER_SW64 const int SA_SIGINFO = 0x40; const int SIG_SETMASK = 3; #elif defined(__mips__) @@ -2371,7 +2371,7 @@ int sigaction_impl(int sig, const __sanitizer_sigaction *act, sigactions[sig].sa_flags = *(volatile int const *)&act->sa_flags; internal_memcpy(&sigactions[sig].sa_mask, &act->sa_mask, sizeof(sigactions[sig].sa_mask)); -#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD +#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD && !SANITIZER_SW64 sigactions[sig].sa_restorer = act->sa_restorer; #endif internal_memcpy(&newact, act, sizeof(newact)); @@ -2674,6 +2674,14 @@ void InitializeInterceptors() { TSAN_INTERCEPT(pthread_timedjoin_np); #endif + #if SANITIZER_SW64 + // sw64 have two version of timer function, osf_xxx with @glibc2.0, + // which is 32bits syscall for old kernal. xxx with @glibc2.1 is 64bits + // syscall for new kernal, we use the new one. + TSAN_INTERCEPT_VER(setitimer, "GLIBC_2.1"); + TSAN_INTERCEPT_VER(setitimer, "GLIBC_2.1"); + #endif + TSAN_INTERCEPT_VER(pthread_cond_init, PTHREAD_ABI_BASE); TSAN_INTERCEPT_VER(pthread_cond_signal, PTHREAD_ABI_BASE); TSAN_INTERCEPT_VER(pthread_cond_broadcast, PTHREAD_ABI_BASE); diff --git a/libsanitizer/tsan/tsan_platform.h b/libsanitizer/tsan/tsan_platform.h index 63eb14fcd3402abb8426ecfc30f2a62c4e32c9a7..e4e2e296148a51aca1017dbebc94d7e31fa66255 100644 --- a/libsanitizer/tsan/tsan_platform.h +++ b/libsanitizer/tsan/tsan_platform.h @@ -352,6 +352,44 @@ struct Mapping47 { // Indicates the runtime will define the memory regions at runtime. #define TSAN_RUNTIME_VMA 1 + +#elif defined(__sw_64__) + +// TODO(sw64_map): as sw64 kernal doesn't map such large space, we just map +// it for test, for now it works will. +// TODO(sw64_map_la): as sw64 map all space in low address, we set all user +// space +// in Lo address, perhaps there is some way to change it. 
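The constants that follow carve sw_64's 52-bit address space into application, shadow, metainfo and trace windows; the block comment and the Mapping struct just below spell them out. As a cross-check, the ranges must be disjoint and the shadow window must be at least four times the low application window, since TSan keeps four 8-byte shadow cells per 8 application bytes in its default configuration. The compile-time sketch below restates the struct's values (names shortened, nothing new added) to make those two properties explicit.

// Sanity-check sketch for the sw_64 TSan Mapping defined below: ranges
// are disjoint and the shadow can hold 4 shadow cells per app word.
#include <cstdint>

using uptr = uint64_t;

constexpr uptr kLoAppMemBeg   = 0x0000000000000ull;
constexpr uptr kLoAppMemEnd   = 0x0600000000000ull;
constexpr uptr kShadowBeg     = 0x4000000000000ull;
constexpr uptr kShadowEnd     = 0x6000000000000ull;
constexpr uptr kMetaShadowBeg = 0x6000000000000ull;
constexpr uptr kMetaShadowEnd = 0x7000000000000ull;
constexpr uptr kTraceMemBeg   = 0x7000000000000ull;
constexpr uptr kTraceMemEnd   = 0x7c00000000000ull;

static_assert (kLoAppMemEnd <= kShadowBeg,     "app range sits below the shadow");
static_assert (kShadowEnd <= kMetaShadowBeg,   "shadow sits below the metainfo");
static_assert (kMetaShadowEnd <= kTraceMemBeg, "metainfo sits below the trace");
static_assert (kShadowEnd - kShadowBeg >= 4 * (kLoAppMemEnd - kLoAppMemBeg),
               "4 shadow bytes per application byte fit in the window");

int main () { return 0; }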
+/* +C/C++ on linux/sw64 (52-bit VMA) +0000 0000 0000 - 0001 2000 0000: modules and main thread stack +0001 2000 0000 - 0008 0000 0000: main binary +0400 0000 0000 - 0600 0000 0000: pie main binary (including heap) +0600 0000 0000 - 4000 0000 0000: - +4000 0000 0000 - 6000 0000 0000: shadow +6000 0000 0000 - 7000 0000 0000: metainfo +7000 0000 0000 - 7c00 0000 0000: trace +*/ + +struct Mapping { + static const uptr kLoAppMemBeg = 0x0000000000000ull; + static const uptr kLoAppMemEnd = 0x0600000000000ull; + static const uptr kShadowBeg = 0x4000000000000ull; + static const uptr kShadowEnd = 0x6000000000000ull; + static const uptr kHiAppMemBeg = 0xfff0000000000ull; + static const uptr kHiAppMemEnd = 0xfff0000000000ull; + static const uptr kAppMemMsk = 0x0000000000000ull; + //distans between lo address to shadow begin + static const uptr kAppMemXor = 0x1000000000000ull; + static const uptr kHeapMemBeg = 0xff00000000000ull; + static const uptr kHeapMemEnd = 0xff00000000000ull; + static const uptr kMetaShadowBeg = 0x6000000000000ull; + static const uptr kMetaShadowEnd = 0x7000000000000ull; + static const uptr kTraceMemBeg = 0x7000000000000ull; + static const uptr kTraceMemEnd = 0x7c00000000000ull; + static const uptr kVdsoBeg = 0x3c00000000000000ull; +}; +#define TSAN_RUNTIME_VMA 1 #endif #elif SANITIZER_GO && !SANITIZER_WINDOWS && defined(__x86_64__) diff --git a/libsanitizer/tsan/tsan_platform_linux.cpp b/libsanitizer/tsan/tsan_platform_linux.cpp index 33fa586ca1b0f174e913945d46d57972d5a75af9..7d3c2eb380037bfbcc0a51ad426b4f2669c6eace 100644 --- a/libsanitizer/tsan/tsan_platform_linux.cpp +++ b/libsanitizer/tsan/tsan_platform_linux.cpp @@ -378,6 +378,8 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) { return mangled_sp ^ xor_key; #elif defined(__mips__) return mangled_sp; +#elif defined(__sw_64__) + return mangled_sp; #else #error "Unknown platform" #endif @@ -394,6 +396,8 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) { # define LONG_JMP_SP_ENV_SLOT 13 # elif defined(__mips64) # define LONG_JMP_SP_ENV_SLOT 1 +# elif defined(__sw_64__) +# define LONG_JMP_SP_ENV_SLOT 8 # else # define LONG_JMP_SP_ENV_SLOT 6 # endif diff --git a/libsanitizer/tsan/tsan_platform_posix.cpp b/libsanitizer/tsan/tsan_platform_posix.cpp index 1a0faee0252e20ca730caf9587143e3f34ac2d43..5467951663c131df2eefc4ea04c57de63bfd7279 100644 --- a/libsanitizer/tsan/tsan_platform_posix.cpp +++ b/libsanitizer/tsan/tsan_platform_posix.cpp @@ -89,6 +89,9 @@ void InitializeShadowMemory() { } else { DCHECK(0); } +#elif defined(__sw_64__) + uptr kMadviseRangeBeg = 0x210000000000ull; + uptr kMadviseRangeSize = 0x010000000000ull; #endif NoHugePagesInShadow(MemToShadow(kMadviseRangeBeg), kMadviseRangeSize * kShadowMultiplier); diff --git a/libsanitizer/tsan/tsan_rtl.h b/libsanitizer/tsan/tsan_rtl.h index c38fc43a9f848726ae40562b3de465d0a0a1d4bf..35f904f8f72bcc59b6c0276e7ab8ae8be55fa2a8 100644 --- a/libsanitizer/tsan/tsan_rtl.h +++ b/libsanitizer/tsan/tsan_rtl.h @@ -54,7 +54,7 @@ namespace __tsan { #if !SANITIZER_GO struct MapUnmapCallback; -#if defined(__mips64) || defined(__aarch64__) || defined(__powerpc__) +#if defined(__mips64) || defined(__aarch64__) || defined(__powerpc__) || defined(__sw_64__) struct AP32 { static const uptr kSpaceBeg = 0; diff --git a/libsanitizer/tsan/tsan_rtl_sw64.S b/libsanitizer/tsan/tsan_rtl_sw64.S new file mode 100644 index 0000000000000000000000000000000000000000..f74bfef8d2a349bcdf8557b605723f5d205887ad --- /dev/null +++ b/libsanitizer/tsan/tsan_rtl_sw64.S @@ -0,0 +1,236 @@ +// The content of this file 
is sw64-only:
+#if defined(__sw_64__)
+
+#include "sanitizer_common/sanitizer_asm.h"
+
+.section .text
+.set noreorder
+
+ASM_HIDDEN(__tsan_setjmp)
+.comm _ZN14__interception11real_setjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(setjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(setjmp))
+ASM_SYMBOL_INTERCEPTOR(setjmp):
+  ldgp $r29, 0($r27)
+  CFI_STARTPROC
+
+  // Save frame/link register
+  ldi $sp, -32($sp)
+  stl $r26, 0($sp)
+  stl $fp, 8($sp)
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (26, -32)
+  CFI_OFFSET (15, -24)
+
+  // Adjust the SP for previous frame
+  ldi $fp,0($sp)
+  CFI_DEF_CFA_REGISTER (15)
+
+  // Save env parameter
+  stl $r16, 16($sp)
+  CFI_OFFSET (0, -16)
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  ldi $r16, 32($sp)
+
+  // call tsan interceptor
+  //ldih $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprelhigh
+  //ldi $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprellow
+  ldl $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !literal
+  call $r26, ($r27), 0
+  ldgp $r29, 0($r26)
+
+  // Restore env parameter
+  ldl $r16, 16($sp)
+  CFI_RESTORE (0)
+
+  // Restore frame/link register
+  ldl $fp, 8($sp)
+  ldl $r26, 0($sp)
+  CFI_RESTORE (15)
+  CFI_RESTORE (26)
+  CFI_DEF_CFA (31, 0)
+  ldi $sp, 32($sp)
+
+  // tail jump to libc setjmp
+  ldl $r27, _ZN14__interception11real_setjmpE($r29) !literal
+  ldl $r27, 0($r27)
+
+  jmp $r31, ($r27)
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(setjmp))
+
+ASM_HIDDEN(__tsan_setjmp)
+.comm _ZN14__interception12real__setjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(_setjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(_setjmp))
+ASM_SYMBOL_INTERCEPTOR(_setjmp):
+  ldgp $r29, 0($r27)
+  CFI_STARTPROC
+
+  // Save frame/link register
+  ldi $sp, -32($sp)
+  stl $r26, 0($sp)
+  stl $fp, 8($sp)
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (26, -32)
+  CFI_OFFSET (15, -24)
+
+  // Adjust the SP for previous frame
+  ldi $fp,0($sp)
+  CFI_DEF_CFA_REGISTER (15)
+
+  // Save env parameter
+  stl $r16, 16($sp)
+  CFI_OFFSET (0, -16)
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  ldi $r16, 32($sp)
+
+  // call tsan interceptor
+  //ldih $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprelhigh
+  //ldi $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprellow
+  ldl $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !literal
+  call $r26, ($r27), 0
+  ldgp $r29, 0($r26)
+
+  // Restore env parameter
+  ldl $r16, 16($sp)
+  CFI_RESTORE (0)
+
+  // Restore frame/link register
+  ldl $fp, 8($sp)
+  ldl $r26, 0($sp)
+  CFI_RESTORE (15)
+  CFI_RESTORE (26)
+  CFI_DEF_CFA (31, 0)
+  ldi $sp, 32($sp)
+
+  // tail jump to libc setjmp
+  ldl $r27, _ZN14__interception12real__setjmpE($r29) !literal
+  ldl $r27, 0($r27)
+
+  jmp $r31, ($r27)
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(_setjmp))
+
+ASM_HIDDEN(__tsan_setjmp)
+.comm _ZN14__interception14real_sigsetjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(sigsetjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
+ASM_SYMBOL_INTERCEPTOR(sigsetjmp):
+  ldgp $r29, 0($r27)
+  CFI_STARTPROC
+
+  // Save frame/link register
+  ldi $sp, -32($sp)
+  stl $r26, 0($sp)
+  stl $fp, 8($sp)
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (26, -32)
+  CFI_OFFSET (15, -24)
+
+  // Adjust the SP for previous frame
+  ldi $fp,0($sp)
+  CFI_DEF_CFA_REGISTER (15)
+
+  // Save env parameter
+  stl $r16, 16($sp)
+  stl $r17, 24($sp)
+  CFI_OFFSET (16, -16)
+  CFI_OFFSET (17, -8)
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  ldi $r16, 32($sp)
+
+  // call tsan interceptor
+  //ldih $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprelhigh
+  //ldi $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprellow
+  ldl $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !literal
+  call $r26, ($r27), 0
+  ldgp $r29, 0($r26)
+
+  // Restore env parameter
+  ldl $r16, 16($sp)
+  ldl $r17, 24($sp)
+  CFI_RESTORE (0)
+  CFI_RESTORE (1)
+
+  // Restore frame/link register
+  ldl $fp, 8($sp)
+  ldl $r26, 0($sp)
+  CFI_RESTORE (15)
+  CFI_RESTORE (26)
+  CFI_DEF_CFA (31, 0)
+  ldi $sp, 32($sp)
+
+  // tail jump to libc setjmp
+  ldl $r27, _ZN14__interception14real_sigsetjmpE($r29) !literal
+  ldl $r27, 0($r27)
+
+  jmp $r31, ($r27)
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(sigsetjmp))
+
+ASM_HIDDEN(__tsan_setjmp)
+.comm _ZN14__interception16real___sigsetjmpE,8,8
+.globl ASM_SYMBOL_INTERCEPTOR(__sigsetjmp)
+ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
+ASM_SYMBOL_INTERCEPTOR(__sigsetjmp):
+  ldgp $r29, 0($r27)
+  CFI_STARTPROC
+
+  // Save frame/link register
+  ldi $sp, -32($sp)
+  stl $r26, 0($sp)
+  stl $fp, 8($sp)
+  CFI_DEF_CFA_OFFSET (32)
+  CFI_OFFSET (26, -32)
+  CFI_OFFSET (15, -24)
+
+  // Adjust the SP for previous frame
+  ldi $fp,0($sp)
+  CFI_DEF_CFA_REGISTER (15)
+
+  // Save env parameter
+  stl $r16, 16($sp)
+  stl $r17, 24($sp)
+  CFI_OFFSET (16, -16)
+  CFI_OFFSET (17, -8)
+
+  // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)`
+  ldi $r16, 32($sp)
+
+  // call tsan interceptor
+  //ldih $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprelhigh
+  //ldi $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprellow
+  ldl $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !literal
+  call $r26, ($r27), 0
+  ldgp $r29, 0($r26)
+
+  // Restore env parameter
+  ldl $r16, 16($sp)
+  ldl $r17, 24($sp)
+  CFI_RESTORE (0)
+  CFI_RESTORE (1)
+
+  // Restore frame/link register
+  ldl $fp, 8($sp)
+  ldl $r26, 0($sp)
+  CFI_RESTORE (15)
+  CFI_RESTORE (26)
+  CFI_DEF_CFA (31, 0)
+  ldi $sp, 32($sp)
+
+  // tail jump to libc setjmp
+  ldl $r27, _ZN14__interception16real___sigsetjmpE($r29) !literal
+  ldl $r27, 0($r27)
+  jmp $r31, ($r27)
+
+  CFI_ENDPROC
+ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp))
+
+#endif
diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4
index b6557a43465baed5e03677e869840cac2be233fc..302cce07ee729f301d1c33af91a62b45e9ae2a3f 100644
--- a/libstdc++-v3/acinclude.m4
+++ b/libstdc++-v3/acinclude.m4
@@ -4787,7 +4787,7 @@ AC_DEFUN([GLIBCXX_CHECK_EXCEPTION_PTR_SYMVER], [
   AC_MSG_CHECKING([for first version to support std::exception_ptr])
   case ${target} in
     aarch64-*-* | alpha-*-* | hppa*-*-* | i?86-*-* | x86_64-*-* | \
-    m68k-*-* | powerpc*-*-* | s390*-*-* | *-*-solaris* )
+    m68k-*-* | powerpc*-*-* | s390*-*-* | sw_64-*-* | *-*-solaris* )
       ac_exception_ptr_since_gcc46=yes
       ;;
     *)
diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure
index 766a0a8d504182f090e2932a86da5ac5b4b24a97..f5e60c33968f1596c97bfff68fe47d632421379c 100644
--- a/libstdc++-v3/configure
+++ b/libstdc++-v3/configure
@@ -74627,7 +74627,8 @@ case "$target" in
       powerpc*-*-linux* | \
       sparc*-*-linux* | \
       s390*-*-linux* | \
-      alpha*-*-linux*)
+      alpha*-*-linux* | \
+      sw_64*-*-linux*)
       cat confdefs.h - <<_ACEOF >conftest.$ac_ext
 /* end confdefs.h. */
@@ -76289,7 +76290,7 @@ _ACEOF
 $as_echo_n "checking for first version to support std::exception_ptr... " >&6; }
   case ${target} in
     aarch64-*-* | alpha-*-* | hppa*-*-* | i?86-*-* | x86_64-*-* | \
-    m68k-*-* | powerpc*-*-* | s390*-*-* | *-*-solaris* )
+    m68k-*-* | powerpc*-*-* | s390*-*-* | sw_64-*-* | *-*-solaris* )
       ac_exception_ptr_since_gcc46=yes
       ;;
     *)
diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac
index 07cf05b6856a06a101dddfef2928ce7c5f75a579..0ea7f299127729fb32903ababfeb7202bc0b3fad 100644
--- a/libstdc++-v3/configure.ac
+++ b/libstdc++-v3/configure.ac
@@ -403,7 +403,8 @@ case "$target" in
       powerpc*-*-linux* | \
       sparc*-*-linux* | \
       s390*-*-linux* | \
-      alpha*-*-linux*)
+      alpha*-*-linux* | \
+      sw_64*-*-linux*)
       AC_TRY_COMPILE(, [
 #if !defined __LONG_DOUBLE_128__ || (defined(__sparc__) && defined(__arch64__))
 #error no need for long double compatibility
diff --git a/libstdc++-v3/configure.host b/libstdc++-v3/configure.host
index 898db37d9a22a872a62fe6c7ae02c1955b0b66f6..52f7cf22599793716b304229340178cce660ec98 100644
--- a/libstdc++-v3/configure.host
+++ b/libstdc++-v3/configure.host
@@ -123,6 +123,9 @@ case "${host_cpu}" in
   sparc* | ultrasparc)
     try_cpu=sparc
     ;;
+  sw_64*)
+    try_cpu=sw_64
+    ;;
   *)
     if test -d ${glibcxx_srcdir}/config/cpu/${host_cpu}; then
       try_cpu=${host_cpu}
diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h
index ef120134914df26ef594613b236bee57e185f5df..565f2ad80de772dfc3661e4751403e1467f251fe 100644
--- a/libstdc++-v3/include/bits/hashtable_policy.h
+++ b/libstdc++-v3/include/bits/hashtable_policy.h
@@ -460,7 +460,7 @@ namespace __detail
     // Return a bucket count appropriate for n elements
     std::size_t
     _M_bkt_for_elements(std::size_t __n) const
-    { return __builtin_ceill(__n / (long double)_M_max_load_factor); }
+    { return __builtin_ceil(__n / (double)_M_max_load_factor); }
     // __n_bkt is current bucket count, __n_elt is current element count,
     // and __n_ins is number of elements to be inserted. Do we need to
@@ -560,7 +560,7 @@ namespace __detail
       _M_next_resize = numeric_limits::max();
     else
       _M_next_resize
-        = __builtin_floorl(__res * (long double)_M_max_load_factor);
+        = __builtin_floor(__res * (double)_M_max_load_factor);
     return __res;
   }
@@ -568,7 +568,7 @@ namespace __detail
     // Return a bucket count appropriate for n elements
     std::size_t
     _M_bkt_for_elements(std::size_t __n) const noexcept
-    { return __builtin_ceill(__n / (long double)_M_max_load_factor); }
+    { return __builtin_ceil(__n / (double)_M_max_load_factor); }
     // __n_bkt is current bucket count, __n_elt is current element count,
     // and __n_ins is number of elements to be inserted. Do we need to
@@ -588,11 +588,11 @@ namespace __detail
         / (long double)_M_max_load_factor;
     if (__min_bkts >= __n_bkt)
       return { true,
-        _M_next_bkt(std::max(__builtin_floorl(__min_bkts) + 1,
+        _M_next_bkt(std::max(__builtin_floor(__min_bkts) + 1,
                              __n_bkt * _S_growth_factor)) };
     _M_next_resize
-      = __builtin_floorl(__n_bkt * (long double)_M_max_load_factor);
+      = __builtin_floor(__n_bkt * (double)_M_max_load_factor);
     return { false, 0 };
   }
 else
diff --git a/libstdc++-v3/src/c++11/hashtable_c++0x.cc b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
index de8e2c7cb915bec8f9066bb00cfb60db49e3adca..5584efa7162073a2036f4ef0e69b9d43b8b543b4 100644
--- a/libstdc++-v3/src/c++11/hashtable_c++0x.cc
+++ b/libstdc++-v3/src/c++11/hashtable_c++0x.cc
@@ -58,7 +58,7 @@ namespace __detail
       return 1;
       _M_next_resize =
-        __builtin_floorl(__fast_bkt[__n] * (long double)_M_max_load_factor);
+        __builtin_floor(__fast_bkt[__n] * (double)_M_max_load_factor);
       return __fast_bkt[__n];
     }
@@ -81,7 +81,7 @@ namespace __detail
       _M_next_resize = numeric_limits::max();
     else
       _M_next_resize =
-        __builtin_floorl(*__next_bkt * (long double)_M_max_load_factor);
+        __builtin_floor(*__next_bkt * (double)_M_max_load_factor);
     return *__next_bkt;
   }
@@ -105,16 +105,16 @@ namespace __detail
     // If _M_next_resize is 0 it means that we have nothing allocated so
     // far and that we start inserting elements. In this case we start
     // with an initial bucket size of 11.
-    long double __min_bkts
+    double __min_bkts
       = std::max(__n_elt + __n_ins, _M_next_resize ? 0 : 11)
-      / (long double)_M_max_load_factor;
+      / (double)_M_max_load_factor;
     if (__min_bkts >= __n_bkt)
       return { true,
-        _M_next_bkt(std::max(__builtin_floorl(__min_bkts) + 1,
+        _M_next_bkt(std::max(__builtin_floor(__min_bkts) + 1,
                              __n_bkt * _S_growth_factor)) };
     _M_next_resize
-      = __builtin_floorl(__n_bkt * (long double)_M_max_load_factor);
+      = __builtin_floor(__n_bkt * (double)_M_max_load_factor);
     return { false, 0 };
   }
 else