From 3f2a57229d812c5f8667e70ef4f78e9fc79ab903 Mon Sep 17 00:00:00 2001 From: swcompiler Date: Tue, 21 Jan 2025 17:33:02 +0800 Subject: [PATCH] Sw64: Add sw64 architecture support. --- 0001-Sw64-Port-add-gcc-compiler.patch | 24048 ++++++++++++++++ 0002-Sw64-Port-gcc-testsuite.patch | 640 + 0003-Sw64-Port-libatomic.patch | 29 + 0004-Sw64-Port-libffi.patch | 1055 + 0005-Sw64-Port-libgcc.patch | 498 + 0006-Sw64-Port-libgfortran.patch | 55 + 0007-Sw64-Port-libgo.patch | 565 + 0008-Sw64-Port-libgomp.patch | 166 + 0009-Sw64-Port-libitm.patch | 260 + 0010-Sw64-Port-libstdc.patch | 169 + ...ise-FPE-when-DivbyZero-on-Sw_64-plat.patch | 26 + ...rt-add-lex-builtin-support-in-libcpp.patch | 34 + 0013-Sw64-Port-libsanitizer.patch | 1183 + gcc.spec | 77 +- 14 files changed, 28797 insertions(+), 8 deletions(-) create mode 100644 0001-Sw64-Port-add-gcc-compiler.patch create mode 100644 0002-Sw64-Port-gcc-testsuite.patch create mode 100644 0003-Sw64-Port-libatomic.patch create mode 100644 0004-Sw64-Port-libffi.patch create mode 100644 0005-Sw64-Port-libgcc.patch create mode 100644 0006-Sw64-Port-libgfortran.patch create mode 100644 0007-Sw64-Port-libgo.patch create mode 100644 0008-Sw64-Port-libgomp.patch create mode 100644 0009-Sw64-Port-libitm.patch create mode 100644 0010-Sw64-Port-libstdc.patch create mode 100644 0011-Sw64-Port-set-raise-FPE-when-DivbyZero-on-Sw_64-plat.patch create mode 100644 0012-Sw64-Port-add-lex-builtin-support-in-libcpp.patch create mode 100644 0013-Sw64-Port-libsanitizer.patch diff --git a/0001-Sw64-Port-add-gcc-compiler.patch b/0001-Sw64-Port-add-gcc-compiler.patch new file mode 100644 index 0000000..cc8193c --- /dev/null +++ b/0001-Sw64-Port-add-gcc-compiler.patch @@ -0,0 +1,24048 @@ +From 93e71036f6b11ae8ea905e8cb2b279a8d846fdfe Mon Sep 17 00:00:00 2001 +From: swcompiler +Date: Tue, 15 Oct 2024 13:56:17 +0800 +Subject: [PATCH 01/13] Sw64 Port: add gcc compiler + +--- + Makefile.in | 1 + + Makefile.tpl | 1 + + config.guess | 12 + + config.sub | 1 + + config/intdiv0.m4 | 2 +- + config/tcl.m4 | 6 + + configure | 16 +- + configure.ac | 19 +- + contrib/compare-all-tests | 3 +- + contrib/config-list.mk | 1 + + gcc/auto-inc-dec.c | 13 +- + gcc/builtins.c | 19 +- + gcc/c-family/c-opts.c | 7 +- + gcc/cfgexpand.c | 2 - + gcc/common/config/sw_64/sw_64-common.c | 114 + + gcc/config.gcc | 36 + + gcc/config.host | 8 + + gcc/config.in | 25 +- + gcc/config/host-linux.c | 2 + + gcc/config/sw_64/constraints.md | 123 + + gcc/config/sw_64/driver-sw_64.c | 101 + + gcc/config/sw_64/elf.h | 194 + + gcc/config/sw_64/elf.opt | 29 + + gcc/config/sw_64/freebsd.h | 69 + + gcc/config/sw_64/gnu-user.h | 177 + + gcc/config/sw_64/linux-elf.h | 54 + + gcc/config/sw_64/linux.h | 105 + + gcc/config/sw_64/m32.md | 227 + + gcc/config/sw_64/netbsd.h | 69 + + gcc/config/sw_64/openbsd.h | 74 + + gcc/config/sw_64/predicates.md | 649 ++ + gcc/config/sw_64/sw6.md | 181 + + gcc/config/sw_64/sw8.md | 181 + + gcc/config/sw_64/sw_64-modes.def | 27 + + gcc/config/sw_64/sw_64-passes.def | 21 + + gcc/config/sw_64/sw_64-protos.h | 146 + + gcc/config/sw_64/sw_64.c | 10058 +++++++++++++++++++++++ + gcc/config/sw_64/sw_64.h | 999 +++ + gcc/config/sw_64/sw_64.md | 7865 ++++++++++++++++++ + gcc/config/sw_64/sw_64.opt | 318 + + gcc/config/sw_64/sync.md | 499 ++ + gcc/config/sw_64/t-linux | 1 + + gcc/config/sw_64/t-sw_64 | 19 + + gcc/config/sw_64/x-sw_64 | 3 + + gcc/configure | 145 +- + gcc/configure.ac | 83 +- + gcc/doc/install.texi | 10 + + gcc/emit-rtl.c | 37 + + gcc/explow.c | 4 + + gcc/expr.c | 3 + + gcc/final.c | 15 + + 
gcc/flags.h | 5 +- + gcc/fortran/interface.c | 8 + + gcc/gcc.c | 10 + + gcc/gimple-match-head.c | 5 + + gcc/glimits.h | 4 + + gcc/optabs.c | 13 +- + gcc/sync-builtins.def | 2 + + gcc/target-insns.def | 1 + + gcc/targhooks.c | 3 +- + gcc/toplev.c | 15 +- + gcc/toplev.h | 1 + + gcc/tree-outof-ssa.c | 1 + + gcc/tree-ssa-loop-prefetch.c | 121 +- + include/longlong.h | 54 + + 65 files changed, 22958 insertions(+), 59 deletions(-) + create mode 100644 gcc/common/config/sw_64/sw_64-common.c + create mode 100644 gcc/config/sw_64/constraints.md + create mode 100644 gcc/config/sw_64/driver-sw_64.c + create mode 100644 gcc/config/sw_64/elf.h + create mode 100644 gcc/config/sw_64/elf.opt + create mode 100644 gcc/config/sw_64/freebsd.h + create mode 100644 gcc/config/sw_64/gnu-user.h + create mode 100644 gcc/config/sw_64/linux-elf.h + create mode 100644 gcc/config/sw_64/linux.h + create mode 100644 gcc/config/sw_64/m32.md + create mode 100644 gcc/config/sw_64/netbsd.h + create mode 100644 gcc/config/sw_64/openbsd.h + create mode 100644 gcc/config/sw_64/predicates.md + create mode 100644 gcc/config/sw_64/sw6.md + create mode 100644 gcc/config/sw_64/sw8.md + create mode 100644 gcc/config/sw_64/sw_64-modes.def + create mode 100644 gcc/config/sw_64/sw_64-passes.def + create mode 100644 gcc/config/sw_64/sw_64-protos.h + create mode 100644 gcc/config/sw_64/sw_64.c + create mode 100644 gcc/config/sw_64/sw_64.h + create mode 100644 gcc/config/sw_64/sw_64.md + create mode 100644 gcc/config/sw_64/sw_64.opt + create mode 100644 gcc/config/sw_64/sync.md + create mode 100644 gcc/config/sw_64/t-linux + create mode 100644 gcc/config/sw_64/t-sw_64 + create mode 100644 gcc/config/sw_64/x-sw_64 + +diff --git a/Makefile.in b/Makefile.in +index cfdca3d18..23b6fe4ea 100644 +--- a/Makefile.in ++++ b/Makefile.in +@@ -640,6 +640,7 @@ all: + @target_makefile_frag@ + @alphaieee_frag@ + @ospace_frag@ ++@sw_64ieee_frag@ + @host_makefile_frag@ + ### + +diff --git a/Makefile.tpl b/Makefile.tpl +index efed15117..ebe66c546 100644 +--- a/Makefile.tpl ++++ b/Makefile.tpl +@@ -563,6 +563,7 @@ all: + @target_makefile_frag@ + @alphaieee_frag@ + @ospace_frag@ ++@sw_64ieee_frag@ + @host_makefile_frag@ + ### + +diff --git a/config.guess b/config.guess +index 97ad07333..52cad983c 100644 +--- a/config.guess ++++ b/config.guess +@@ -1083,6 +1083,18 @@ EOF + sparc:Linux:*:* | sparc64:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; ++ sw_64:Linux:*:*) ++ case `sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' < /proc/cpuinfo` in ++ SW6) UNAME_MACHINE=sw_64sw6 ;; ++ SW6A) UNAME_MACHINE=sw_64sw6a ;; ++ SW6B) UNAME_MACHINE=sw_64sw6b ;; ++ SW8A) UNAME_MACHINE=sw_64sw8a ;; ++ SW) UNAME_MACHINE=sw_64 ;; ++ esac ++ objdump --private-headers /bin/sh | grep -q ld.so.1 ++ if test "$?" = 0 ; then LIBC=gnulibc1 ; fi ++ echo ${UNAME_MACHINE}-unknown-linux-${LIBC} ++ exit ;; + tile*:Linux:*:*) + echo "$UNAME_MACHINE"-unknown-linux-"$LIBC" + exit ;; +diff --git a/config.sub b/config.sub +index a318a4686..aa418e7b9 100644 +--- a/config.sub ++++ b/config.sub +@@ -1237,6 +1237,7 @@ case $cpu-$vendor in + | sparclite \ + | sparcv8 | sparcv9 | sparcv9b | sparcv9v | sv1 | sx* \ + | spu \ ++ | sw_64 | sw_64sw6a | sw_64sw6b | sw_64sw8a \ + | tahoe \ + | tic30 | tic4x | tic54x | tic55x | tic6x | tic80 \ + | tron \ +diff --git a/config/intdiv0.m4 b/config/intdiv0.m4 +index 55dddcf1c..53dc632bc 100644 +--- a/config/intdiv0.m4 ++++ b/config/intdiv0.m4 +@@ -56,7 +56,7 @@ int main () + [ + # Guess based on the CPU. 
+ case "$host_cpu" in +- alpha* | i[34567]86 | m68k | s390*) ++ alpha* | i[34567]86 | m68k | s390* | sw_64* ) + gt_cv_int_divbyzero_sigfpe="guessing yes";; + *) + gt_cv_int_divbyzero_sigfpe="guessing no";; +diff --git a/config/tcl.m4 b/config/tcl.m4 +index 4542a4b23..c58bf5343 100644 +--- a/config/tcl.m4 ++++ b/config/tcl.m4 +@@ -1368,6 +1368,9 @@ dnl AC_CHECK_TOOL(AR, ar) + if test "`uname -m`" = "alpha" ; then + CFLAGS="$CFLAGS -mieee" + fi ++ if test "`uname -m`" = "sw_64" ; then ++ CFLAGS="$CFLAGS -mieee" ++ fi + if test $do64bit = yes; then + AC_CACHE_CHECK([if compiler accepts -m64 flag], tcl_cv_cc_m64, [ + hold_cflags=$CFLAGS +@@ -1418,6 +1421,9 @@ dnl AC_CHECK_TOOL(AR, ar) + if test "`uname -m`" = "alpha" ; then + CFLAGS="$CFLAGS -mieee" + fi ++ if test "`uname -m`" = "sw_64" ; then ++ CFLAGS="$CFLAGS -mieee" ++ fi + ;; + Lynx*) + SHLIB_CFLAGS="-fPIC" +diff --git a/configure b/configure +index 97d5ca4fc..1bee61dc7 100755 +--- a/configure ++++ b/configure +@@ -777,6 +777,7 @@ ac_subst_files='serialization_dependencies + host_makefile_frag + target_makefile_frag + alphaieee_frag ++sw_64ieee_frag + ospace_frag' + ac_user_opts=' + enable_option_checking +@@ -4010,6 +4011,10 @@ case "${target}" in + use_gnu_ld=no + fi + ;; ++ sw_64*-*-*) ++ # newlib is not 64 bit ready ++ noconfigdirs="$noconfigdirs target-newlib target-libgloss" ++ ;; + tic6x-*-*) + noconfigdirs="$noconfigdirs sim" + ;; +@@ -7161,6 +7166,15 @@ case $target in + ;; + esac + ++sw_64ieee_frag=/dev/null ++case $target in ++ sw_64*-*-*) ++ # This just makes sure to use the -mieee option to build target libs. ++ # This should probably be set individually by each library. ++ sw_64ieee_frag="config/mt-sw_64ieee" ++ ;; ++esac ++ + # If --enable-target-optspace always use -Os instead of -O2 to build + # the target libraries, similarly if it is not specified, use -Os + # on selected platforms. +@@ -7856,7 +7870,7 @@ case "${target}" in + esac + + # Makefile fragments. +-for frag in host_makefile_frag target_makefile_frag alphaieee_frag ospace_frag; ++for frag in host_makefile_frag target_makefile_frag alphaieee_frag sw_64ieee_frag ospace_frag; + do + eval fragval=\$$frag + if test $fragval != /dev/null; then +diff --git a/configure.ac b/configure.ac +index 90ccd5ef8..1e3cd04d5 100644 +--- a/configure.ac ++++ b/configure.ac +@@ -1283,6 +1283,10 @@ case "${target}" in + use_gnu_ld=no + fi + ;; ++ sw_64*-*-*) ++ # newlib is not 64 bit ready ++ noconfigdirs="$noconfigdirs target-newlib target-libgloss" ++ ;; + tic6x-*-*) + noconfigdirs="$noconfigdirs sim" + ;; +@@ -1342,6 +1346,9 @@ case "${host}" in + rs6000-*-aix*) + host_makefile_frag="config/mh-ppc-aix" + ;; ++ sw_64*-linux*) ++ host_makefile_frag="config/mh-sw_64-linux" ++ ;; + esac + fi + +@@ -2666,6 +2673,15 @@ case $target in + ;; + esac + ++sw_64ieee_frag=/dev/null ++case $target in ++ sw_64*-*-*) ++ # This just makes sure to use the -mieee option to build target libs. ++ # This should probably be set individually by each library. ++ sw_64ieee_frag="config/mt-sw_64ieee" ++ ;; ++esac ++ + # If --enable-target-optspace always use -Os instead of -O2 to build + # the target libraries, similarly if it is not specified, use -Os + # on selected platforms. +@@ -3356,7 +3372,7 @@ case "${target}" in + esac + + # Makefile fragments. 
+-for frag in host_makefile_frag target_makefile_frag alphaieee_frag ospace_frag; ++for frag in host_makefile_frag target_makefile_frag alphaieee_frag sw_64ieee_frag ospace_frag; + do + eval fragval=\$$frag + if test $fragval != /dev/null; then +@@ -3366,6 +3382,7 @@ done + AC_SUBST_FILE(host_makefile_frag) + AC_SUBST_FILE(target_makefile_frag) + AC_SUBST_FILE(alphaieee_frag) ++AC_SUBST_FILE(sw_64ieee_frag) + AC_SUBST_FILE(ospace_frag) + + # Miscellanea: directories, flags, etc. +diff --git a/contrib/compare-all-tests b/contrib/compare-all-tests +index 502cc64f5..02519a1f3 100644 +--- a/contrib/compare-all-tests ++++ b/contrib/compare-all-tests +@@ -33,8 +33,9 @@ ppc_opts='-m32 -m64' + s390_opts='-m31 -m31/-mzarch -m64' + sh_opts='-m3 -m3e -m4 -m4a -m4al -m4/-mieee -m1 -m1/-mno-cbranchdi -m2a -m2a/-mieee -m2e -m2e/-mieee' + sparc_opts='-mcpu=v8/-m32 -mcpu=v9/-m32 -m64' ++sw_64_opts='-mlong-double-64/-mieee -mlong-double-64 -mlong-double-128/-mieee -mlong-double-128' + +-all_targets='alpha arm avr bfin cris fr30 frv h8300 ia64 iq2000 m32c m32r m68k mcore mips mmix mn10300 pa pdp11 ppc sh sparc v850 vax xstormy16 xtensa' # e500 ++all_targets='alpha arm avr bfin cris fr30 frv h8300 ia64 iq2000 m32c m32r m68k mcore mips mmix mn10300 pa pdp11 ppc sh sparc sw_64 v850 vax xstormy16 xtensa' # e500 + + test_one_file () + { +diff --git a/contrib/config-list.mk b/contrib/config-list.mk +index d154286a4..0a8fbf0e7 100644 +--- a/contrib/config-list.mk ++++ b/contrib/config-list.mk +@@ -92,6 +92,7 @@ LIST = aarch64-elf aarch64-linux-gnu aarch64-rtems \ + sparc64-sun-solaris2.11OPT-with-gnu-ldOPT-with-gnu-asOPT-enable-threads=posix \ + sparc-wrs-vxworks sparc64-elf sparc64-rtems sparc64-linux sparc64-freebsd6 \ + sparc64-netbsd sparc64-openbsd \ ++ sw_64-linux-gnu sw_64-netbsd sw_64-openbsd \ + tilegx-linux-gnu tilegxbe-linux-gnu tilepro-linux-gnu \ + v850e-elf v850-elf v850-rtems vax-linux-gnu \ + vax-netbsdelf vax-openbsd visium-elf x86_64-apple-darwin \ +diff --git a/gcc/auto-inc-dec.c b/gcc/auto-inc-dec.c +index 7d0d91403..d09a5fc7b 100644 +--- a/gcc/auto-inc-dec.c ++++ b/gcc/auto-inc-dec.c +@@ -892,6 +892,10 @@ parse_add_or_inc (rtx_insn *insn, bool before_mem) + inc_insn.reg1_val = -INTVAL (XEXP (SET_SRC (pat), 1)); + inc_insn.reg1 = GEN_INT (inc_insn.reg1_val); + } ++#ifdef FLAG_SW64_INC_DEC ++ if (inc_insn.reg1_val > 2047 || inc_insn.reg1_val < -2048) ++ return false; ++#endif + return true; + } + else if ((HAVE_PRE_MODIFY_REG || HAVE_POST_MODIFY_REG) +@@ -1369,6 +1373,10 @@ find_mem (rtx *address_of_x) + mem_insn.reg1_is_const = true; + /* Match with *(reg0 + c) where c is a const. */ + mem_insn.reg1_val = INTVAL (reg1); ++#ifdef FLAG_SW64_INC_DEC ++ if (mem_insn.reg1_val > 2047 || mem_insn.reg1_val < -2048) ++ return false; ++#endif + if (find_inc (true)) + return true; + } +@@ -1696,8 +1704,11 @@ public: + { + if (!AUTO_INC_DEC) + return false; +- ++#ifdef FLAG_SW64_INC_DEC ++ return (optimize > 0 && flag_auto_inc_dec && flag_sw_auto_inc_dec); ++#else + return (optimize > 0 && flag_auto_inc_dec); ++#endif + } + + +diff --git a/gcc/builtins.c b/gcc/builtins.c +index ffbb2cae9..69c394c8e 100644 +--- a/gcc/builtins.c ++++ b/gcc/builtins.c +@@ -1542,6 +1542,7 @@ expand_builtin_prefetch_full (tree exp) + emit_insn (op0); + } + ++ + /* Get a MEM rtx for expression EXP which is the address of an operand + to be used in a string instruction (cmpstrsi, cpymemsi, ..). 
LEN is + the maximum length of the block of memory that might be accessed or +@@ -7460,6 +7461,17 @@ expand_builtin_sync_synchronize (void) + expand_mem_thread_fence (MEMMODEL_SYNC_SEQ_CST); + } + ++#ifdef FLAG_SW64_WMEMB ++static void ++expand_builtin_sync_synchronize_write (void) ++{ ++ if (TARGET_SW8A && targetm.have_memory_barrier ()) ++ emit_insn (targetm.gen_write_memory_barrier ()); ++ else ++ error ("Current arch don't support write memory barrier !!!"); ++} ++#endif ++ + static rtx + expand_builtin_thread_pointer (tree exp, rtx target) + { +@@ -8677,7 +8689,11 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode, + case BUILT_IN_SYNC_SYNCHRONIZE: + expand_builtin_sync_synchronize (); + return const0_rtx; +- ++#ifdef FLAG_SW64_WMEMB ++ case BUILT_IN_SYNC_SYNCHRONIZE_WRITE: ++ expand_builtin_sync_synchronize_write (); ++ return const0_rtx; ++#endif + case BUILT_IN_ATOMIC_EXCHANGE_1: + case BUILT_IN_ATOMIC_EXCHANGE_2: + case BUILT_IN_ATOMIC_EXCHANGE_4: +@@ -11873,6 +11889,7 @@ is_inexpensive_builtin (tree decl) + case BUILT_IN_LABS: + case BUILT_IN_LLABS: + case BUILT_IN_PREFETCH: ++ + case BUILT_IN_ACC_ON_DEVICE: + return true; + +diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c +index dc1a89848..8fdeca178 100644 +--- a/gcc/c-family/c-opts.c ++++ b/gcc/c-family/c-opts.c +@@ -751,7 +751,12 @@ default_handle_c_option (size_t code ATTRIBUTE_UNUSED, + const char *arg ATTRIBUTE_UNUSED, + int value ATTRIBUTE_UNUSED) + { +- return false; ++#if defined FLAG_SW64_SIMD || defined FLAG_SW64_M32 ++ if (code == OPT_msimd || code == OPT_m32 || code == OPT_msw_use_32align) ++ return true; ++ else ++ return false; ++#endif + } + + /* Post-switch processing. */ +diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c +index bf4f194ed..218dd8430 100644 +--- a/gcc/cfgexpand.c ++++ b/gcc/cfgexpand.c +@@ -2167,7 +2167,6 @@ expand_used_vars (void) + if (dump_file) + dump_stack_var_partition (); + } +- + switch (flag_stack_protect) + { + case SPCT_FLAG_ALL: +@@ -2200,7 +2199,6 @@ expand_used_vars (void) + default: + break; + } +- + /* Assign rtl to each variable based on these partitions. */ + if (stack_vars_num > 0) + { +diff --git a/gcc/common/config/sw_64/sw_64-common.c b/gcc/common/config/sw_64/sw_64-common.c +new file mode 100644 +index 000000000..eaf1f0d32 +--- /dev/null ++++ b/gcc/common/config/sw_64/sw_64-common.c +@@ -0,0 +1,114 @@ ++/* Common hooks for Sw_64. ++ Copyright (C) 1992-2020 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "diagnostic-core.h" ++#include "tm.h" ++#include "common/common-target.h" ++#include "common/common-target-def.h" ++#include "opts.h" ++#include "flags.h" ++ ++int flag_fpcr_set; ++ ++/* Implement TARGET_OPTION_OPTIMIZATION_TABLE. 
*/ ++static const struct default_options sw_64_option_optimization_table[] = { ++ /* Enable redundant extension instructions removal at -O2 and higher. */ ++ {OPT_LEVELS_2_PLUS, OPT_free, NULL, 1}, ++ {OPT_LEVELS_NONE, 0, NULL, 0}}; ++ ++/* Implement TARGET_OPTION_INIT_STRUCT. */ ++ ++static void ++sw_64_option_init_struct (struct gcc_options *opts ATTRIBUTE_UNUSED) ++{ ++ opts->x_target_flags |= MASK_IEEE; ++ global_options.x_flag_prefetch_loop_arrays = 1; ++} ++ ++/* Implement TARGET_HANDLE_OPTION. */ ++ ++static bool ++sw_64_handle_option (struct gcc_options *opts, ++ struct gcc_options *opts_set ATTRIBUTE_UNUSED, ++ const struct cl_decoded_option *decoded, location_t loc) ++{ ++ size_t code = decoded->opt_index; ++ const char *arg = decoded->arg; ++ int value = decoded->value; ++ ++ switch (code) ++ { ++ case OPT_mfp_regs: ++ if (value == 0) ++ opts->x_target_flags |= MASK_SOFT_FP; ++ break; ++ ++ case OPT_mieee: ++ case OPT_mieee_with_inexact: ++ /* add mieee for sw_64. */ ++ case OPT_mieee_main: ++ if (code == OPT_mieee) ++ flag_fpcr_set = 1; ++ else if (code == OPT_mieee_with_inexact) ++ flag_fpcr_set = 3; ++ else if (code == OPT_mieee_main) ++ flag_fpcr_set = 4; ++ opts->x_target_flags |= MASK_IEEE_CONFORMANT; ++ break; ++ ++ case OPT_mtls_size_: ++ if (value != 16 && value != 32 && value != 64) ++ error_at (loc, "bad value %qs for %<-mtls-size%> switch", arg); ++ break; ++ ++ case OPT_mtls_tlsgd_: ++ if (value != 16 && value != 32) ++ error_at (loc, "bad value %qs for -mtls-tlsgd switch", arg); ++ break; ++ ++ case OPT_mtls_tlsldm_: ++ if (value != 16 && value != 32) ++ error_at (loc, "bad value %qs for -mtls-tlsldm switch", arg); ++ break; ++ ++ case OPT_mgprel_size_: ++ if (value != 16 && value != 32) ++ error_at (loc, "bad value %qs for -mgprel-size switch", arg); ++ break; ++ } ++ ++ return true; ++} ++ ++#undef TARGET_DEFAULT_TARGET_FLAGS ++#define TARGET_DEFAULT_TARGET_FLAGS \ ++ (TARGET_DEFAULT | TARGET_CPU_DEFAULT | TARGET_DEFAULT_EXPLICIT_RELOCS) ++#undef TARGET_HANDLE_OPTION ++#define TARGET_HANDLE_OPTION sw_64_handle_option ++ ++#undef TARGET_OPTION_INIT_STRUCT ++#define TARGET_OPTION_INIT_STRUCT sw_64_option_init_struct ++ ++#undef TARGET_OPTION_OPTIMIZATION_TABLE ++#define TARGET_OPTION_OPTIMIZATION_TABLE sw_64_option_optimization_table ++ ++struct gcc_targetm_common targetm_common = TARGETM_COMMON_INITIALIZER; +diff --git a/gcc/config.gcc b/gcc/config.gcc +index 6fcdd771d..43ac24570 100644 +--- a/gcc/config.gcc ++++ b/gcc/config.gcc +@@ -552,6 +552,10 @@ sh[123456789lbe]*-*-* | sh-*-*) + extra_options="${extra_options} fused-madd.opt" + extra_objs="${extra_objs} sh_treg_combine.o sh-mem.o sh_optimize_sett_clrt.o" + ;; ++sw_64*-*-*) ++ cpu_type=sw_64 ++ extra_options="${extra_options} g.opt" ++ ;; + v850*-*-*) + cpu_type=v850 + ;; +@@ -1168,6 +1172,7 @@ alpha*-dec-*vms*) + tm_file="${tm_file} vms/vms.h alpha/vms.h" + tmake_file="${tmake_file} alpha/t-vms alpha/t-alpha" + ;; ++ + arc*-*-elf*) + tm_file="arc/arc-arch.h dbxelf.h elfos.h newlib-stdint.h arc/elf.h ${tm_file}" + tmake_file="arc/t-multilib arc/t-arc" +@@ -3407,6 +3412,11 @@ sparc64-*-openbsd*) + with_cpu=ultrasparc + tmake_file="${tmake_file} sparc/t-sparc" + ;; ++sw_64*-*-linux*) ++ tm_file="elfos.h ${tm_file} sw_64/gnu-user.h sw_64/elf.h sw_64/linux.h sw_64/linux-elf.h glibc-stdint.h" ++ tmake_file="${tmake_file} sw_64/t-linux sw_64/t-sw_64" ++ extra_options="${extra_options} sw_64/elf.opt" ++ ;; + tic6x-*-elf) + tm_file="elfos.h ${tm_file} c6x/elf-common.h c6x/elf.h" + tm_file="${tm_file} dbxelf.h 
tm-dwarf2.h newlib-stdint.h" +@@ -3937,6 +3947,15 @@ if test x$with_cpu = x ; then + ;; + esac + ;; ++ sw_64sw6a*-*-*) ++ with_cpu=sw6a ++ ;; ++ sw_64sw6b*-*-*) ++ with_cpu=sw6b ++ ;; ++ sw_64sw8a*-*-*) ++ with_cpu=sw8a ++ ;; + visium-*-*) + with_cpu=gr5 + ;; +@@ -5147,6 +5166,23 @@ case "${target}" in + esac + ;; + ++ sw_64*-*-*) ++ supported_defaults="cpu tune" ++ for which in cpu tune; do ++ eval "val=\$with_$which" ++ case "$val" in ++ "" \ ++ | sw6 | sw6a | sw6b \ ++ | sw8a) ++ ;; ++ *) ++ echo "Unknown CPU used in --with-$which=$val" 1>&2 ++ exit 1 ++ ;; ++ esac ++ done ++ ;; ++ + tic6x-*-*) + supported_defaults="arch" + +diff --git a/gcc/config.host b/gcc/config.host +index 230ab61ac..793cc7b50 100644 +--- a/gcc/config.host ++++ b/gcc/config.host +@@ -201,6 +201,14 @@ case ${host} in + ;; + esac + ;; ++ sw_64*-*-linux*) ++ case ${target} in ++ sw_64*-*-linux*) ++ host_extra_gcc_objs="driver-sw_64.o" ++ host_xmake_file="${host_xmake_file} sw_64/x-sw_64" ++ ;; ++ esac ++ ;; + esac + + # Machine-specific settings. +diff --git a/gcc/config.in b/gcc/config.in +index 80b421d99..2ee820307 100644 +--- a/gcc/config.in ++++ b/gcc/config.in +@@ -199,6 +199,10 @@ + #undef ENABLE_LD_BUILDID + #endif + ++/* Define if gcc should always pass --no-relax to linker for sw_64. */ ++#ifndef USED_FOR_TARGET ++#undef ENABLE_LD_NORELAX ++#endif + + /* Define to 1 to enable libquadmath support */ + #ifndef USED_FOR_TARGET +@@ -389,11 +393,15 @@ + #endif + + +-/* Define if your assembler supports explicit relocations. */ ++/* Define if your assembler supports explicit relocations. */ + #ifndef USED_FOR_TARGET + #undef HAVE_AS_EXPLICIT_RELOCS + #endif + ++/* Define if your assembler supports explicit relocations. */ ++#ifndef USED_FOR_TARGET ++#undef SW_64_ENABLE_ASAN ++#endif + + /* Define if your assembler supports FMAF, HPC, and VIS 3.0 instructions. */ + #ifndef USED_FOR_TARGET +@@ -2508,3 +2516,18 @@ + #undef vfork + #endif + ++/* Define only sw64 target. */ ++#undef FLAG_SW64_NOWARN ++#undef FLAG_SW64_NBLEN ++#undef FLAG_SW64_ATOMIC ++#undef FLAG_SW64_90139 ++#undef FLAG_SW64_PREFETCH ++#undef FLAG_SW64_PROTECT ++#undef FLAG_SW64_SIMD ++#undef FLAG_SW64_AUTOSIMD ++#undef FLAG_SW64_M32 ++#undef FLAG_SW64_INC_DEC ++#undef FLAG_SW64_DELNOP ++#undef FLAG_SW64_FM ++ ++ +diff --git a/gcc/config/host-linux.c b/gcc/config/host-linux.c +index 268725441..20522756b 100644 +--- a/gcc/config/host-linux.c ++++ b/gcc/config/host-linux.c +@@ -84,6 +84,8 @@ + # define TRY_EMPTY_VM_SPACE 0x8000000000 + #elif defined(__sparc__) + # define TRY_EMPTY_VM_SPACE 0x60000000 ++#elif defined(__sw_64) ++# define TRY_EMPTY_VM_SPACE 0x10000000000 + #elif defined(__mc68000__) + # define TRY_EMPTY_VM_SPACE 0x40000000 + #elif defined(__aarch64__) && defined(__ILP32__) +diff --git a/gcc/config/sw_64/constraints.md b/gcc/config/sw_64/constraints.md +new file mode 100644 +index 000000000..e5d5c7c76 +--- /dev/null ++++ b/gcc/config/sw_64/constraints.md +@@ -0,0 +1,123 @@ ++;; Constraint definitions for Sw_64. ++;; Copyright (C) 2007-2020 Free Software Foundation, Inc. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the ++;; GNU General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++ ++;;; Unused letters: ++;;; ABCDEF H V YZ ++;;; de ghijkl pq tu wxyz ++ ++;; Integer register constraints. ++ ++(define_register_constraint "a" "R24_REG" ++ "General register 24, input to division routine") ++ ++(define_register_constraint "b" "R25_REG" ++ "General register 24, input to division routine") ++ ++(define_register_constraint "c" "R27_REG" ++ "General register 27, function call address") ++ ++(define_register_constraint "f" "TARGET_FPREGS ? FLOAT_REGS : NO_REGS" ++ "Any floating-point register") ++ ++(define_register_constraint "v" "R0_REG" ++ "General register 0, function value return address") ++ ++(define_memory_constraint "w" ++ "A memory whose address is only a register" ++ (match_operand 0 "mem_noofs_operand")) ++ ++;; Integer constant constraints. ++(define_constraint "I" ++ "An unsigned 8 bit constant" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (ival, 0, 255)"))) ++ ++(define_constraint "J" ++ "The constant zero" ++ (and (match_code "const_int") ++ (match_test "ival == 0"))) ++ ++(define_constraint "K" ++ "Signed 16-bit integer constant" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (ival, -32768, 32767)"))) ++ ++(define_constraint "L" ++ "A shifted signed 16-bit constant appropriate for LDAH" ++ (and (match_code "const_int") ++ (match_test "(ival & 0xffff) == 0 ++ && (ival >> 31 == -1 || ival >> 31 == 0)"))) ++ ++(define_constraint "M" ++ "A valid operand of a ZAP insn" ++ (and (match_code "const_int") ++ (match_test "zap_mask (ival) != 0"))) ++ ++(define_constraint "N" ++ "A complemented unsigned 8-bit constant" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (~ival, 0, 255)"))) ++ ++(define_constraint "O" ++ "A negated unsigned 8-bit constant" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (-ival, 0, 255)"))) ++ ++(define_constraint "P" ++ "The constant 1, 2 or 3" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (ival, 1, 3)"))) ++ ++;; Floating-point constant constraints. ++(define_constraint "G" ++ "The floating point zero constant" ++ (and (match_code "const_double") ++ (match_test "op == CONST0_RTX (mode)"))) ++ ++;; "Extra" constraints. ++ ++;; A memory location that is not a reference ++;; (using an AND) to an unaligned location. ++(define_memory_constraint "Q" ++ "@internal A normal_memory_operand" ++ (and (match_code "mem") ++ (not (match_code "and" "0")))) ++ ++(define_constraint "R" ++ "@internal A direct_call_operand" ++ (match_operand:DI 0 "direct_call_operand")) ++ ++(define_constraint "S" ++ "An unsigned 6-bit constant" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (ival, 0, 63)"))) ++ ++(define_constraint "T" ++ "@internal A high-part symbol" ++ (match_code "high")) ++ ++(define_constraint "W" ++ "A vector zero constant" ++ (and (match_code "const_vector") ++ (match_test "op == CONST0_RTX (mode)"))) ++ ++(define_constraint "Y" ++ "An unsigned 5-bit constant" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (ival, 0, 31)"))) +diff --git a/gcc/config/sw_64/driver-sw_64.c b/gcc/config/sw_64/driver-sw_64.c +new file mode 100644 +index 000000000..84a3692c8 +--- /dev/null ++++ b/gcc/config/sw_64/driver-sw_64.c +@@ -0,0 +1,101 @@ ++/* Subroutines for the gcc driver. ++ Copyright (C) 2009-2020 Free Software Foundation, Inc. ++ Contributed by Arthur Loiret ++ ++This file is part of GCC. 
++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "tm.h" ++ ++/* Chip family type IDs, returned by implver instruction. */ ++#define IMPLVER_SW6_FAMILY 2 /* SW6 */ ++#define IMPLVER_SW8_FAMILY 4 /* SW8 */ ++ ++/* Bit defines for amask instruction. */ ++#define AMASK_BWX 0x1 /* byte/word extension. */ ++#define AMASK_FIX \ ++ 0x2 /* sqrt and f <-> i conversions \ ++ extension. */ ++#define AMASK_CIX 0x4 /* count extension. */ ++#define AMASK_MVI 0x100 /* multimedia extension. */ ++#define AMASK_PRECISE 0x200 /* Precise arithmetic traps. */ ++#define AMASK_LOCKPFTCHOK \ ++ 0x1000 /* Safe to prefetch lock cache \ ++ block. */ ++#define AMASK_SW6A (1U << 16) ++#define AMASK_SW6B (1U << 17) ++#define AMASK_SW8A (1U << 18) ++/* This will be called by the spec parser in gcc.c when it sees ++ a %:local_cpu_detect(args) construct. Currently it will be called ++ with either "cpu" or "tune" as argument depending on if -mcpu=native ++ or -mtune=native is to be substituted. ++ ++ It returns a string containing new command line parameters to be ++ put at the place of the above two options, depending on what CPU ++ this is executed. E.g. "-mcpu=sw6" on an Sw_64 for ++ -mcpu=native. If the routine can't detect a known processor, ++ the -mcpu or -mtune option is discarded. ++ ++ ARGC and ARGV are set depending on the actual arguments given ++ in the spec. */ ++const char * ++host_detect_local_cpu (int argc, const char **argv) ++{ ++ static const struct cpu_types ++ { ++ long implver; ++ long amask; ++ const char *const cpu; ++ } cpu_types[] = {{IMPLVER_SW6_FAMILY, ++ AMASK_BWX | AMASK_FIX | AMASK_CIX | AMASK_SW6A, "sw6a"}, ++ {IMPLVER_SW6_FAMILY, ++ AMASK_BWX | AMASK_FIX | AMASK_CIX | AMASK_SW6B, "sw6b"}, ++ {IMPLVER_SW8_FAMILY, ++ AMASK_BWX | AMASK_FIX | AMASK_CIX | AMASK_SW8A, "sw8a"}, ++ {0, 0, NULL}}; ++ long implver; ++ long amask; ++ const char *cpu; ++ int i; ++ ++ if (argc < 1) ++ return NULL; ++ ++ if (strcmp (argv[0], "cpu") && strcmp (argv[0], "tune")) ++ return NULL; ++ ++ implver = __builtin_sw_64_implver (); ++ amask = __builtin_sw_64_amask (~0L); ++ cpu = NULL; ++ ++ for (i = 0; cpu_types[i].cpu != NULL; i++) ++ if (implver == cpu_types[i].implver ++ && (~amask & cpu_types[i].amask) == cpu_types[i].amask) ++ { ++ cpu = cpu_types[i].cpu; ++ break; ++ } ++ ++ if (cpu == NULL) ++ return NULL; ++ ++ return concat ("-m", argv[0], "=", cpu, NULL); ++} +diff --git a/gcc/config/sw_64/elf.h b/gcc/config/sw_64/elf.h +new file mode 100644 +index 000000000..559a8172a +--- /dev/null ++++ b/gcc/config/sw_64/elf.h +@@ -0,0 +1,194 @@ ++/* Definitions of target machine for GNU compiler, for Sw_64 w/ELF. ++ Copyright (C) 1996-2020 Free Software Foundation, Inc. ++ Contributed by Richard Henderson (rth@tamu.edu). ++ ++This file is part of GCC. 
++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#undef CC1_SPEC ++#define CC1_SPEC "%{G*}" ++ ++#undef ASM_SPEC ++#define ASM_SPEC \ ++ "%{G*} %{relax:-relax} %{!gstabs*:-no-mdebug}%{gstabs*:-mdebug} " \ ++ "%{mcpu=*:-m%*}" ++ ++/* Do not output a .file directive at the beginning of the input file. */ ++ ++#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE ++#define TARGET_ASM_FILE_START_FILE_DIRECTIVE true ++ ++/* This is how to output an assembler line ++ that says to advance the location counter ++ to a multiple of 2**LOG bytes. */ ++ ++#define ASM_OUTPUT_ALIGN(FILE, LOG) \ ++ if ((LOG) != 0) \ ++ fprintf (FILE, "\t.align %d\n", LOG); ++ ++/* This says how to output assembler code to declare an ++ uninitialized internal linkage data object. Under SVR4, ++ the linker seems to want the alignment of data objects ++ to depend on their types. We do exactly that here. */ ++ ++#undef ASM_OUTPUT_ALIGNED_LOCAL ++#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \ ++ do \ ++ { \ ++ if ((SIZE) <= (unsigned HOST_WIDE_INT) g_switch_value) \ ++ switch_to_section (sbss_section); \ ++ else \ ++ switch_to_section (bss_section); \ ++ ASM_OUTPUT_TYPE_DIRECTIVE (FILE, NAME, "object"); \ ++ if (!flag_inhibit_size_directive) \ ++ ASM_OUTPUT_SIZE_DIRECTIVE (FILE, NAME, SIZE); \ ++ ASM_OUTPUT_ALIGN ((FILE), exact_log2 ((ALIGN) / BITS_PER_UNIT)); \ ++ ASM_OUTPUT_LABEL (FILE, NAME); \ ++ ASM_OUTPUT_SKIP ((FILE), (SIZE) ? (SIZE) : 1); \ ++ } \ ++ while (0) ++ ++/* This says how to output assembler code to declare an ++ uninitialized external linkage data object. */ ++ ++#undef ASM_OUTPUT_ALIGNED_BSS ++#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \ ++ do \ ++ { \ ++ ASM_OUTPUT_ALIGNED_LOCAL (FILE, NAME, SIZE, ALIGN); \ ++ } \ ++ while (0) ++ ++#undef BSS_SECTION_ASM_OP ++#define BSS_SECTION_ASM_OP "\t.section\t.bss" ++#undef SBSS_SECTION_ASM_OP ++#define SBSS_SECTION_ASM_OP "\t.section\t.sbss,\"aw\"" ++#undef SDATA_SECTION_ASM_OP ++#define SDATA_SECTION_ASM_OP "\t.section\t.sdata,\"aw\"" ++ ++/* This is how we tell the assembler that two symbols have the same value. */ ++ ++#undef ASM_OUTPUT_DEF ++#define ASM_OUTPUT_DEF(FILE, ALIAS, NAME) \ ++ do \ ++ { \ ++ assemble_name (FILE, ALIAS); \ ++ fputs (" = ", FILE); \ ++ assemble_name (FILE, NAME); \ ++ fputc ('\n', FILE); \ ++ } \ ++ while (0) ++ ++#undef ASM_OUTPUT_DEF_FROM_DECLS ++#define ASM_OUTPUT_DEF_FROM_DECLS(FILE, DECL, TARGET) \ ++ do \ ++ { \ ++ const char *alias = XSTR (XEXP (DECL_RTL (DECL), 0), 0); \ ++ const char *name = IDENTIFIER_POINTER (TARGET); \ ++ if (TREE_CODE (DECL) == FUNCTION_DECL) \ ++ { \ ++ fputc ('$', FILE); \ ++ assemble_name (FILE, alias); \ ++ fputs ("..ng = $", FILE); \ ++ assemble_name (FILE, name); \ ++ fputs ("..ng\n", FILE); \ ++ } \ ++ ASM_OUTPUT_DEF (FILE, alias, name); \ ++ } \ ++ while (0) ++ ++/* Provide a STARTFILE_SPEC appropriate for ELF. 
Here we add the ++ (even more) magical crtbegin.o file which provides part of the ++ support for getting C++ file-scope static object constructed ++ before entering `main'. */ ++ ++#undef STARTFILE_SPEC ++#ifdef HAVE_LD_PIE ++#define STARTFILE_SPEC \ ++ "%{!shared: %{pg|p:gcrt1.o%s;pie:Scrt1.o%s;:crt1.o%s}}\ ++ crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}" ++#else ++#define STARTFILE_SPEC \ ++ "%{!shared: %{pg|p:gcrt1.o%s;:crt1.o%s}}\ ++ crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}" ++#endif ++ ++/* Provide a ENDFILE_SPEC appropriate for ELF. Here we tack on the ++ magical crtend.o file which provides part of the support for ++ getting C++ file-scope static object constructed before entering ++ `main', followed by a normal ELF "finalizer" file, `crtn.o'. */ ++ ++#undef ENDFILE_SPEC ++#define ENDFILE_SPEC \ ++ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s} \ ++ %{shared|pie:crtendS.o%s;:crtend.o%s} crtn.o%s" ++ ++/* This variable should be set to 'true' if the target ABI requires ++ unwinding tables even when exceptions are not used. */ ++#define TARGET_UNWIND_TABLES_DEFAULT true ++ ++/* Select a format to encode pointers in exception handling data. CODE ++ is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is ++ true if the symbol may be affected by dynamic relocations. ++ ++ Since application size is already constrained to <2GB by the form of ++ the ldgp relocation, we can use a 32-bit pc-relative relocation to ++ static data. Dynamic data is accessed indirectly to allow for read ++ only EH sections. */ ++#define ASM_PREFERRED_EH_DATA_FORMAT(CODE, GLOBAL) \ ++ (((GLOBAL) ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | DW_EH_PE_sdata4) ++ ++/* If defined, a C statement to be executed just prior to the output of ++ assembler code for INSN. */ ++#define FINAL_PRESCAN_INSN(INSN, OPVEC, NOPERANDS) \ ++ (sw_64_this_literal_sequence_number = 0, \ ++ sw_64_this_gpdisp_sequence_number = 0) ++extern int sw_64_this_literal_sequence_number; ++extern int sw_64_this_gpdisp_sequence_number; ++ ++/* Since the bits of the _init and _fini function is spread across ++ many object files, each potentially with its own GP, we must assume ++ we need to load our GP. Further, the .init/.fini section can ++ easily be more than 4MB away from the function to call so we can't ++ use bsr. */ ++// jsr->call ++#ifdef __sw_64_sw8a__ ++#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ ++ asm (SECTION_OP "\n" \ ++ " addpi 0, $29\n" \ ++ " ldgp $29,0($29)\n" \ ++ " unop\n" \ ++ " call $26," USER_LABEL_PREFIX #FUNC "\n" \ ++ " .align 3\n" \ ++ " .previous"); ++#else ++#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \ ++ asm (SECTION_OP "\n" \ ++ " br $29,1f\n" \ ++ "1: ldgp $29,0($29)\n" \ ++ " unop\n" \ ++ " call $26," USER_LABEL_PREFIX #FUNC "\n" \ ++ " .align 3\n" \ ++ " .previous"); ++#endif ++ ++/* If we have the capability create headers for efficient EH lookup. ++ As of Jan 2002, only glibc 2.2.4 can actually make use of this, but ++ I imagine that other systems will catch up. In the meantime, it ++ doesn't harm to make sure that the data exists to be used later. */ ++#if defined HAVE_LD_EH_FRAME_HDR ++#define LINK_EH_SPEC "%{!static|static-pie:--eh-frame-hdr} " ++#endif +diff --git a/gcc/config/sw_64/elf.opt b/gcc/config/sw_64/elf.opt +new file mode 100644 +index 000000000..9059fee8c +--- /dev/null ++++ b/gcc/config/sw_64/elf.opt +@@ -0,0 +1,29 @@ ++; Sw_64 ELF options. ++ ++; Copyright (C) 2011-2020 Free Software Foundation, Inc. 
++; ++; This file is part of GCC. ++; ++; GCC is free software; you can redistribute it and/or modify it under ++; the terms of the GNU General Public License as published by the Free ++; Software Foundation; either version 3, or (at your option) any later ++; version. ++; ++; GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++; WARRANTY; without even the implied warranty of MERCHANTABILITY or ++; FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++; for more details. ++; ++; You should have received a copy of the GNU General Public License ++; along with GCC; see the file COPYING3. If not see ++; . ++ ++; See the GCC internals manual (options.texi) for a description of ++; this file's format. ++ ++; Please try to keep this file in ASCII collating order. ++ ++relax ++Driver ++ ++; This comment is to ensure we retain the blank line above. +diff --git a/gcc/config/sw_64/freebsd.h b/gcc/config/sw_64/freebsd.h +new file mode 100644 +index 000000000..f0b599b79 +--- /dev/null ++++ b/gcc/config/sw_64/freebsd.h +@@ -0,0 +1,69 @@ ++/* Definitions for Sw_64 running FreeBSD using the ELF format ++ Copyright (C) 2000-2020 Free Software Foundation, Inc. ++ Contributed by David E. O'Brien and BSDi. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#undef EXTRA_SPECS ++#define EXTRA_SPECS \ ++ { \ ++ "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER \ ++ } ++ ++/* Provide a CPP_SPEC appropriate for FreeBSD/sw_64 -- dealing with ++ the GCC option `-posix'. */ ++ ++#undef CPP_SPEC ++#define CPP_SPEC "%{posix:-D_POSIX_SOURCE}" ++ ++#define LINK_SPEC \ ++ "%{G*} %{relax:-relax} \ ++ %{p:%nconsider using '-pg' instead of '-p' with gprof (1)} \ ++ %{assert*} %{R*} %{rpath*} %{defsym*} \ ++ %{shared:-Bshareable %{h*} %{soname*}} \ ++ %{!shared: \ ++ %{!static: \ ++ %{rdynamic:-export-dynamic} \ ++ -dynamic-linker %(fbsd_dynamic_linker) } \ ++ %{static:-Bstatic}} \ ++ %{symbolic:-Bsymbolic}" ++ ++/************************[ Target stuff ]***********************************/ ++ ++/* Define the actual types of some ANSI-mandated types. ++ Needs to agree with . GCC defaults come from c-decl.c, ++ c-common.c, and config//.h. */ ++ ++/* sw_64.h gets this wrong for FreeBSD. We use the GCC defaults instead. */ ++#undef WCHAR_TYPE ++ ++#undef WCHAR_TYPE_SIZE ++#define WCHAR_TYPE_SIZE 32 ++ ++#define TARGET_ELF 1 ++ ++#undef HAS_INIT_SECTION ++ ++/* Show that we need a GP when profiling. */ ++#undef TARGET_PROFILING_NEEDS_GP ++#define TARGET_PROFILING_NEEDS_GP 1 ++ ++/* Don't default to pcc-struct-return, we want to retain compatibility with ++ older FreeBSD releases AND pcc-struct-return may not be reentrant. 
*/ ++ ++#undef DEFAULT_PCC_STRUCT_RETURN ++#define DEFAULT_PCC_STRUCT_RETURN 0 +diff --git a/gcc/config/sw_64/gnu-user.h b/gcc/config/sw_64/gnu-user.h +new file mode 100644 +index 000000000..2c40cb84b +--- /dev/null ++++ b/gcc/config/sw_64/gnu-user.h +@@ -0,0 +1,177 @@ ++/* Definitions for systems using, at least optionally, a GNU ++ (glibc-based) userspace or other userspace with libc derived from ++ glibc (e.g. uClibc) or for which similar specs are appropriate. ++ Copyright (C) 1995-2020 Free Software Foundation, Inc. ++ Contributed by Eric Youngdale. ++ Modified for stabs-in-ELF by H.J. Lu (hjl@lucon.org). ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. ++ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++. */ ++ ++/* Don't assume anything about the header files. */ ++//#define SYSTEM_IMPLICIT_EXTERN_C ++/* ++#undef ASM_APP_ON ++#define ASM_APP_ON "#APP\n" ++ ++#undef ASM_APP_OFF ++#define ASM_APP_OFF "#NO_APP\n" ++*/ ++#if ENABLE_OFFLOADING == 1 ++#define CRTOFFLOADBEGIN "%{fopenacc|fopenmp:crtoffloadbegin%O%s}" ++#define CRTOFFLOADEND "%{fopenacc|fopenmp:crtoffloadend%O%s}" ++#else ++#define CRTOFFLOADBEGIN "" ++#define CRTOFFLOADEND "" ++#endif ++ ++/* Provide a STARTFILE_SPEC appropriate for GNU userspace. Here we add ++ the GNU userspace magical crtbegin.o file (see crtstuff.c) which ++ provides part of the support for getting C++ file-scope static ++ object constructed before entering `main'. */ ++ ++#if defined HAVE_LD_PIE ++#define GNU_USER_TARGET_STARTFILE_SPEC \ ++ "%{shared:; \ ++ pg|p|profile:%{static-pie:grcrt1.o%s;:gcrt1.o%s}; \ ++ static:crt1.o%s; \ ++ static-pie:rcrt1.o%s; \ ++ " PIE_SPEC ":Scrt1.o%s; \ ++ :crt1.o%s} \ ++ crti.o%s \ ++ %{static:crtbeginT.o%s; \ ++ shared|static-pie|" PIE_SPEC ":crtbeginS.o%s; \ ++ :crtbegin.o%s} \ ++ %{fvtable-verify=none:%s; \ ++ fvtable-verify=preinit:vtv_start_preinit.o%s; \ ++ fvtable-verify=std:vtv_start.o%s} \ ++ " CRTOFFLOADBEGIN ++#else ++#define GNU_USER_TARGET_STARTFILE_SPEC \ ++ "%{shared:; \ ++ pg|p|profile:gcrt1.o%s; \ ++ :crt1.o%s} \ ++ crti.o%s \ ++ %{static:crtbeginT.o%s; \ ++ shared|pie|static-pie:crtbeginS.o%s; \ ++ :crtbegin.o%s} \ ++ %{fvtable-verify=none:%s; \ ++ fvtable-verify=preinit:vtv_start_preinit.o%s; \ ++ fvtable-verify=std:vtv_start.o%s} \ ++ " CRTOFFLOADBEGIN ++#endif ++#undef STARTFILE_SPEC ++#define STARTFILE_SPEC GNU_USER_TARGET_STARTFILE_SPEC ++ ++/* Provide a ENDFILE_SPEC appropriate for GNU userspace. Here we tack on ++ the GNU userspace magical crtend.o file (see crtstuff.c) which ++ provides part of the support for getting C++ file-scope static ++ object constructed before entering `main', followed by a normal ++ GNU userspace "finalizer" file, `crtn.o'. 
*/ ++ ++#if defined HAVE_LD_PIE ++#define GNU_USER_TARGET_ENDFILE_SPEC \ ++ "%{fvtable-verify=none:%s; \ ++ fvtable-verify=preinit:vtv_end_preinit.o%s; \ ++ fvtable-verify=std:vtv_end.o%s} \ ++ %{static:crtend.o%s; \ ++ shared|static-pie|" PIE_SPEC ":crtendS.o%s; \ ++ :crtend.o%s} \ ++ crtn.o%s \ ++ " CRTOFFLOADEND ++#else ++#define GNU_USER_TARGET_ENDFILE_SPEC \ ++ "%{fvtable-verify=none:%s; \ ++ fvtable-verify=preinit:vtv_end_preinit.o%s; \ ++ fvtable-verify=std:vtv_end.o%s} \ ++ %{static:crtend.o%s; \ ++ shared|pie|static-pie:crtendS.o%s; \ ++ :crtend.o%s} \ ++ crtn.o%s \ ++ " CRTOFFLOADEND ++#endif ++#undef ENDFILE_SPEC ++#define ENDFILE_SPEC GNU_USER_TARGET_ENDFILE_SPEC ++ ++/* This is for -profile to use -lc_p instead of -lc. */ ++#define GNU_USER_TARGET_CC1_SPEC "%{profile:-p}" ++#ifndef CC1_SPEC ++#define CC1_SPEC GNU_USER_TARGET_CC1_SPEC ++#endif ++ ++/* The GNU C++ standard library requires that these macros be defined. */ ++#undef CPLUSPLUS_CPP_SPEC ++#define CPLUSPLUS_CPP_SPEC "-D_GNU_SOURCE %(cpp)" ++ ++#define GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC \ ++ "%{shared:-lc} \ ++ %{!shared:%{profile:-lc_p}%{!profile:-lc}}" ++ ++#define GNU_USER_TARGET_LIB_SPEC \ ++ "%{pthread:-lpthread} " GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC ++ ++#undef LIB_SPEC ++#define LIB_SPEC GNU_USER_TARGET_LIB_SPEC ++ ++#if defined HAVE_LD_EH_FRAME_HDR ++#define LINK_EH_SPEC "%{!static|static-pie:--eh-frame-hdr} " ++#endif ++ ++#undef LINK_GCC_C_SEQUENCE_SPEC ++#define LINK_GCC_C_SEQUENCE_SPEC \ ++ "%{static|static-pie:--start-group} %G %L \ ++ %{static|static-pie:--end-group}%{!static:%{!static-pie:%G}}" ++ ++/* Use --as-needed -lgcc_s for eh support. */ ++#ifdef HAVE_LD_AS_NEEDED ++#define USE_LD_AS_NEEDED 1 ++#endif ++ ++#define TARGET_POSIX_IO ++ ++#undef TARGET_LIBC_HAS_FUNCTION ++#define TARGET_LIBC_HAS_FUNCTION gnu_libc_has_function ++ ++/* Link -lasan early on the command line. For -static-libasan, don't link ++ it for -shared link, the executable should be compiled with -static-libasan ++ in that case, and for executable link with --{,no-}whole-archive around ++ it to force everything into the executable. And similarly for -ltsan ++ and -llsan. */ ++#if defined HAVE_LD_STATIC_DYNAMIC ++#undef LIBASAN_EARLY_SPEC ++#define LIBASAN_EARLY_SPEC \ ++ "%{!shared:libasan_preinit%O%s} " \ ++ "%{static-libasan:%{!shared:" LD_STATIC_OPTION \ ++ " --whole-archive -lasan --no-whole-archive " LD_DYNAMIC_OPTION \ ++ "}}%{!static-libasan:-lasan}" ++#undef LIBTSAN_EARLY_SPEC ++#define LIBTSAN_EARLY_SPEC \ ++ "%{!shared:libtsan_preinit%O%s} " \ ++ "%{static-libtsan:%{!shared:" LD_STATIC_OPTION \ ++ " --whole-archive -ltsan --no-whole-archive " LD_DYNAMIC_OPTION \ ++ "}}%{!static-libtsan:-ltsan}" ++#undef LIBLSAN_EARLY_SPEC ++#define LIBLSAN_EARLY_SPEC \ ++ "%{!shared:liblsan_preinit%O%s} " \ ++ "%{static-liblsan:%{!shared:" LD_STATIC_OPTION \ ++ " --whole-archive -llsan --no-whole-archive " LD_DYNAMIC_OPTION \ ++ "}}%{!static-liblsan:-llsan}" ++#endif +diff --git a/gcc/config/sw_64/linux-elf.h b/gcc/config/sw_64/linux-elf.h +new file mode 100644 +index 000000000..f3039c2ff +--- /dev/null ++++ b/gcc/config/sw_64/linux-elf.h +@@ -0,0 +1,54 @@ ++/* Definitions of target machine for GNU compiler ++ for Sw_64 Linux-based GNU systems using ELF. ++ Copyright (C) 1996-2020 Free Software Foundation, Inc. ++ Contributed by Richard Henderson. ++ ++This file is part of GCC. 
++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#undef EXTRA_SPECS ++#define EXTRA_SPECS {"elf_dynamic_linker", ELF_DYNAMIC_LINKER}, ++ ++#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux.so.2" ++#define UCLIBC_DYNAMIC_LINKER "/lib/ld-uClibc.so.0" ++#if DEFAULT_LIBC == LIBC_UCLIBC ++#define CHOOSE_DYNAMIC_LINKER(G, U) "%{mglibc:" G ";:" U "}" ++#elif DEFAULT_LIBC == LIBC_GLIBC ++#define CHOOSE_DYNAMIC_LINKER(G, U) "%{muclibc:" U ";:" G "}" ++#else ++#error "Unsupported DEFAULT_LIBC" ++#endif ++#define GNU_USER_DYNAMIC_LINKER \ ++ CHOOSE_DYNAMIC_LINKER (GLIBC_DYNAMIC_LINKER, UCLIBC_DYNAMIC_LINKER) ++ ++#define ELF_DYNAMIC_LINKER GNU_USER_DYNAMIC_LINKER ++ ++#define LINK_SPEC \ ++ "-m elf64sw_64 %{G*} %{relax:-relax} \ ++ %{O*:-O3} %{!O*:-O1} \ ++ %{shared:-shared} \ ++ %{!shared: \ ++ %{!static: \ ++ %{rdynamic:-export-dynamic} \ ++ -dynamic-linker %(elf_dynamic_linker)} \ ++ %{static:-static}}" ++ ++#undef LIB_SPEC ++#define LIB_SPEC \ ++ "%{pthread:-lpthread} " \ ++ "%{shared:-lc}%{!shared:%{profile:-lc_p}%{!profile:-lc}} " ++ ++#define TARGET_ASM_FILE_END file_end_indicate_exec_stack +diff --git a/gcc/config/sw_64/linux.h b/gcc/config/sw_64/linux.h +new file mode 100644 +index 000000000..023fd9fde +--- /dev/null ++++ b/gcc/config/sw_64/linux.h +@@ -0,0 +1,105 @@ ++/* Definitions of target machine for GNU compiler, ++ for Sw_64 Linux-based GNU systems. ++ Copyright (C) 1996-2020 Free Software Foundation, Inc. ++ Contributed by Richard Henderson. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#define TARGET_OS_CPP_BUILTINS() \ ++ do \ ++ { \ ++ builtin_define ("__gnu_linux__"); \ ++ builtin_define ("_LONGLONG"); \ ++ builtin_define_std ("linux"); \ ++ builtin_define_std ("unix"); \ ++ builtin_assert ("system=linux"); \ ++ builtin_assert ("system=unix"); \ ++ builtin_assert ("system=posix"); \ ++ /* The GNU C++ standard library requires this. */ \ ++ if (c_dialect_cxx ()) \ ++ builtin_define ("_GNU_SOURCE"); \ ++ } \ ++ while (0) ++ ++#undef LIB_SPEC ++#define LIB_SPEC \ ++ "%{pthread:-lpthread} \ ++ %{shared:-lc} \ ++ %{!shared: %{profile:-lc_p}%{!profile:-lc}}" ++ ++#undef CPP_SPEC ++#define CPP_SPEC "%{posix:-D_POSIX_SOURCE} %{pthread:-D_REENTRANT}" ++ ++/* Show that we need a GP when profiling. */ ++#undef TARGET_PROFILING_NEEDS_GP ++#define TARGET_PROFILING_NEEDS_GP 1 ++ ++/* Don't care about faults in the prologue. 
*/ ++#undef TARGET_CAN_FAULT_IN_PROLOGUE ++#define TARGET_CAN_FAULT_IN_PROLOGUE 1 ++ ++#undef WCHAR_TYPE ++#define WCHAR_TYPE "int" ++ ++#ifdef SINGLE_LIBC ++#define OPTION_GLIBC (DEFAULT_LIBC == LIBC_GLIBC) ++#define OPTION_UCLIBC (DEFAULT_LIBC == LIBC_UCLIBC) ++#define OPTION_BIONIC (DEFAULT_LIBC == LIBC_BIONIC) ++#undef OPTION_MUSL ++#define OPTION_MUSL (DEFAULT_LIBC == LIBC_MUSL) ++#else ++#define OPTION_GLIBC (linux_libc == LIBC_GLIBC) ++#define OPTION_UCLIBC (linux_libc == LIBC_UCLIBC) ++#define OPTION_BIONIC (linux_libc == LIBC_BIONIC) ++#undef OPTION_MUSL ++#define OPTION_MUSL (linux_libc == LIBC_MUSL) ++#endif ++ ++/* Determine what functions are present at the runtime; ++ this includes full c99 runtime and sincos. */ ++#undef TARGET_LIBC_HAS_FUNCTION ++#define TARGET_LIBC_HAS_FUNCTION linux_libc_has_function ++ ++#define TARGET_POSIX_IO ++ ++#define LINK_GCC_C_SEQUENCE_SPEC \ ++ "%{static|static-pie:--start-group} %G %L \ ++ %{static|static-pie:--end-group}%{!static:%{!static-pie:%G}}" ++ ++/* Use --as-needed -lgcc_s for eh support. */ ++#ifdef HAVE_LD_AS_NEEDED ++#define USE_LD_AS_NEEDED 1 ++#endif ++ ++/* Define if long doubles should be mangled as 'g'. */ ++#define TARGET_ALTERNATE_LONG_DOUBLE_MANGLING ++ ++/* -mcpu=native handling only makes sense with compiler running on ++ an Sw_64 chip. */ ++#if defined __sw_64__ || defined __sw_64 ++extern const char * ++host_detect_local_cpu (int argc, const char **argv); ++#define EXTRA_SPEC_FUNCTIONS {"local_cpu_detect", host_detect_local_cpu}, ++ ++#define MCPU_MTUNE_NATIVE_SPECS \ ++ " %{mcpu=native:%. ++ ++(define_insn "*addsi_er_high_l" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (plus:SI (match_operand:SI 1 "register_operand" "r") ++ (high:SI (match_operand:SI 2 "local_symbolic_operand"))))] ++ "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS && reload_completed" ++ "ldih %0,%2(%1)\t\t!gprelhigh" ++ [(set_attr "usegp" "yes")]) ++ ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (high:SI (match_operand:SI 1 "local_symbolic_operand")))] ++ "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS && reload_completed" ++ [(set (match_dup 0) ++ (plus:SI (match_dup 2) (high:SI (match_dup 1))))] ++ "operands[2] = pic_offset_table_rtx;") ++ ++(define_insn "movsi_er_high_g" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "symbolic_operand") ++ (match_operand 3 "const_int_operand")] ++ UNSPEC_LITERAL))] ++ "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" ++{ ++ if (INTVAL (operands[3]) == 0) ++ return "ldw %0,%2(%1)\t\t!literal"; ++ else ++ return "ldw %0,%2(%1)\t\t!literal!%3"; ++} ++ [(set_attr "type" "ldsym")]) ++ ++(define_insn "movsi_er_high_g32" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand:DI 2 "symbolic_operand") ++ (match_operand 3 "const_int_operand")] ++ UNSPEC_LITERAL))] ++ "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" ++{ ++ if (INTVAL (operands[3]) == 0) ++ return "ldw %0,%2(%1)\t\t!literal"; ++ else ++ return "ldw %0,%2(%1)\t\t!literal!%3"; ++} ++ [(set_attr "type" "ldsym")]) ++ ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "global_symbolic_operand"))] ++ "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS && reload_completed" ++ [(set (match_dup 0) ++ (unspec:SI [(match_dup 2) ++ (match_dup 1) ++ (const_int 0)] UNSPEC_LITERAL))] ++ "operands[2] = pic_offset_table_rtx;") ++ ++(define_insn "*movsi_er_low_l" ++ [(set 
(match_operand:SI 0 "register_operand" "=r") ++ (lo_sum:SI (match_operand:SI 1 "register_operand" "r") ++ (match_operand:SI 2 "local_symbolic_operand")))] ++ "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" ++{ ++ if (true_regnum (operands[1]) == 29) ++ return "ldi %0,%2(%1)\t\t!gprel"; ++ else ++ return "ldi %0,%2(%1)\t\t!gprellow"; ++} ++ [(set_attr "usegp" "yes")]) ++ ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "small_symbolic_operand"))] ++ "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS && reload_completed" ++ [(set (match_dup 0) ++ (lo_sum:SI (match_dup 2) (match_dup 1)))] ++ "operands[2] = pic_offset_table_rtx;") ++ ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "local_symbolic_operand"))] ++ "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS && reload_completed" ++ [(set (match_dup 0) ++ (plus:SI (match_dup 2) (high:SI (match_dup 1)))) ++ (set (match_dup 0) ++ (lo_sum:SI (match_dup 0) (match_dup 1)))] ++ "operands[2] = pic_offset_table_rtx;") ++ ++(define_expand "prologue_ldgp_32" ++ [(set (match_dup 0) ++ (unspec_volatile:SI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1)) ++ (set (match_dup 0) ++ (unspec_volatile:SI [(match_dup 0) (match_dup 2)] UNSPECV_PLDGP2))] ++ "TARGET_SW_M32" ++{ ++ operands[0] = pic_offset_table_rtx; ++ operands[1] = gen_rtx_REG (Pmode, 27); ++ operands[2] = (TARGET_EXPLICIT_RELOCS ++ ? GEN_INT (sw_64_next_sequence_number++) ++ : const0_rtx); ++}) ++ ++(define_insn "*ldgp_er_1_32" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand 2 "const_int_operand")] ++ UNSPECV_LDGP1))] ++ "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" ++ "ldih %0,0(%1)\t\t!gpdisp!%2" ++ [(set_attr "cannot_copy" "true")]) ++ ++(define_insn "*ldgp_er_2_32" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand 2 "const_int_operand")] ++ UNSPEC_LDGP2))] ++ "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" ++ "ldi %0,0(%1)\t\t!gpdisp!%2" ++ [(set_attr "cannot_copy" "true")]) ++ ++(define_insn "*prologue_ldgp_er_2_32" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand 2 "const_int_operand")] ++ UNSPECV_PLDGP2))] ++ "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" ++{ ++ if (stfp3_flag == 1) ++ return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:\;setfpec1"; ++ else if (stfp3_flag == 2) ++ return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:\;setfpec3"; ++ else if (flag_fpcr_set == 1) ++ return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:\;setfpec1"; ++ else if (flag_fpcr_set == 3) ++ return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:\;setfpec0"; ++ else ++ return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:"; ++} ++ [(set_attr "cannot_copy" "true")]) ++ ++(define_insn "*prologue_ldgp_1_32" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand 2 "const_int_operand")] ++ UNSPECV_LDGP1))] ++ "TARGET_SW_M32" ++{ ++ if (stfp3_flag == 1) ++ return "ldgp %0,0(%1)\n$%~..ng:\;setfpec1"; ++ else if (stfp3_flag == 2) ++ return "ldgp %0,0(%1)\n$%~..ng:\;setfpec3"; ++ else if (flag_fpcr_set == 1) ++ return "ldgp %0,0(%1)\n$%~..ng:\;setfpec1"; ++ else if (flag_fpcr_set == 3) ++ return "ldgp %0,0(%1)\n$%~..ng:\;setfpec0"; ++ else ++ return "ldgp %0,0(%1)\n$%~..ng:"; ++} ++ [(set_attr "cannot_copy" "true")]) ++ ++(define_insn "*prologue_ldgp_2_32" ++ [(set 
(match_operand:SI 0 "register_operand" "=r") ++ (unspec_volatile:SI [(match_operand:SI 1 "register_operand" "r") ++ (match_operand 2 "const_int_operand")] ++ UNSPECV_PLDGP2))] ++ "TARGET_SW_M32" ++) ++ ++(define_insn "*call_value_osf_1_er_32" ++ [(set (match_operand 0) ++ (call (mem:DI (match_operand:SI 1 "call_operand" "c,R,s")) ++ (match_operand 2))) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" ++ "@ ++ call $26,(%1),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* ++ bsr $26,%1\t\t!samegp ++ ldw $27,%1($29)\t\t!literal!%#\;call $26,($27),0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" ++ [(set_attr "type" "jsr") ++ (set_attr "length" "12,*,16")]) ++ ++(define_insn "*call_osf_1_er_noreturn_32" ++ [(call (mem:DI (match_operand:SI 0 "call_operand" "c,R,s")) ++ (match_operand 1)) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS ++ && find_reg_note (insn, REG_NORETURN, NULL_RTX)" ++ "@ ++ call $26,($27),0 ++ bsr $26,%0\t\t!samegp ++ ldw $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#" ++ [(set_attr "type" "jsr") ++ (set_attr "length" "*,*,8")]) ++ ++(define_insn "*call_osf_1_er_32" ++ [(call (mem:DI (match_operand:SI 0 "call_operand" "c,R,s")) ++ (match_operand 1)) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "TARGET_SW_M32 && TARGET_EXPLICIT_RELOCS" ++ "@ ++ call $26,(%0),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* ++ bsr $26,%0\t\t!samegp ++ ldw $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" ++ [(set_attr "type" "jsr") ++ (set_attr "length" "12,*,16")]) +diff --git a/gcc/config/sw_64/netbsd.h b/gcc/config/sw_64/netbsd.h +new file mode 100644 +index 000000000..c605c8df2 +--- /dev/null ++++ b/gcc/config/sw_64/netbsd.h +@@ -0,0 +1,69 @@ ++/* Definitions of target machine for GNU compiler, ++ for Sw_64 NetBSD systems. ++ Copyright (C) 1998-2020 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#define TARGET_OS_CPP_BUILTINS() \ ++ do \ ++ { \ ++ NETBSD_OS_CPP_BUILTINS_ELF (); \ ++ } \ ++ while (0) ++ ++/* NetBSD doesn't use the LANGUAGE* built-ins. */ ++#undef SUBTARGET_LANGUAGE_CPP_BUILTINS ++#define SUBTARGET_LANGUAGE_CPP_BUILTINS() /* nothing. */ ++ ++/* Show that we need a GP when profiling. */ ++#undef TARGET_PROFILING_NEEDS_GP ++#define TARGET_PROFILING_NEEDS_GP 1 ++ ++/* Provide a CPP_SPEC appropriate for NetBSD/sw_64. We use ++ this to pull in CPP specs that all NetBSD configurations need. */ ++ ++#undef CPP_SPEC ++#define CPP_SPEC NETBSD_CPP_SPEC ++ ++#undef EXTRA_SPECS ++#define EXTRA_SPECS \ ++ {"netbsd_link_spec", NETBSD_LINK_SPEC_ELF}, \ ++ {"netbsd_entry_point", NETBSD_ENTRY_POINT}, \ ++ {"netbsd_endfile_spec", NETBSD_ENDFILE_SPEC}, ++ ++/* Provide a LINK_SPEC appropriate for a NetBSD/sw_64 ELF target. 
*/ ++ ++#undef LINK_SPEC ++#define LINK_SPEC \ ++ "%{G*} %{relax:-relax} \ ++ %{O*:-O3} %{!O*:-O1} \ ++ %(netbsd_link_spec)" ++ ++#define NETBSD_ENTRY_POINT "__start" ++ ++/* Provide an ENDFILE_SPEC appropriate for NetBSD/sw_64 ELF. Here we ++ add crtend.o, which provides part of the support for getting ++ C++ file-scope static objects deconstructed after exiting "main". ++ ++ We also need to handle the GCC option `-ffast-math'. */ ++ ++#undef ENDFILE_SPEC ++#define ENDFILE_SPEC \ ++ "%{Ofast|ffast-math|funsafe-math-optimizations:crtfm%O%s} \ ++ %(netbsd_endfile_spec)" ++ ++#define HAVE_ENABLE_EXECUTE_STACK +diff --git a/gcc/config/sw_64/openbsd.h b/gcc/config/sw_64/openbsd.h +new file mode 100644 +index 000000000..6b20e8dc6 +--- /dev/null ++++ b/gcc/config/sw_64/openbsd.h +@@ -0,0 +1,74 @@ ++/* Configuration file for an sw_64 OpenBSD target. ++ Copyright (C) 1999-2020 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* Controlling the compilation driver. */ ++#undef TARGET_DEFAULT ++#define TARGET_DEFAULT (MASK_FPREGS | MASK_IEEE | MASK_IEEE_CONFORMANT) ++ ++#define LINK_SPEC \ ++ "%{!shared:%{!nostdlib:%{!r*:%{!e*:-e __start}}}} \ ++ %{shared:-shared} %{R*} \ ++ %{static:-Bstatic} \ ++ %{!static:-Bdynamic} \ ++ %{rdynamic:-export-dynamic} \ ++ %{assert*} \ ++ %{!dynamic-linker:-dynamic-linker /usr/libexec/ld.so}" ++ ++/* As an elf system, we need crtbegin/crtend stuff. */ ++#undef STARTFILE_SPEC ++#define STARTFILE_SPEC \ ++ "\ ++ %{!shared: %{pg:gcrt0%O%s} %{!pg:%{p:gcrt0%O%s} \ ++ %{!p:%{!static:crt0%O%s} %{static:%{nopie:crt0%O%s} \ ++ %{!nopie:rcrt0%O%s}}}} crtbegin%O%s} %{shared:crtbeginS%O%s}" ++#undef ENDFILE_SPEC ++#define ENDFILE_SPEC "%{!shared:crtend%O%s} %{shared:crtendS%O%s}" ++ ++/* run-time target specifications. */ ++#define TARGET_OS_CPP_BUILTINS() \ ++ do { \ ++ OPENBSD_OS_CPP_BUILTINS_ELF(); \ ++ OPENBSD_OS_CPP_BUILTINS_LP64(); \ ++ } while (0) ++ ++/* Layout of source language data types. */ ++ ++/* This must agree with */ ++#undef SIZE_TYPE ++#define SIZE_TYPE "long unsigned int" ++ ++#undef PTRDIFF_TYPE ++#define PTRDIFF_TYPE "long int" ++ ++#undef INTMAX_TYPE ++#define INTMAX_TYPE "long long int" ++ ++#undef UINTMAX_TYPE ++#define UINTMAX_TYPE "long long unsigned int" ++ ++#undef WCHAR_TYPE ++#define WCHAR_TYPE "int" ++ ++#undef WCHAR_TYPE_SIZE ++#define WCHAR_TYPE_SIZE 32 ++ ++#undef WINT_TYPE ++#define WINT_TYPE "int" ++ ++#define LOCAL_LABEL_PREFIX "." +diff --git a/gcc/config/sw_64/predicates.md b/gcc/config/sw_64/predicates.md +new file mode 100644 +index 000000000..c82d5c7de +--- /dev/null ++++ b/gcc/config/sw_64/predicates.md +@@ -0,0 +1,649 @@ ++;; Predicate definitions for Sw_64. ++;; Copyright (C) 2004-2020 Free Software Foundation, Inc. ++;; ++;; This file is part of GCC. 
++;; ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++;; GNU General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++ ++;; Return 1 if OP is the zero constant for MODE. ++(define_predicate "const0_operand" ++ (and (match_code "const_int,const_wide_int,const_double,const_vector") ++ (match_test "op == CONST0_RTX (mode)"))) ++ ++;; Returns true if OP is either the constant zero or a register. ++(define_predicate "reg_or_0_operand" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "const0_operand"))) ++ ++;; Return 1 if OP is a constant in the range of 0-63 (for a shift) or ++;; any register. ++(define_predicate "reg_or_6bit_operand" ++ (if_then_else (match_code "const_int") ++ (match_test "INTVAL (op) >= 0 && INTVAL (op) < 64") ++ (match_operand 0 "register_operand"))) ++ ++;; Return 1 if OP is a constant in the range of 0-31 (for a shift) or ++;; any register. ++(define_predicate "reg_or_5bit_operand" ++ (if_then_else (match_code "const_int") ++ (match_test "INTVAL (op) >= 0 && INTVAL (op) < 32") ++ (match_operand 0 "register_operand"))) ++ ++;; Return 1 if OP is an 8-bit constant. ++(define_predicate "cint8_operand" ++ (and (match_code "const_int") ++ (match_test "INTVAL (op) >= 0 && INTVAL (op) < 256"))) ++ ++;; Return 1 if OP is an 8-bit constant or any register. ++(define_predicate "reg_or_8bit_operand" ++ (if_then_else (match_code "const_int") ++ (match_test "INTVAL (op) >= 0 && INTVAL (op) < 256") ++ (match_operand 0 "register_operand"))) ++ ++;; Return 1 if OP is a constant or any register. ++(define_predicate "reg_or_cint_operand" ++ (ior (match_operand 0 "register_operand") ++ (match_operand 0 "const_int_operand"))) ++ ++;; Return 1 if the operand is a valid second operand to an add insn. ++(define_predicate "add_operand" ++ (if_then_else (match_code "const_int") ++ (match_test "satisfies_constraint_K (op) || satisfies_constraint_L (op)") ++ (match_operand 0 "register_operand"))) ++ ++;; Return 1 if the operand is a valid second operand to a ++;; sign-extending add insn. ++(define_predicate "sext_add_operand" ++ (if_then_else (match_code "const_int") ++ (match_test "satisfies_constraint_I (op) || satisfies_constraint_O (op)") ++ (match_operand 0 "register_operand"))) ++ ++;; Return 1 if the operand is a non-symbolic constant operand that ++;; does not satisfy add_operand. ++(define_predicate "non_add_const_operand" ++ (and (match_code "const_int,const_wide_int,const_double,const_vector") ++ (not (match_operand 0 "add_operand")))) ++ ++;; Return 1 if the operand is a non-symbolic, nonzero constant operand. ++(define_predicate "non_zero_const_operand" ++ (and (match_code "const_int,const_wide_int,const_double,const_vector") ++ (not (match_test "op == CONST0_RTX (mode)")))) ++ ++;; Return 1 if OP is the constant 1, 2 or 3. ++(define_predicate "const123_operand" ++ (and (match_code "const_int") ++ (match_test "IN_RANGE (INTVAL (op), 1, 3)"))) ++ ++;; Return 1 if OP is the constant 2 or 3. 
++(define_predicate "const23_operand" ++ (and (match_code "const_int") ++ (match_test "INTVAL (op) == 2 || INTVAL (op) == 3"))) ++ ++;; Return 1 if OP is the constant 4 or 8. ++(define_predicate "const48_operand" ++ (and (match_code "const_int") ++ (match_test "INTVAL (op) == 4 || INTVAL (op) == 8"))) ++ ++;; Return 1 if OP is a valid first operand to an AND insn. ++(define_predicate "and_operand" ++ (if_then_else (match_code "const_int") ++ (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 0x100 ++ || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100 ++ || zap_mask (INTVAL (op))") ++ (match_operand 0 "register_operand"))) ++ ++;; Return 1 if OP is a valid first operand to an IOR or XOR insn. ++(define_predicate "or_operand" ++ (if_then_else (match_code "const_int") ++ (match_test "(unsigned HOST_WIDE_INT) INTVAL (op) < 0x100 ++ || (unsigned HOST_WIDE_INT) ~ INTVAL (op) < 0x100") ++ (match_operand 0 "register_operand"))) ++ ++;; Return 1 if OP is a constant that is the width, in bits, of an integral ++;; mode not larger than DImode. ++(define_predicate "mode_width_operand" ++ (match_code "const_int") ++{ ++ HOST_WIDE_INT i = INTVAL (op); ++ return i == 8 || i == 16 || i == 32 || i == 64; ++}) ++ ++;; Return 1 if OP is a constant that is a mask of ones of width of an ++;; integral machine mode not larger than DImode. ++(define_predicate "mode_mask_operand" ++ (match_code "const_int") ++{ ++ HOST_WIDE_INT value = INTVAL (op); ++ ++ if (value == 0xff) ++ return 1; ++ if (value == 0xffff) ++ return 1; ++ if (value == 0xffffffff) ++ return 1; ++ if (value == -1) ++ return 1; ++ ++ return 0; ++}) ++ ++;; Return 1 if OP is a multiple of 8 less than 64. ++(define_predicate "mul8_operand" ++ (match_code "const_int") ++{ ++ unsigned HOST_WIDE_INT i = INTVAL (op); ++ return i < 64 && i % 8 == 0; ++}) ++ ++;; Return 1 if OP is a hard floating-point register. ++(define_predicate "hard_fp_register_operand" ++ (match_operand 0 "register_operand") ++{ ++ if (SUBREG_P (op)) ++ op = SUBREG_REG (op); ++ return REGNO_REG_CLASS (REGNO (op)) == FLOAT_REGS; ++}) ++ ++;; Return 1 if OP is a hard general register. ++(define_predicate "hard_int_register_operand" ++ (match_operand 0 "register_operand") ++{ ++ if (SUBREG_P (op)) ++ op = SUBREG_REG (op); ++ return REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS; ++}) ++ ++;; Return 1 if OP is a valid operand for the source of a move insn. ++(define_predicate "input_operand" ++ (match_operand 0 "general_operand") ++{ ++ switch (GET_CODE (op)) ++ { ++ case LABEL_REF: ++ case SYMBOL_REF: ++ case CONST: ++ if (TARGET_EXPLICIT_RELOCS) ++ { ++ /* We don't split symbolic operands into something unintelligable ++ until after reload, but we do not wish non-small, non-global ++ symbolic operands to be reconstructed from their high/lo_sum ++ form. */ ++ return (small_symbolic_operand (op, mode) ++ || global_symbolic_operand (op, mode) ++ || gotdtp_symbolic_operand (op, mode) ++ || gottp_symbolic_operand (op, mode)); ++ } ++ /* VMS still has a 32-bit mode. */ ++ return mode == ptr_mode || mode == Pmode; ++ ++ case HIGH: ++ return (TARGET_EXPLICIT_RELOCS ++ && local_symbolic_operand (XEXP (op, 0), mode)); ++ ++ case REG: ++ return 1; ++ ++ case SUBREG: ++ if (register_operand (op, mode)) ++ return 1; ++ /* fall through. 
*/ ++ case MEM: ++ return ((TARGET_BWX || (mode != HImode && mode != QImode)) ++ && general_operand (op, mode)); ++ ++ case CONST_WIDE_INT: ++ case CONST_DOUBLE: ++ return op == CONST0_RTX (mode); ++ ++ case CONST_VECTOR: ++ if (reload_in_progress || reload_completed) ++ return sw_64_legitimate_constant_p (mode, op); ++ return op == CONST0_RTX (mode); ++ ++ case CONST_INT: ++ if (mode == QImode || mode == HImode) ++ return true; ++ if (reload_in_progress || reload_completed) ++ return sw_64_legitimate_constant_p (mode, op); ++ return add_operand (op, mode); ++ ++ default: ++ gcc_unreachable (); ++ } ++ return 0; ++}) ++ ++;; Return 1 if OP is a SYMBOL_REF for a function known to be in this ++;; file, and in the same section as the current function. ++ ++(define_predicate "samegp_function_operand" ++ (match_code "symbol_ref") ++{ ++ /* Easy test for recursion. */ ++ if (op == XEXP (DECL_RTL (current_function_decl), 0)) ++ return true; ++ ++ /* Functions that are not local can be overridden, and thus may ++ not share the same gp. */ ++ if (! SYMBOL_REF_LOCAL_P (op)) ++ return false; ++ ++ /* If -msmall-data is in effect, assume that there is only one GP ++ for the module, and so any local symbol has this property. We ++ need explicit relocations to be able to enforce this for symbols ++ not defined in this unit of translation, however. */ ++ if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA) ++ return true; ++ ++ /* Functions that are not external are defined in this UoT, ++ and thus must share the same gp. */ ++ return ! SYMBOL_REF_EXTERNAL_P (op); ++}) ++ ++;; Return 1 if OP is a SYMBOL_REF for which we can make a call via bsr. ++(define_predicate "direct_call_operand" ++ (match_operand 0 "samegp_function_operand") ++{ ++ /* If profiling is implemented via linker tricks, we can't jump ++ to the nogp alternate entry point. Note that crtl->profile ++ would not be correct, since that doesn't indicate if the target ++ function uses profiling. */ ++ /* ??? TARGET_PROFILING_NEEDS_GP isn't really the right test, ++ but is approximately correct for the SYSV ABIs. Don't know ++ what to do for VMS, NT, or UMK. */ ++ if (!TARGET_PROFILING_NEEDS_GP && profile_flag) ++ return false; ++ ++ /* Must be a function. In some cases folks create thunks in static ++ data structures and then make calls to them. If we allow the ++ direct call, we'll get an error from the linker about !samegp reloc ++ against a symbol without a .prologue directive. */ ++ if (!SYMBOL_REF_FUNCTION_P (op)) ++ return false; ++ ++ /* Must be "near" so that the branch is assumed to reach. With ++ -msmall-text, this is assumed true of all local symbols. Since ++ we've already checked samegp, locality is already assured. */ ++ if (TARGET_SMALL_TEXT) ++ return true; ++ ++ return false; ++}) ++ ++;; Return 1 if OP is a valid operand for the MEM of a CALL insn. ++;; ++;; For TARGET_ABI_SYSV, we want to restrict to R27 or a pseudo. ++ ++(define_predicate "call_operand" ++ (ior (match_code "symbol_ref") ++ (and (match_code "reg") ++ (ior (not (match_test "TARGET_ABI_OSF")) ++ (not (match_test "HARD_REGISTER_P (op)")) ++ (match_test "REGNO (op) == R27_REG"))))) ++ ++;; Return true if OP is a LABEL_REF, or SYMBOL_REF or CONST referencing ++;; a (non-tls) variable known to be defined in this file. 
++(define_predicate "local_symbolic_operand" ++ (match_code "label_ref,const,symbol_ref") ++{ ++ if (GET_CODE (op) == CONST ++ && GET_CODE (XEXP (op, 0)) == PLUS ++ && CONST_INT_P (XEXP (XEXP (op, 0), 1))) ++ op = XEXP (XEXP (op, 0), 0); ++ ++ if (GET_CODE (op) == LABEL_REF) ++ return 1; ++ ++ if (GET_CODE (op) != SYMBOL_REF) ++ return 0; ++ ++ return (SYMBOL_REF_LOCAL_P (op) ++ && !SYMBOL_REF_WEAK (op) ++ && !SYMBOL_REF_TLS_MODEL (op)); ++}) ++ ++;; Return true if OP is a SYMBOL_REF or CONST referencing a variable ++;; known to be defined in this file in the small data area. ++(define_predicate "small_symbolic_operand" ++ (match_code "const,symbol_ref") ++{ ++ HOST_WIDE_INT ofs = 0, max_ofs = 0; ++ ++ if (! TARGET_SMALL_DATA) ++ return false; ++ ++ if (GET_CODE (op) == CONST ++ && GET_CODE (XEXP (op, 0)) == PLUS ++ && CONST_INT_P (XEXP (XEXP (op, 0), 1))) ++ { ++ ofs = INTVAL (XEXP (XEXP (op, 0), 1)); ++ op = XEXP (XEXP (op, 0), 0); ++ } ++ ++ if (GET_CODE (op) != SYMBOL_REF) ++ return false; ++ ++ /* ??? There's no encode_section_info equivalent for the rtl ++ constant pool, so SYMBOL_FLAG_SMALL never gets set. */ ++ if (CONSTANT_POOL_ADDRESS_P (op)) ++ { ++ max_ofs = GET_MODE_SIZE (get_pool_mode (op)); ++ if (max_ofs > g_switch_value) ++ return false; ++ } ++ else if (SYMBOL_REF_LOCAL_P (op) ++ && SYMBOL_REF_SMALL_P (op) ++ && !SYMBOL_REF_WEAK (op) ++ && !SYMBOL_REF_TLS_MODEL (op)) ++ { ++ if (SYMBOL_REF_DECL (op)) ++ max_ofs = tree_to_uhwi (DECL_SIZE_UNIT (SYMBOL_REF_DECL (op))); ++ } ++ else ++ return false; ++ ++ /* Given that we know that the GP is always 8 byte aligned, we can ++ always adjust by 7 without overflowing. */ ++ if (max_ofs < 8) ++ max_ofs = 8; ++ ++ /* Since we know this is an object in a small data section, we know the ++ entire section is addressable via GP. We don't know where the section ++ boundaries are, but we know the entire object is within. */ ++ /*return IN_RANGE (ofs, 0, max_ofs - 1);*/ ++ ++ if (sw_64_gprel_size == 16) ++ return IN_RANGE (ofs, 0, max_ofs - 1); ++ if (sw_64_gprel_size == 32) ++ return false; ++ ++}) ++ ++;; Return true if OP is a SYMBOL_REF or CONST referencing a variable ++;; not known (or known not) to be defined in this file. ++(define_predicate "global_symbolic_operand" ++ (match_code "const,symbol_ref") ++{ ++ if (GET_CODE (op) == CONST ++ && GET_CODE (XEXP (op, 0)) == PLUS ++ && CONST_INT_P (XEXP (XEXP (op, 0), 1))) ++ op = XEXP (XEXP (op, 0), 0); ++ ++ if (GET_CODE (op) != SYMBOL_REF) ++ return 0; ++ ++ return ((!SYMBOL_REF_LOCAL_P (op) || SYMBOL_REF_WEAK (op)) ++ && !SYMBOL_REF_TLS_MODEL (op)); ++}) ++ ++;; Returns 1 if OP is a symbolic operand, i.e. a symbol_ref or a label_ref, ++;; possibly with an offset. ++(define_predicate "symbolic_operand" ++ (ior (match_code "symbol_ref,label_ref") ++ (and (match_code "const") ++ (match_code "plus" "0") ++ (match_code "symbol_ref,label_ref" "00") ++ (match_code "const_int" "01")))) ++ ++;; Return true if OP is valid for 16-bit DTP relative relocations. ++(define_predicate "dtp16_symbolic_operand" ++ (and (match_code "const") ++ (match_test "tls_symbolic_operand_1 (op, 16, UNSPEC_DTPREL)"))) ++ ++;; Return true if OP is valid for 32-bit DTP relative relocations. ++(define_predicate "dtp32_symbolic_operand" ++ (and (match_code "const") ++ (match_test "tls_symbolic_operand_1 (op, 32, UNSPEC_DTPREL)"))) ++ ++;; Return true if OP is valid for 64-bit DTP relative relocations. 
++(define_predicate "gotdtp_symbolic_operand" ++ (and (match_code "const") ++ (match_test "tls_symbolic_operand_1 (op, 64, UNSPEC_DTPREL)"))) ++ ++;; Return true if OP is valid for 16-bit TP relative relocations. ++(define_predicate "tp16_symbolic_operand" ++ (and (match_code "const") ++ (match_test "tls_symbolic_operand_1 (op, 16, UNSPEC_TPREL)"))) ++ ++;; Return true if OP is valid for 32-bit TP relative relocations. ++(define_predicate "tp32_symbolic_operand" ++ (and (match_code "const") ++ (match_test "tls_symbolic_operand_1 (op, 32, UNSPEC_TPREL)"))) ++ ++;; Return true if OP is valid for 64-bit TP relative relocations. ++(define_predicate "gottp_symbolic_operand" ++ (and (match_code "const") ++ (match_test "tls_symbolic_operand_1 (op, 64, UNSPEC_TPREL)"))) ++ ++;; Return 1 if this memory address is a known aligned register plus ++;; a constant. It must be a valid address. This means that we can do ++;; this as an aligned reference plus some offset. ++;; ++;; Take into account what reload will do. Oh god this is awful. ++;; The horrible comma-operator construct below is to prevent genrecog ++;; from thinking that this predicate accepts REG and SUBREG. We don't ++;; use recog during reload, so pretending these codes are accepted ++;; pessimizes things a tad. ++ ++(define_special_predicate "aligned_memory_operand" ++ (ior (match_test "op = resolve_reload_operand (op), 0") ++ (match_code "mem")) ++{ ++ rtx base; ++ int offset; ++ ++ if (MEM_ALIGN (op) >= 32) ++ return 1; ++ ++ op = XEXP (op, 0); ++ ++ /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo) ++ sorts of constructs. Dig for the real base register. */ ++ if (reload_in_progress ++ && GET_CODE (op) == PLUS ++ && GET_CODE (XEXP (op, 0)) == PLUS) ++ { ++ base = XEXP (XEXP (op, 0), 0); ++ offset = INTVAL (XEXP (op, 1)); ++ } ++ else ++ { ++ if (! memory_address_p (mode, op)) ++ return 0; ++ if (GET_CODE (op) == PLUS) ++ { ++ base = XEXP (op, 0); ++ offset = INTVAL (XEXP (op, 1)); ++ } ++ else ++ { ++ base = op; ++ offset = 0; ++ } ++ } ++ ++ if (offset % GET_MODE_SIZE (mode)) ++ return 0; ++ ++ return (REG_P (base) && REGNO_POINTER_ALIGN (REGNO (base)) >= 32); ++}) ++ ++;; Similar, but return 1 if OP is a MEM which is not alignable. ++ ++(define_special_predicate "unaligned_memory_operand" ++ (ior (match_test "op = resolve_reload_operand (op), 0") ++ (match_code "mem")) ++{ ++ rtx base; ++ int offset; ++ ++ if (MEM_ALIGN (op) >= 32) ++ return 0; ++ ++ op = XEXP (op, 0); ++ ++ /* LEGITIMIZE_RELOAD_ADDRESS creates (plus (plus reg const_hi) const_lo) ++ sorts of constructs. Dig for the real base register. */ ++ if (reload_in_progress ++ && GET_CODE (op) == PLUS ++ && GET_CODE (XEXP (op, 0)) == PLUS) ++ { ++ base = XEXP (XEXP (op, 0), 0); ++ offset = INTVAL (XEXP (op, 1)); ++ } ++ else ++ { ++ if (! memory_address_p (mode, op)) ++ return 0; ++ if (GET_CODE (op) == PLUS) ++ { ++ base = XEXP (op, 0); ++ offset = INTVAL (XEXP (op, 1)); ++ } ++ else ++ { ++ base = op; ++ offset = 0; ++ } ++ } ++ ++ if (offset % GET_MODE_SIZE (mode)) ++ return 1; ++ ++ return (REG_P (base) && REGNO_POINTER_ALIGN (REGNO (base)) < 32); ++}) ++ ++;; Return 1 if OP is any memory location. During reload a pseudo matches. 
++(define_special_predicate "any_memory_operand" ++ (match_code "mem,reg,subreg") ++{ ++ if (SUBREG_P (op)) ++ op = SUBREG_REG (op); ++ ++ if (MEM_P (op)) ++ return true; ++ if (reload_in_progress && REG_P (op)) ++ { ++ unsigned regno = REGNO (op); ++ if (HARD_REGISTER_NUM_P (regno)) ++ return false; ++ else ++ return reg_renumber[regno] < 0; ++ } ++ ++ return false; ++}) ++ ++;; Returns 1 if OP is not an eliminable register. ++;; ++;; This exists to cure a pathological failure in the s8addq (et al) patterns, ++;; ++;; long foo () { long t; bar (); return (long) &t * 26107; } ++;; ++;; which run afoul of a hack in reload to cure a (presumably) similar ++;; problem with lea-type instructions on other targets. But there is ++;; one of us and many of them, so work around the problem by selectively ++;; preventing combine from making the optimization. ++ ++(define_predicate "reg_not_elim_operand" ++ (match_operand 0 "register_operand") ++{ ++ if (SUBREG_P (op)) ++ op = SUBREG_REG (op); ++ return op != frame_pointer_rtx && op != arg_pointer_rtx; ++}) ++ ++;; Accept a register, but not a subreg of any kind. This allows us to ++;; avoid pathological cases in reload wrt data movement common in ++;; int->fp conversion. */ ++(define_predicate "reg_no_subreg_operand" ++ (and (match_code "reg") ++ (match_operand 0 "register_operand"))) ++ ++;; Return 1 if OP is a valid Sw_64 comparison operator for "cbranch" ++;; instructions. ++(define_predicate "sw_64_cbranch_operator" ++ (ior (match_operand 0 "ordered_comparison_operator") ++ (match_code "ordered,unordered"))) ++ ++;; Return 1 if OP is a valid Sw_64 comparison operator for "cmp" style ++;; instructions. ++(define_predicate "sw_64_comparison_operator" ++ (match_code "eq,le,lt,leu,ltu")) ++ ++;; Similarly, but with swapped operands. ++(define_predicate "sw_64_swapped_comparison_operator" ++ (match_code "eq,ge,gt,gtu")) ++ ++;; Return 1 if OP is a valid Sw_64 comparison operator against zero ++;; for "bcc" style instructions. ++(define_predicate "sw_64_zero_comparison_operator" ++ (match_code "eq,ne,le,lt,leu,ltu")) ++ ++;; Return 1 if OP is a signed comparison operation. ++(define_predicate "signed_comparison_operator" ++ (match_code "eq,ne,le,lt,ge,gt")) ++ ++;; Return 1 if OP is a valid Sw_64 floating point comparison operator. ++(define_predicate "sw_64_fp_comparison_operator" ++ (match_code "eq,le,lt,unordered")) ++ ++;; Return 1 if this is a divide or modulus operator. ++(define_predicate "divmod_operator" ++ (match_code "div,mod,udiv,umod")) ++ ++;; Return 1 if this is a float->int conversion operator. ++(define_predicate "fix_operator" ++ (match_code "fix,unsigned_fix")) ++ ++;; Recognize an addition operation that includes a constant. Used to ++;; convince reload to canonize (plus (plus reg c1) c2) during register ++;; elimination. ++ ++(define_predicate "addition_operation" ++ (and (match_code "plus") ++ (match_test "register_operand (XEXP (op, 0), mode) ++ && satisfies_constraint_K (XEXP (op, 1))"))) ++ ++;; For TARGET_EXPLICIT_RELOCS, we don't obfuscate a SYMBOL_REF to a ++;; small symbolic operand until after reload. At which point we need ++;; to replace (mem (symbol_ref)) with (mem (lo_sum $29 symbol_ref)) ++;; so that sched2 has the proper dependency information. */ ++(define_predicate "some_small_symbolic_operand" ++ (match_code "set,parallel,prefetch,unspec,unspec_volatile") ++{ ++ /* Avoid search unless necessary. 
*/ ++ if (!TARGET_EXPLICIT_RELOCS || !reload_completed) ++ return false; ++ return some_small_symbolic_operand_int (op); ++}) ++ ++;; Accept a register, or a memory if BWX is enabled. ++(define_predicate "reg_or_bwx_memory_operand" ++ (ior (match_operand 0 "register_operand") ++ (and (match_test "TARGET_BWX") ++ (match_operand 0 "memory_operand")))) ++ ++;; Accept a memory whose address is only a register. ++(define_predicate "mem_noofs_operand" ++ (and (match_code "mem") ++ (match_code "reg" "0"))) ++ ++(define_predicate "sw_64_branch_combination" ++ (match_code "eq,ne,le,lt,ge,gt,leu,ltu,geu,gtu")) ++ ++(define_predicate "sw_64_swapped_branch_combination" ++ (match_code "ne,ge,gt,geu,gtu")) ++ +diff --git a/gcc/config/sw_64/sw6.md b/gcc/config/sw_64/sw6.md +new file mode 100644 +index 000000000..615ddae70 +--- /dev/null ++++ b/gcc/config/sw_64/sw6.md +@@ -0,0 +1,181 @@ ++;; Scheduling description for Sw_64 SW6. ++;; Copyright (C) 2002-2020 Free Software Foundation, Inc. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++;; GNU General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++ ++; SW6 can issue 4 insns per clock. It's out-of-order, so this isn't ++; expected to help over-much, but a precise description can be important ++; for software pipelining. ++; ++; SW6 has two symmetric pairs ("clusters") of two asymmetric integer ++; units ("upper" and "lower"), yielding pipe names U0, U1, L0, L1. ++; ++; ??? The clusters have independent register files that are re-synced ++; every cycle. Thus there is one additional cycle of latency between ++; insns issued on different clusters. Possibly model that by duplicating ++; all EBOX insn_reservations that can issue to either cluster, increasing ++; all latencies by one, and adding bypasses within the cluster. ++; ++; ??? In addition, instruction order affects cluster issue. ++ ++(define_automaton "sw6_0,sw6_1") ++(define_cpu_unit "sw6_u0,sw6_u1,sw6_l0,sw6_l1" "sw6_0") ++(define_reservation "sw6_u" "sw6_u0|sw6_u1") ++(define_reservation "sw6_l" "sw6_l0|sw6_l1") ++(define_reservation "sw6_ebox" "sw6_u|sw6_l") ++ ++(define_cpu_unit "sw6_fa" "sw6_1") ++(define_cpu_unit "sw6_fm,sw6_fst0,sw6_fst1" "sw6_0") ++(define_reservation "sw6_fst" "sw6_fst0|sw6_fst1") ++ ++; Assume type "multi" single issues. ++(define_insn_reservation "sw6_multi" 1 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "multi")) ++ "sw6_u0+sw6_u1+sw6_l0+sw6_l1+sw6_fa+sw6_fm+sw6_fst0+sw6_fst1") ++ ++; Integer loads take at least 3 clocks, and only issue to lower units. ++; adjust_cost still factors in user-specified memory latency, so return 1 here. ++(define_insn_reservation "sw6_ild" 4 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "ild,ldsym,ld_l")) ++ "sw6_l") ++ ++(define_insn_reservation "sw6_ist" 4 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "ist,st_c")) ++ "sw6_l") ++ ++(define_insn_reservation "sw6_mb" 1 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "mb")) ++ "sw6_l1") ++ ++; FP loads take at least 4 clocks. 
adjust_cost still factors ++; in user-specified memory latency, so return 2 here. ++(define_insn_reservation "sw6_fld" 2 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "fld")) ++ "sw6_l") ++ ++; The FPU communicates with memory and the integer register file ++; via two fp store units. We need a slot in the fst immediately, and ++; a slot in LOW after the operand data is ready. At which point the ++; data may be moved either to the store queue or the integer register ++; file and the insn retired. ++ ++(define_insn_reservation "sw6_fst" 3 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "fst")) ++ "sw6_fst,nothing,sw6_l") ++ ++; Arithmetic goes anywhere. ++(define_insn_reservation "sw6_arith" 1 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "iadd,ilog,icmp")) ++ "sw6_ebox") ++ ++; Motion video insns also issue only to U0, and take three ticks. ++(define_insn_reservation "sw6_mvi" 3 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "mvi")) ++ "sw6_u0") ++ ++; Shifts issue to upper units. ++(define_insn_reservation "sw6_shift" 1 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "shift")) ++ "sw6_u") ++ ++; Multiplies issue only to U1, and all take 7 ticks. ++(define_insn_reservation "sw6_imul" 7 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "imul")) ++ "sw6_u1") ++ ++; Conditional moves decompose into two independent primitives, each taking ++; one cycle. Since sw6 is out-of-order, we can't see anything but two cycles. ++(define_insn_reservation "sw6_icmov" 2 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "icmov")) ++ "sw6_ebox,sw6_ebox") ++ ++; Integer branches issue to upper units ++(define_insn_reservation "sw6_ibr" 1 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "ibr,callpal")) ++ "sw6_u") ++ ++; Calls only issue to L0. ++(define_insn_reservation "sw6_jsr" 1 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "call")) ++ "sw6_l0") ++ ++; Ftoi/itof only issue to lower pipes. ++(define_insn_reservation "sw6_itof" 3 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "itof")) ++ "sw6_l") ++ ++(define_insn_reservation "sw6_ftoi" 3 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "ftoi")) ++ "sw6_fst,nothing,sw6_l") ++ ++(define_insn_reservation "sw6_fmul" 4 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "fmul")) ++ "sw6_fm") ++ ++(define_insn_reservation "sw6_fadd" 4 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "fadd,fcpys,fbr")) ++ "sw6_fa") ++ ++(define_bypass 6 "sw6_fmul,sw6_fadd" "sw6_fst,sw6_ftoi") ++ ++(define_insn_reservation "sw6_fcmov" 8 ++ (and (eq_attr "tune" "sw6") ++ (eq_attr "type" "fcmov")) ++ "sw6_fa,nothing*3,sw6_fa") ++ ++(define_bypass 10 "sw6_fcmov" "sw6_fst,sw6_ftoi") ++ ++(define_insn_reservation "sw6_fdivsf" 12 ++ (and (eq_attr "tune" "sw6") ++ (and (eq_attr "type" "fdiv") ++ (eq_attr "opsize" "si"))) ++ "sw6_fa*9") ++ ++(define_insn_reservation "sw6_fdivdf" 15 ++ (and (eq_attr "tune" "sw6") ++ (and (eq_attr "type" "fdiv") ++ (eq_attr "opsize" "di"))) ++ "sw6_fa*12") ++ ++(define_insn_reservation "sw6_sqrtsf" 18 ++ (and (eq_attr "tune" "sw6") ++ (and (eq_attr "type" "fsqrt") ++ (eq_attr "opsize" "si"))) ++ "sw6_fa*15") ++ ++(define_insn_reservation "sw6_sqrtdf" 33 ++ (and (eq_attr "tune" "sw6") ++ (and (eq_attr "type" "fsqrt") ++ (eq_attr "opsize" "di"))) ++ "sw6_fa*30") +diff --git a/gcc/config/sw_64/sw8.md b/gcc/config/sw_64/sw8.md +new file mode 100644 +index 000000000..414908dbc +--- /dev/null ++++ b/gcc/config/sw_64/sw8.md +@@ -0,0 +1,181 @@ ++;; Scheduling description for Sw_64 SW8. ++;; Copyright (C) 2002-2020 Free Software Foundation, Inc. 
++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++;; GNU General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++ ++; SW8 can issue 4 insns per clock. It's out-of-order, so this isn't ++; expected to help over-much, but a precise description can be important ++; for software pipelining. ++; ++; SW8 has two symmetric pairs ("clusters") of two asymmetric integer ++; units ("upper" and "lower"), yielding pipe names U0, U1, L0, L1. ++; ++; ??? The clusters have independent register files that are re-synced ++; every cycle. Thus there is one additional cycle of latency between ++; insns issued on different clusters. Possibly model that by duplicating ++; all EBOX insn_reservations that can issue to either cluster, increasing ++; all latencies by one, and adding bypasses within the cluster. ++; ++; ??? In addition, instruction order affects cluster issue. ++ ++(define_automaton "sw8_0,sw8_1") ++(define_cpu_unit "sw8_u0,sw8_u1,sw8_l0,sw8_l1" "sw8_0") ++(define_reservation "sw8_u" "sw8_u0|sw8_u1") ++(define_reservation "sw8_l" "sw8_l0|sw8_l1") ++(define_reservation "sw8_ebox" "sw8_u|sw8_l") ++ ++(define_cpu_unit "sw8_fa" "sw8_1") ++(define_cpu_unit "sw8_fm,sw8_fst0,sw8_fst1" "sw8_0") ++(define_reservation "sw8_fst" "sw8_fst0|sw8_fst1") ++ ++; Assume type "multi" single issues. ++(define_insn_reservation "sw8_multi" 1 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "multi")) ++ "sw8_u0+sw8_u1+sw8_l0+sw8_l1+sw8_fa+sw8_fm+sw8_fst0+sw8_fst1") ++ ++; Integer loads take at least 3 clocks, and only issue to lower units. ++; adjust_cost still factors in user-specified memory latency, so return 1 here. ++(define_insn_reservation "sw8_ild" 4 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "ild,ldsym,ld_l")) ++ "sw8_l") ++ ++(define_insn_reservation "sw8_ist" 4 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "ist,st_c")) ++ "sw8_l") ++ ++(define_insn_reservation "sw8_mb" 1 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "mb")) ++ "sw8_l1") ++ ++; FP loads take at least 4 clocks. adjust_cost still factors ++; in user-specified memory latency, so return 2 here. ++(define_insn_reservation "sw8_fld" 2 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "fld")) ++ "sw8_l") ++ ++; The FPU communicates with memory and the integer register file ++; via two fp store units. We need a slot in the fst immediately, and ++; a slot in LOW after the operand data is ready. At which point the ++; data may be moved either to the store queue or the integer register ++; file and the insn retired. ++ ++(define_insn_reservation "sw8_fst" 3 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "fst")) ++ "sw8_fst,nothing,sw8_l") ++ ++; Arithmetic goes anywhere. ++(define_insn_reservation "sw8_arith" 1 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "iadd,ilog,icmp")) ++ "sw8_ebox") ++ ++; Motion video insns also issue only to U0, and take three ticks. 
++(define_insn_reservation "sw8_mvi" 3 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "mvi")) ++ "sw8_u0") ++ ++; Shifts issue to upper units. ++(define_insn_reservation "sw8_shift" 1 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "shift")) ++ "sw8_u") ++ ++; Multiplies issue only to U1, and all take 7 ticks. ++(define_insn_reservation "sw8_imul" 7 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "imul")) ++ "sw8_u1") ++ ++; Conditional moves decompose into two independent primitives, each taking ++; one cycle. Since sw8 is out-of-order, we can't see anything but two cycles. ++(define_insn_reservation "sw8_icmov" 2 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "icmov")) ++ "sw8_ebox,sw8_ebox") ++ ++; Integer branches issue to upper units ++(define_insn_reservation "sw8_ibr" 1 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "ibr,callpal")) ++ "sw8_u") ++ ++; Calls only issue to L0. ++(define_insn_reservation "sw8_jsr" 1 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "call")) ++ "sw8_l0") ++ ++; Ftoi/itof only issue to lower pipes. ++(define_insn_reservation "sw8_itof" 3 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "itof")) ++ "sw8_l") ++ ++(define_insn_reservation "sw8_ftoi" 3 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "ftoi")) ++ "sw8_fst,nothing,sw8_l") ++ ++(define_insn_reservation "sw8_fmul" 4 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "fmul")) ++ "sw8_fm") ++ ++(define_insn_reservation "sw8_fadd" 4 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "fadd,fcpys,fbr")) ++ "sw8_fa") ++ ++(define_bypass 6 "sw8_fmul,sw8_fadd" "sw8_fst,sw8_ftoi") ++ ++(define_insn_reservation "sw8_fcmov" 8 ++ (and (eq_attr "tune" "sw8") ++ (eq_attr "type" "fcmov")) ++ "sw8_fa,nothing*3,sw8_fa") ++ ++(define_bypass 10 "sw8_fcmov" "sw8_fst,sw8_ftoi") ++ ++(define_insn_reservation "sw8_fdivsf" 12 ++ (and (eq_attr "tune" "sw8") ++ (and (eq_attr "type" "fdiv") ++ (eq_attr "opsize" "si"))) ++ "sw8_fa*9") ++ ++(define_insn_reservation "sw8_fdivdf" 15 ++ (and (eq_attr "tune" "sw8") ++ (and (eq_attr "type" "fdiv") ++ (eq_attr "opsize" "di"))) ++ "sw8_fa*12") ++ ++(define_insn_reservation "sw8_sqrtsf" 18 ++ (and (eq_attr "tune" "sw8") ++ (and (eq_attr "type" "fsqrt") ++ (eq_attr "opsize" "si"))) ++ "sw8_fa*15") ++ ++(define_insn_reservation "sw8_sqrtdf" 33 ++ (and (eq_attr "tune" "sw8") ++ (and (eq_attr "type" "fsqrt") ++ (eq_attr "opsize" "di"))) ++ "sw8_fa*30") +diff --git a/gcc/config/sw_64/sw_64-modes.def b/gcc/config/sw_64/sw_64-modes.def +new file mode 100644 +index 000000000..537a1b654 +--- /dev/null ++++ b/gcc/config/sw_64/sw_64-modes.def +@@ -0,0 +1,27 @@ ++/* Sw_64 extra machine modes. ++ Copyright (C) 2003-2020 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* 128-bit floating point. This gets reset in sw_64_option_override ++ if VAX float format is in use. */ ++FLOAT_MODE (TF, 16, ieee_quad_format); ++ ++/* Vector modes. */ ++VECTOR_MODES (INT, 8); /* V8QI V4HI V2SI. 
*/ ++VECTOR_MODE (INT, QI, 4); /* V4QI. */ ++VECTOR_MODE (INT, QI, 2); /* V2QI. */ +diff --git a/gcc/config/sw_64/sw_64-passes.def b/gcc/config/sw_64/sw_64-passes.def +new file mode 100644 +index 000000000..9d3964cdb +--- /dev/null ++++ b/gcc/config/sw_64/sw_64-passes.def +@@ -0,0 +1,21 @@ ++/* Description of target passes for Sw_64 ++ Copyright (C) 2016-2020 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it under ++the terms of the GNU General Public License as published by the Free ++Software Foundation; either version 3, or (at your option) any later ++version. ++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ANY ++WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++ INSERT_PASS_AFTER (pass_convert_to_eh_region_ranges, 1, pass_handle_trap_shadows); ++ INSERT_PASS_BEFORE (pass_shorten_branches, 1, pass_align_insns); +diff --git a/gcc/config/sw_64/sw_64-protos.h b/gcc/config/sw_64/sw_64-protos.h +new file mode 100644 +index 000000000..c20a1cfec +--- /dev/null ++++ b/gcc/config/sw_64/sw_64-protos.h +@@ -0,0 +1,146 @@ ++/* Prototypes for sw_64.c functions used in the md file & elsewhere. ++ Copyright (C) 1999-2020 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. 
*/ ++ ++extern int sw_64_next_sequence_number; ++ ++extern void ++literal_section (void); ++extern int zap_mask (HOST_WIDE_INT); ++extern bool ++direct_return (void); ++ ++extern HOST_WIDE_INT ++sw_64_initial_elimination_offset (unsigned int, unsigned int); ++extern void ++sw_64_expand_prologue (void); ++extern void ++sw_64_expand_epilogue (void); ++extern void ++sw_64_output_filename (FILE *, const char *); ++ ++extern bool sw_64_legitimate_constant_p (machine_mode, rtx); ++extern rtx ++sw_64_legitimize_reload_address (rtx, machine_mode, int, int, int); ++ ++extern rtx split_small_symbolic_operand (rtx); ++ ++extern void ++get_aligned_mem (rtx, rtx *, rtx *); ++extern rtx get_unaligned_address (rtx); ++extern rtx get_unaligned_offset (rtx, HOST_WIDE_INT); ++extern enum reg_class sw_64_preferred_reload_class (rtx, enum reg_class); ++ ++extern void sw_64_set_memflags (rtx, rtx); ++extern bool ++sw_64_split_const_mov (machine_mode, rtx *); ++extern bool ++sw_64_expand_mov (machine_mode, rtx *); ++extern bool ++sw_64_expand_mov_nobwx (machine_mode, rtx *); ++extern void ++sw_64_expand_movmisalign (machine_mode, rtx *); ++extern void sw_64_emit_floatuns (rtx[]); ++extern rtx sw_64_emit_conditional_move (rtx, machine_mode); ++extern void ++sw_64_split_tmode_pair (rtx[], machine_mode, bool); ++extern void sw_64_split_tfmode_frobsign (rtx[], rtx (*) (rtx, rtx, rtx)); ++extern void ++sw_64_expand_unaligned_load (rtx, rtx, HOST_WIDE_INT, HOST_WIDE_INT, int); ++extern void sw_64_expand_unaligned_store (rtx, rtx, HOST_WIDE_INT, ++ HOST_WIDE_INT); ++extern int sw_64_expand_block_move (rtx[]); ++extern int sw_64_expand_block_clear (rtx[]); ++extern rtx sw_64_expand_zap_mask (HOST_WIDE_INT); ++extern void sw_64_expand_builtin_vector_binop (rtx (*) (rtx, rtx, rtx), ++ machine_mode, rtx, rtx, rtx); ++ ++extern rtx ++sw_64_return_addr (int, rtx); ++extern rtx ++sw_64_gp_save_rtx (void); ++extern void ++sw_64_initialize_trampoline (rtx, rtx, rtx, int, int, int); ++ ++extern rtx sw_64_va_arg (tree, tree); ++ ++extern void ++sw_64_start_function (FILE *, const char *, tree); ++extern void ++sw_64_end_function (FILE *, const char *, tree); ++ ++extern bool sw_64_find_lo_sum_using_gp (rtx); ++ ++#ifdef REAL_VALUE_TYPE ++extern int ++check_float_value (machine_mode, REAL_VALUE_TYPE *, int); ++#endif ++ ++#ifdef RTX_CODE ++extern void sw_64_emit_conditional_branch (rtx[], machine_mode); ++extern bool sw_64_emit_setcc (rtx[], machine_mode); ++extern int sw_64_split_conditional_move (enum rtx_code, rtx, rtx, rtx, rtx); ++extern void sw_64_emit_xfloating_arith (enum rtx_code, rtx[]); ++extern void sw_64_emit_xfloating_cvt (enum rtx_code, rtx[]); ++extern void sw_64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, ++ enum memmodel); ++extern void ++sw_64_split_compare_and_swap (rtx op[]); ++extern void ++sw_64_expand_compare_and_swap_12 (rtx op[]); ++extern void ++sw_64_split_compare_and_swap_12 (rtx op[]); ++extern void ++sw_64_split_atomic_exchange (rtx op[]); ++extern void ++sw_64_expand_atomic_exchange_12 (rtx op[]); ++extern void ++sw_64_split_atomic_exchange_12 (rtx op[]); ++#endif ++ ++extern void ++sw_64_split_atomic_cas (rtx op[]); ++extern void ++sw_64_split_atomic_cas_12 (rtx op[]); ++ ++extern rtx ++sw_64_use_linkage (rtx, bool, bool); ++ ++extern rtx unicosmk_add_call_info_word (rtx); ++ ++extern bool some_small_symbolic_operand_int (rtx); ++extern int ++tls_symbolic_operand_1 (rtx, int, int); ++extern rtx resolve_reload_operand (rtx); ++ ++namespace gcc { ++class context; ++} ++class 
rtl_opt_pass; ++ ++extern rtl_opt_pass * ++make_pass_handle_trap_shadows (gcc::context *); ++extern rtl_opt_pass * ++make_pass_align_insns (gcc::context *); ++ ++extern void ++sw_64_emit_rsqrt (rtx, rtx, bool); ++ ++extern void ++sw_64_emit_swdiv (rtx, rtx, rtx, bool); ++extern rtx gen_move_reg (rtx); +diff --git a/gcc/config/sw_64/sw_64.c b/gcc/config/sw_64/sw_64.c +new file mode 100644 +index 000000000..68b85b828 +--- /dev/null ++++ b/gcc/config/sw_64/sw_64.c +@@ -0,0 +1,10058 @@ ++/* Subroutines used for code generation on the Sw_64. ++ Copyright (C) 1992-2020 Free Software Foundation, Inc. ++ Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++#define IN_TARGET_CODE 1 ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "target.h" ++#include "rtl.h" ++#include "tree.h" ++#include "stringpool.h" ++#include "attribs.h" ++#include "memmodel.h" ++#include "gimple.h" ++#include "df.h" ++#include "predict.h" ++#include "tm_p.h" ++#include "ssa.h" ++#include "expmed.h" ++#include "optabs.h" ++#include "regs.h" ++#include "emit-rtl.h" ++#include "recog.h" ++#include "diagnostic-core.h" ++#include "alias.h" ++#include "fold-const.h" ++#include "stor-layout.h" ++#include "calls.h" ++#include "varasm.h" ++#include "output.h" ++#include "insn-attr.h" ++#include "explow.h" ++#include "expr.h" ++#include "reload.h" ++#include "except.h" ++#include "common/common-target.h" ++#include "debug.h" ++#include "langhooks.h" ++#include "cfgrtl.h" ++#include "tree-pass.h" ++#include "context.h" ++#include "gimple-iterator.h" ++#include "gimplify.h" ++#include "tree-stdarg.h" ++#include "tm-constrs.h" ++#include "libfuncs.h" ++#include "opts.h" ++#include "builtins.h" ++#include "rtl-iter.h" ++#include "asan.h" ++ ++#include "flags.h" ++/* This file should be included last. */ ++#include "target-def.h" ++ ++/* Specify which cpu to schedule for. */ ++enum processor_type sw_64_tune; ++ ++/* Which cpu we're generating code for. */ ++enum processor_type sw_64_cpu; ++ ++static const char *const sw_64_cpu_name[] = {"sw6", "sw8a"}; ++ ++/* Specify how accurate floating-point traps need to be. */ ++ ++enum sw_64_trap_precision sw_64_tp; ++ ++/* Specify the floating-point rounding mode. */ ++ ++enum sw_64_fp_rounding_mode sw_64_fprm; ++ ++/* Specify which things cause traps. */ ++ ++enum sw_64_fp_trap_mode sw_64_fptm; ++ ++/* Nonzero if inside of a function, because the Sw_64 asm can't ++ handle .files inside of functions. */ ++ ++static int inside_function = FALSE; ++ ++/* The number of cycles of latency we should assume on memory reads. */ ++ ++static int sw_64_memory_latency = 3; ++ ++/* Whether the function needs the GP. */ ++ ++static int sw_64_function_needs_gp; ++ ++/* The assembler name of the current function. */ ++ ++static const char *sw_64_fnname; ++ ++/* The next explicit relocation sequence number. 
*/ ++extern GTY (()) int sw_64_next_sequence_number; ++int sw_64_next_sequence_number = 1; ++ ++int stfp3_flag; ++extern int flag_fpcr_set; ++ ++int warning_sbt_num = 0; ++int warning_cbt_num = 0; ++ ++/* The literal and gpdisp sequence numbers for this insn, as printed ++ by %# and %* respectively. */ ++extern GTY (()) int sw_64_this_literal_sequence_number; ++extern GTY (()) int sw_64_this_gpdisp_sequence_number; ++int sw_64_this_literal_sequence_number; ++int sw_64_this_gpdisp_sequence_number; ++ ++/* Costs of various operations on the different architectures. */ ++ ++struct sw_64_rtx_cost_data ++{ ++ unsigned char fp_add; ++ unsigned char fp_mult; ++ unsigned char fp_div_sf; ++ unsigned char fp_div_df; ++ unsigned char int_mult_si; ++ unsigned char int_mult_di; ++ unsigned char int_shift; ++ unsigned char int_cmov; ++ unsigned short int_div; ++}; ++ ++static struct sw_64_rtx_cost_data const sw_64_rtx_cost_data[PROCESSOR_MAX + 1] ++ = { ++ { ++ /* sw6b */ ++ COSTS_N_INSNS (6), /* fp_add */ ++ COSTS_N_INSNS (6), /* fp_mult */ ++ COSTS_N_INSNS (19), /* fp_div_sf */ ++ COSTS_N_INSNS (19), /* fp_div_df */ ++ COSTS_N_INSNS (4), /* int_mult_si */ ++ COSTS_N_INSNS (4), /* int_mult_di */ ++ COSTS_N_INSNS (1), /* int_shift */ ++ COSTS_N_INSNS (1), /* int_cmov */ ++ COSTS_N_INSNS (83), /* int_div */ ++ }, ++ { ++ /* sw8a */ ++ COSTS_N_INSNS (6), /* fp_add */ ++ COSTS_N_INSNS (6), /* fp_mult */ ++ COSTS_N_INSNS (19), /* fp_div_sf */ ++ COSTS_N_INSNS (19), /* fp_div_df */ ++ COSTS_N_INSNS (4), /* int_mult_si */ ++ COSTS_N_INSNS (4), /* int_mult_di */ ++ COSTS_N_INSNS (1), /* int_shift */ ++ COSTS_N_INSNS (1), /* int_cmov */ ++ COSTS_N_INSNS (20), /* int_div */ ++ }, ++ { ++ /* rtx-cost */ ++ COSTS_N_INSNS (6), /* fp_add */ ++ COSTS_N_INSNS (6), /* fp_mult */ ++ COSTS_N_INSNS (19), /* fp_div_sf */ ++ COSTS_N_INSNS (19), /* fp_div_df */ ++ COSTS_N_INSNS (4), /* int_mult_si */ ++ COSTS_N_INSNS (4), /* int_mult_di */ ++ COSTS_N_INSNS (3), /* int_shift */ ++ COSTS_N_INSNS (1), /* int_cmov */ ++ COSTS_N_INSNS (20), /* int_div */ ++ }, ++}; ++ ++/* Similar but tuned for code size instead of execution latency. The ++ extra +N is fractional cost tuning based on latency. It's used to ++ encourage use of cheaper insns like shift, but only if there's just ++ one of them. */ ++ ++static struct sw_64_rtx_cost_data const sw_64_rtx_cost_size = { ++ COSTS_N_INSNS (1), /* fp_add */ ++ COSTS_N_INSNS (1), /* fp_mult */ ++ COSTS_N_INSNS (1), /* fp_div_sf */ ++ COSTS_N_INSNS (1) + 1, /* fp_div_df */ ++ COSTS_N_INSNS (1) + 1, /* int_mult_si */ ++ COSTS_N_INSNS (1) + 2, /* int_mult_di */ ++ COSTS_N_INSNS (1), /* int_shift */ ++ COSTS_N_INSNS (1), /* int_cmov */ ++ COSTS_N_INSNS (6), /* int_div */ ++}; ++ ++/* Get the number of args of a function in one of two ways. */ ++#define NUM_ARGS crtl->args.info ++ ++#define REG_PV 27 ++#define REG_RA 26 ++ ++/* Declarations of static functions. 
*/ ++static struct machine_function * ++sw_64_init_machine_status (void); ++static rtx ++sw_64_emit_xfloating_compare (enum rtx_code *, rtx, rtx); ++static void ++sw_64_handle_trap_shadows (void); ++static void ++sw_64_align_insns (void); ++static void ++sw_64_override_options_after_change (void); ++ ++static unsigned int ++rest_of_handle_trap_shadows (void) ++{ ++ sw_64_handle_trap_shadows (); ++ return 0; ++} ++ ++namespace { ++ ++const pass_data pass_data_handle_trap_shadows = { ++ RTL_PASS, ++ "trap_shadows", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_NONE, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ TODO_df_finish, /* todo_flags_finish */ ++}; ++ ++class pass_handle_trap_shadows : public rtl_opt_pass ++{ ++public: ++ pass_handle_trap_shadows (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_handle_trap_shadows, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *) ++ { ++ return sw_64_tp != SW_64_TP_PROG || flag_exceptions; ++ } ++ ++ virtual unsigned int execute (function *) ++ { ++ return rest_of_handle_trap_shadows (); ++ } ++ ++}; // class pass_handle_trap_shadows ++ ++} // namespace ++ ++rtl_opt_pass * ++make_pass_handle_trap_shadows (gcc::context *ctxt) ++{ ++ return new pass_handle_trap_shadows (ctxt); ++} ++ ++static unsigned int ++rest_of_align_insns (void) ++{ ++ sw_64_align_insns (); ++ return 0; ++} ++ ++namespace { ++ ++const pass_data pass_data_align_insns = { ++ RTL_PASS, ++ "align_insns", /* name */ ++ OPTGROUP_NONE, /* optinfo_flags */ ++ TV_NONE, /* tv_id */ ++ 0, /* properties_required */ ++ 0, /* properties_provided */ ++ 0, /* properties_destroyed */ ++ 0, /* todo_flags_start */ ++ TODO_df_finish, /* todo_flags_finish */ ++}; ++ ++class pass_align_insns : public rtl_opt_pass ++{ ++public: ++ pass_align_insns (gcc::context *ctxt) ++ : rtl_opt_pass (pass_data_align_insns, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *) ++ { ++ /* Due to the number of extra memb insns, don't bother fixing up ++ alignment when trap precision is instruction. Moreover, we can ++ only do our job when sched2 is run. */ ++ return ((sw_64_tune != PROCESSOR_SW6 && sw_64_tune != PROCESSOR_SW8) ++ && optimize && !optimize_size && sw_64_tp != SW_64_TP_INSN ++ && flag_schedule_insns_after_reload); ++ } ++ ++ virtual unsigned int execute (function *) { return rest_of_align_insns (); } ++ ++}; // class pass_align_insns ++ ++} // namespace ++ ++rtl_opt_pass * ++make_pass_align_insns (gcc::context *ctxt) ++{ ++ return new pass_align_insns (ctxt); ++} ++ ++#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING ++/* Implement TARGET_MANGLE_TYPE. */ ++ ++static const char * ++sw_64_mangle_type (const_tree type) ++{ ++ if (TYPE_MAIN_VARIANT (type) == long_double_type_node ++ && TARGET_LONG_DOUBLE_128) ++ return "g"; ++ ++ /* For all other types, use normal C++ mangling. */ ++ return NULL; ++} ++#endif ++ ++/* Parse target option strings. */ ++ ++static void ++sw_64_option_override (void) ++{ ++ static const struct cpu_table ++ { ++ const char *const name; ++ const enum processor_type processor; ++ const int flags; ++ const unsigned short line_size; /* in bytes. */ ++ const unsigned short l1_size; /* in kb. */ ++ const unsigned short l2_size; /* in kb. 
*/ ++ } cpu_table[] = { ++ {"sw6a", PROCESSOR_SW6, MASK_BWX | MASK_CIX | MASK_FIX | MASK_SW6A, 128, 32, ++ 512}, ++ {"sw6b", PROCESSOR_SW6, MASK_BWX | MASK_CIX | MASK_FIX | MASK_SW6B, 128, 32, ++ 512}, ++ {"sw8a", PROCESSOR_SW8, MASK_BWX | MASK_CIX | MASK_FIX | MASK_SW8A, 128, 32, ++ 512}, ++ }; ++ ++ int const ct_size = ARRAY_SIZE (cpu_table); ++ int line_size = 0, l1_size = 0, l2_size = 0; ++ int i; ++ ++#ifdef SUBTARGET_OVERRIDE_OPTIONS ++ SUBTARGET_OVERRIDE_OPTIONS; ++#endif ++ ++ /* Default to full IEEE compliance mode for Go language. */ ++ if (strcmp (lang_hooks.name, "GNU Go") == 0 ++ && !(target_flags_explicit & MASK_IEEE)) ++ target_flags |= MASK_IEEE; ++ ++ sw_64_fprm = SW_64_FPRM_NORM; ++ sw_64_tp = SW_64_TP_PROG; ++ sw_64_fptm = SW_64_FPTM_N; ++ ++ if (TARGET_IEEE) ++ { ++ sw_64_tp = SW_64_TP_INSN; ++ sw_64_fptm = SW_64_FPTM_SU; ++ } ++ if (TARGET_IEEE_WITH_INEXACT) ++ { ++ sw_64_tp = SW_64_TP_INSN; ++ sw_64_fptm = SW_64_FPTM_SUI; ++ } ++ if (TARGET_IEEE_MAIN) ++ { ++ sw_64_tp = SW_64_TP_INSN; ++ sw_64_fptm = SW_64_FPTM_SU; ++ } ++ ++ if (sw_64_tp_string) ++ { ++ if (!strcmp (sw_64_tp_string, "p")) ++ sw_64_tp = SW_64_TP_PROG; ++ else if (!strcmp (sw_64_tp_string, "f")) ++ sw_64_tp = SW_64_TP_FUNC; ++ else if (!strcmp (sw_64_tp_string, "i")) ++ sw_64_tp = SW_64_TP_INSN; ++ else ++ error ("bad value %qs for %<-mtrap-precision%> switch", ++ sw_64_tp_string); ++ } ++ ++ if (sw_64_fprm_string) ++ { ++ if (!strcmp (sw_64_fprm_string, "n")) ++ sw_64_fprm = SW_64_FPRM_NORM; ++ else if (!strcmp (sw_64_fprm_string, "m")) ++ sw_64_fprm = SW_64_FPRM_MINF; ++ else if (!strcmp (sw_64_fprm_string, "c")) ++ sw_64_fprm = SW_64_FPRM_CHOP; ++ else if (!strcmp (sw_64_fprm_string, "d")) ++ sw_64_fprm = SW_64_FPRM_DYN; ++ else ++ error ("bad value %qs for %<-mfp-rounding-mode%> switch", ++ sw_64_fprm_string); ++ } ++ ++ if (sw_64_fptm_string) ++ { ++ if (strcmp (sw_64_fptm_string, "n") == 0) ++ sw_64_fptm = SW_64_FPTM_N; ++ else if (strcmp (sw_64_fptm_string, "u") == 0) ++ sw_64_fptm = SW_64_FPTM_U; ++ else if (strcmp (sw_64_fptm_string, "su") == 0) ++ sw_64_fptm = SW_64_FPTM_SU; ++ else if (strcmp (sw_64_fptm_string, "sui") == 0) ++ sw_64_fptm = SW_64_FPTM_SUI; ++ else ++ error ("bad value %qs for %<-mfp-trap-mode%> switch", ++ sw_64_fptm_string); ++ } ++ ++ if (sw_64_cpu_string) ++ { ++ for (i = 0; i < ct_size; i++) ++ if (!strcmp (sw_64_cpu_string, cpu_table[i].name)) ++ { ++ sw_64_tune = sw_64_cpu = cpu_table[i].processor; ++ line_size = cpu_table[i].line_size; ++ l1_size = cpu_table[i].l1_size; ++ l2_size = cpu_table[i].l2_size; ++ target_flags &= ~(MASK_BWX | MASK_MAX | MASK_FIX | MASK_CIX ++ | MASK_SW6A | MASK_SW6B | MASK_SW8A); ++ target_flags |= cpu_table[i].flags; ++ break; ++ } ++ if (i == ct_size) ++ error ("bad value %qs for %<-mcpu%> switch", sw_64_cpu_string); ++ } ++ ++ if (sw_64_tune_string) ++ { ++ for (i = 0; i < ct_size; i++) ++ if (!strcmp (sw_64_tune_string, cpu_table[i].name)) ++ { ++ sw_64_tune = cpu_table[i].processor; ++ line_size = cpu_table[i].line_size; ++ l1_size = cpu_table[i].l1_size; ++ l2_size = cpu_table[i].l2_size; ++ break; ++ } ++ if (i == ct_size) ++ error ("bad value %qs for %<-mtune%> switch", sw_64_tune_string); ++ } ++ if (line_size) ++ SET_OPTION_IF_UNSET (&global_options, &global_options_set, ++ param_l1_cache_line_size, line_size); ++ if (l1_size) ++ SET_OPTION_IF_UNSET (&global_options, &global_options_set, ++ param_l1_cache_size, l1_size); ++ if (l2_size) ++ SET_OPTION_IF_UNSET (&global_options, &global_options_set, ++ param_l2_cache_size, l2_size); 
++ ++ // generate prefetch for cases like stream add ++ if (flag_sw_prefetch_add == 1) ++ SET_OPTION_IF_UNSET (&global_options, &global_options_set, ++ param_prefetch_min_insn_to_mem_ratio, 2); ++ ++ if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch) ++ flag_prefetch_loop_arrays = 1; ++ ++ /* set simultaneous prefetches and latency for sw ++ * * need add some conditions to decide what the cpu kind. */ ++ SET_OPTION_IF_UNSET (&global_options, &global_options_set, ++ param_simultaneous_prefetches, 8); ++ ++ if (flag_sw_prefetch_unroll == 1) ++ { ++ SET_OPTION_IF_UNSET (&global_options, &global_options_set, ++ param_max_unrolled_insns, 400); ++ } ++ /* Do some sanity checks on the above options. */ ++ ++ if ((sw_64_fptm == SW_64_FPTM_SU || sw_64_fptm == SW_64_FPTM_SUI) ++ && sw_64_tp != SW_64_TP_INSN && sw_64_cpu != PROCESSOR_SW6 ++ && sw_64_cpu != PROCESSOR_SW8) ++ { ++ warning (0, "fp software completion requires %<-mtrap-precision=i%>"); ++ sw_64_tp = SW_64_TP_INSN; ++ } ++ ++ if (sw_64_cpu == PROCESSOR_SW6 || sw_64_cpu == PROCESSOR_SW8) ++ { ++ /* Except for SW6 pass 1 (not released), we always have precise ++ arithmetic traps. Which means we can do software completion ++ without minding trap shadows. */ ++ sw_64_tp = SW_64_TP_PROG; ++ } ++ ++ if (TARGET_FLOAT_VAX) ++ { ++ if (sw_64_fprm == SW_64_FPRM_MINF || sw_64_fprm == SW_64_FPRM_DYN) ++ { ++ warning (0, "rounding mode not supported for VAX floats"); ++ sw_64_fprm = SW_64_FPRM_NORM; ++ } ++ if (sw_64_fptm == SW_64_FPTM_SUI) ++ { ++ warning (0, "trap mode not supported for VAX floats"); ++ sw_64_fptm = SW_64_FPTM_SU; ++ } ++ if (target_flags_explicit & MASK_LONG_DOUBLE_128) ++ warning (0, "128-bit long double not supported for VAX floats"); ++ target_flags &= ~MASK_LONG_DOUBLE_128; ++ } ++ ++ { ++ char *end; ++ int lat; ++ ++ if (!sw_64_mlat_string) ++ sw_64_mlat_string = "L1"; ++ ++ if (ISDIGIT ((unsigned char) sw_64_mlat_string[0]) ++ && (lat = strtol (sw_64_mlat_string, &end, 10), *end == '\0')) ++ ; ++ else if ((sw_64_mlat_string[0] == 'L' || sw_64_mlat_string[0] == 'l') ++ && ISDIGIT ((unsigned char) sw_64_mlat_string[1]) ++ && sw_64_mlat_string[2] == '\0') ++ { ++ static int cache_latency[][4] = { ++ {3, 12, 30}, /* sw6 -- Bcache from LMbench. */ ++ // { 4, 15, 90 }, /* sw6b -- Bcache from LMbench. */ ++ {3, 7, 11}, /* sw8a -- Bcache from LMbench. */ ++ }; ++ if (flag_sw_rtx_cost) ++ { ++ cache_latency[sw_64_tune][0] = 3; ++ cache_latency[sw_64_tune][1] = 7; ++ cache_latency[sw_64_tune][2] = 11; ++ } ++ ++ lat = sw_64_mlat_string[1] - '0'; ++ if (lat <= 0 || lat > 3 || cache_latency[sw_64_tune][lat - 1] == -1) ++ { ++ warning (0, "L%d cache latency unknown for %s", lat, ++ sw_64_cpu_name[sw_64_tune]); ++ lat = 3; ++ } ++ else ++ lat = cache_latency[sw_64_tune][lat - 1]; ++ } ++ else if (!strcmp (sw_64_mlat_string, "main")) ++ { ++ /* Most current memories have about 370ns latency. This is ++ a reasonable guess for a fast cpu. */ ++ lat = 150; ++ } ++ else ++ { ++ warning (0, "bad value %qs for %<-mmemory-latency%>", ++ sw_64_mlat_string); ++ lat = 3; ++ } ++ ++ sw_64_memory_latency = lat; ++ } ++ ++ /* Default the definition of "small data" to 8 bytes. */ ++ if (!global_options_set.x_g_switch_value) ++ g_switch_value = 8; ++ ++ /* Infer TARGET_SMALL_DATA from -fpic/-fPIC. */ ++ if (flag_pic == 1) ++ target_flags |= MASK_SMALL_DATA; ++ else if (flag_pic == 2) ++ target_flags &= ~MASK_SMALL_DATA; ++ ++ sw_64_override_options_after_change (); ++ ++ /* Register variables and functions with the garbage collector. 
*/ ++ ++ /* Set up function hooks. */ ++ init_machine_status = sw_64_init_machine_status; ++ ++ /* Tell the compiler when we're using VAX floating point. */ ++ if (TARGET_FLOAT_VAX) ++ { ++ REAL_MODE_FORMAT (SFmode) = &vax_f_format; ++ REAL_MODE_FORMAT (DFmode) = &vax_g_format; ++ REAL_MODE_FORMAT (TFmode) = NULL; ++ } ++ ++#ifdef TARGET_DEFAULT_LONG_DOUBLE_128 ++ if (!(target_flags_explicit & MASK_LONG_DOUBLE_128)) ++ target_flags |= MASK_LONG_DOUBLE_128; ++#endif ++} ++ ++/* Implement targetm.override_options_after_change. */ ++ ++static void ++sw_64_override_options_after_change (void) ++{ ++ /* Align labels and loops for optimal branching. */ ++ /* ??? Kludge these by not doing anything if we don't optimize. */ ++ if (optimize > 0) ++ { ++ if (flag_align_loops && !str_align_loops) ++ str_align_loops = "16"; ++ if (flag_align_jumps && !str_align_jumps) ++ str_align_jumps = "16"; ++ } ++ if (flag_align_functions && !str_align_functions) ++ str_align_functions = "16"; ++} ++ ++/* Returns 1 if VALUE is a mask that contains full bytes of zero or ones. */ ++ ++int ++zap_mask (HOST_WIDE_INT value) ++{ ++ int i; ++ ++ for (i = 0; i < HOST_BITS_PER_WIDE_INT / HOST_BITS_PER_CHAR; i++, value >>= 8) ++ if ((value & 0xff) != 0 && (value & 0xff) != 0xff) ++ return 0; ++ ++ return 1; ++} ++ ++/* Return true if OP is valid for a particular TLS relocation. ++ We are already guaranteed that OP is a CONST. */ ++ ++int ++tls_symbolic_operand_1 (rtx op, int size, int unspec) ++{ ++ op = XEXP (op, 0); ++ ++ if (GET_CODE (op) != UNSPEC || XINT (op, 1) != unspec) ++ return 0; ++ op = XVECEXP (op, 0, 0); ++ ++ if (GET_CODE (op) != SYMBOL_REF) ++ return 0; ++ ++ switch (SYMBOL_REF_TLS_MODEL (op)) ++ { ++ case TLS_MODEL_LOCAL_DYNAMIC: ++ return unspec == UNSPEC_DTPREL && size == sw_64_tls_size; ++ case TLS_MODEL_INITIAL_EXEC: ++ return unspec == UNSPEC_TPREL && size == 64; ++ case TLS_MODEL_LOCAL_EXEC: ++ return unspec == UNSPEC_TPREL && size == sw_64_tls_size; ++ default: ++ gcc_unreachable (); ++ } ++} ++ ++/* Used by aligned_memory_operand and unaligned_memory_operand to ++ resolve what reload is going to do with OP if it's a register. */ ++ ++rtx ++resolve_reload_operand (rtx op) ++{ ++ if (reload_in_progress) ++ { ++ rtx tmp = op; ++ if (SUBREG_P (tmp)) ++ tmp = SUBREG_REG (tmp); ++ if (REG_P (tmp) && REGNO (tmp) >= FIRST_PSEUDO_REGISTER) ++ { ++ op = reg_equiv_memory_loc (REGNO (tmp)); ++ if (op == 0) ++ return 0; ++ } ++ } ++ return op; ++} ++ ++/* The scalar modes supported differs from the default check-what-c-supports ++ version in that sometimes TFmode is available even when long double ++ indicates only DFmode. */ ++ ++static bool ++sw_64_scalar_mode_supported_p (scalar_mode mode) ++{ ++ switch (mode) ++ { ++ case E_QImode: ++ case E_HImode: ++ case E_SImode: ++ case E_DImode: ++ case E_TImode: /* via optabs.c. */ ++ return true; ++ ++ case E_SFmode: ++ case E_DFmode: ++ return true; ++ ++ case E_TFmode: ++ return TARGET_HAS_XFLOATING_LIBS; ++ ++ default: ++ return false; ++ } ++} ++ ++/* Sw_64 implements a couple of integer vector mode operations when ++ TARGET_MAX is enabled. We do not check TARGET_MAX here, however, ++ which allows the vectorizer to operate on e.g. move instructions, ++ or when expand_vector_operations can do something useful. */ ++ ++static bool ++sw_64_vector_mode_supported_p (machine_mode mode) ++{ ++ return mode == V8QImode || mode == V4HImode || mode == V2SImode; ++} ++ ++/* Return the TLS model to use for SYMBOL. 
*/ ++ ++static enum tls_model ++tls_symbolic_operand_type (rtx symbol) ++{ ++ enum tls_model model; ++ ++ if (GET_CODE (symbol) != SYMBOL_REF) ++ return TLS_MODEL_NONE; ++ model = SYMBOL_REF_TLS_MODEL (symbol); ++ ++ /* Local-exec with a 64-bit size is the same code as initial-exec. */ ++ if (model == TLS_MODEL_LOCAL_EXEC && sw_64_tls_size == 64) ++ model = TLS_MODEL_INITIAL_EXEC; ++ ++ return model; ++} ++ ++/* Return true if the function DECL will share the same GP as any ++ function in the current unit of translation. */ ++ ++static bool ++decl_has_samegp (const_tree decl) ++{ ++ /* Functions that are not local can be overridden, and thus may ++ not share the same gp. */ ++ if (!(*targetm.binds_local_p) (decl)) ++ return false; ++ ++ /* If -msmall-data is in effect, assume that there is only one GP ++ for the module, and so any local symbol has this property. We ++ need explicit relocations to be able to enforce this for symbols ++ not defined in this unit of translation, however. */ ++ if (TARGET_EXPLICIT_RELOCS && TARGET_SMALL_DATA) ++ return true; ++ ++ /* Functions that are not external are defined in this UoT. */ ++ /* ??? Irritatingly, static functions not yet emitted are still ++ marked "external". Apply this to non-static functions only. */ ++ return !TREE_PUBLIC (decl) || !DECL_EXTERNAL (decl); ++} ++ ++/* Return true if EXP should be placed in the small data section. */ ++ ++static bool ++sw_64_in_small_data_p (const_tree exp) ++{ ++ /* We want to merge strings, so we never consider them small data. */ ++ if (TREE_CODE (exp) == STRING_CST) ++ return false; ++ ++ /* Functions are never in the small data area. Duh. */ ++ if (TREE_CODE (exp) == FUNCTION_DECL) ++ return false; ++ ++ /* COMMON symbols are never small data. */ ++ if (TREE_CODE (exp) == VAR_DECL && DECL_COMMON (exp)) ++ return false; ++ ++ if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) ++ { ++ const char *section = DECL_SECTION_NAME (exp); ++ if (strcmp (section, ".sdata") == 0 || strcmp (section, ".sbss") == 0) ++ return true; ++ } ++ else ++ { ++ HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); ++ ++ /* If this is an incomplete type with size 0, then we can't put it ++ in sdata because it might be too big when completed. */ ++ if (size > 0 && size <= g_switch_value) ++ return true; ++ } ++ ++ return false; ++} ++ ++/* legitimate_address_p recognizes an RTL expression that is a valid ++ memory address for an instruction. The MODE argument is the ++ machine mode for the MEM expression that wants to use this address. ++ ++ For Sw_64, we have either a constant address or the sum of a ++ register and a constant address, or just a register. For DImode, ++ any of those forms can be surrounded with an AND that clear the ++ low-order three bits; this is an "unaligned" access. */ ++ ++static bool ++sw_64_legitimate_address_p (machine_mode mode, rtx x, bool strict) ++{ ++ /* If this is an ldl_u type address, discard the outer AND. */ ++ if (((TARGET_SW_M32 && mode == SImode) || (!TARGET_SW_M32 && mode == DImode)) ++ && GET_CODE (x) == AND && CONST_INT_P (XEXP (x, 1)) ++ && INTVAL (XEXP (x, 1)) == -8) ++ x = XEXP (x, 0); ++ ++ /* Discard non-paradoxical subregs. */ ++ if (SUBREG_P (x) ++ && (GET_MODE_SIZE (GET_MODE (x)) ++ < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) ++ x = SUBREG_REG (x); ++ ++ /* Unadorned general registers are valid. */ ++ if (REG_P (x) ++ && (strict ? STRICT_REG_OK_FOR_BASE_P (x) ++ : NONSTRICT_REG_OK_FOR_BASE_P (x))) ++ return true; ++ ++ /* Constant addresses (i.e. 
+/- 32k) are valid. */ ++ if (CONSTANT_ADDRESS_P (x)) ++ return true; ++ ++ if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC ++ || GET_CODE (x) == POST_MODIFY) ++ && TARGET_SW8A ++ && (strict ? STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0)) ++ : NONSTRICT_REG_OK_FOR_BASE_P (XEXP (x, 0)))) ++ return true; ++ /* Register plus a small constant offset is valid. */ ++ if (GET_CODE (x) == PLUS) ++ { ++ rtx ofs = XEXP (x, 1); ++ x = XEXP (x, 0); ++ ++ /* Discard non-paradoxical subregs. */ ++ if (SUBREG_P (x) ++ && (GET_MODE_SIZE (GET_MODE (x)) ++ < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) ++ x = SUBREG_REG (x); ++ ++ if (REG_P (x)) ++ { ++ if (!strict && NONSTRICT_REG_OK_FP_BASE_P (x) && CONST_INT_P (ofs)) ++ return true; ++ if ((strict ? STRICT_REG_OK_FOR_BASE_P (x) ++ : NONSTRICT_REG_OK_FOR_BASE_P (x)) ++ && CONSTANT_ADDRESS_P (ofs)) ++ return true; ++ } ++ } ++ ++ /* If we're managing explicit relocations, LO_SUM is valid, as are small ++ data symbols. Avoid explicit relocations of modes larger than word ++ mode since i.e. $LC0+8($1) can fold around +/- 32k offset. */ ++ else if (TARGET_EXPLICIT_RELOCS && GET_MODE_SIZE (mode) <= UNITS_PER_WORD) ++ { ++ if (small_symbolic_operand (x, Pmode)) ++ return true; ++ ++ if (GET_CODE (x) == LO_SUM) ++ { ++ rtx ofs = XEXP (x, 1); ++ x = XEXP (x, 0); ++ ++ /* Discard non-paradoxical subregs. */ ++ if (SUBREG_P (x) ++ && (GET_MODE_SIZE (GET_MODE (x)) ++ < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x))))) ++ x = SUBREG_REG (x); ++ ++ /* Must have a valid base register. */ ++ if (!(REG_P (x) ++ && (strict ? STRICT_REG_OK_FOR_BASE_P (x) ++ : NONSTRICT_REG_OK_FOR_BASE_P (x)))) ++ return false; ++ ++ /* The symbol must be local. */ ++ if (local_symbolic_operand (ofs, Pmode) ++ || dtp32_symbolic_operand (ofs, Pmode) ++ || tp32_symbolic_operand (ofs, Pmode)) ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++/* Build the SYMBOL_REF for __tls_get_addr. */ ++ ++static GTY (()) rtx tls_get_addr_libfunc; ++ ++static rtx ++get_tls_get_addr (void) ++{ ++ if (!tls_get_addr_libfunc) ++ tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr"); ++ return tls_get_addr_libfunc; ++} ++ ++/* Try machine-dependent ways of modifying an illegitimate address ++ to be legitimate. If we find one, return the new, valid address. */ ++ ++static rtx ++sw_64_legitimize_address_1 (rtx x, rtx scratch, machine_mode mode) ++{ ++ HOST_WIDE_INT addend; ++ ++ /* If the address is (plus reg const_int) and the CONST_INT is not a ++ valid offset, compute the high part of the constant and add it to ++ the register. Then our address is (plus temp low-part-const). */ ++ if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)) ++ && !CONSTANT_ADDRESS_P (XEXP (x, 1))) ++ { ++ addend = INTVAL (XEXP (x, 1)); ++ x = XEXP (x, 0); ++ goto split_addend; ++ } ++ ++ /* If the address is (const (plus FOO const_int)), find the low-order ++ part of the CONST_INT. Then load FOO plus any high-order part of the ++ CONST_INT into a register. Our address is (plus reg low-part-const). ++ This is done to reduce the number of GOT entries. */ ++ if (can_create_pseudo_p () && GET_CODE (x) == CONST ++ && GET_CODE (XEXP (x, 0)) == PLUS && CONST_INT_P (XEXP (XEXP (x, 0), 1))) ++ { ++ addend = INTVAL (XEXP (XEXP (x, 0), 1)); ++ x = force_reg (Pmode, XEXP (XEXP (x, 0), 0)); ++ goto split_addend; ++ } ++ ++ /* If we have a (plus reg const), emit the load as in (2), then add ++ the two registers, and finally generate (plus reg low-part-const) as ++ our address. 
*/ ++ if (can_create_pseudo_p () && GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) ++ && GET_CODE (XEXP (x, 1)) == CONST ++ && GET_CODE (XEXP (XEXP (x, 1), 0)) == PLUS ++ && CONST_INT_P (XEXP (XEXP (XEXP (x, 1), 0), 1))) ++ { ++ addend = INTVAL (XEXP (XEXP (XEXP (x, 1), 0), 1)); ++ x = expand_simple_binop (Pmode, PLUS, XEXP (x, 0), ++ XEXP (XEXP (XEXP (x, 1), 0), 0), NULL_RTX, 1, ++ OPTAB_LIB_WIDEN); ++ goto split_addend; ++ } ++ ++ /* If this is a local symbol, split the address into HIGH/LO_SUM parts. ++ Avoid modes larger than word mode since i.e. $LC0+8($1) can fold ++ around +/- 32k offset. */ ++ if (TARGET_EXPLICIT_RELOCS && GET_MODE_SIZE (mode) <= UNITS_PER_WORD ++ && symbolic_operand (x, Pmode)) ++ { ++ rtx r0, r16, eqv, tga, tp, dest, seq; ++ rtx_insn *insn; ++ ++ switch (tls_symbolic_operand_type (x)) ++ { ++ case TLS_MODEL_NONE: ++ break; ++ ++ case TLS_MODEL_GLOBAL_DYNAMIC: ++ { ++ start_sequence (); ++ ++ r0 = gen_rtx_REG (Pmode, 0); ++ r16 = gen_rtx_REG (Pmode, 16); ++ tga = get_tls_get_addr (); ++ dest = gen_reg_rtx (Pmode); ++ seq = GEN_INT (sw_64_next_sequence_number++); ++ if (sw_64_tls_gd == 16) ++ { ++ emit_insn ( ++ gen_movdi_er_tlsgd (r16, pic_offset_table_rtx, x, seq)); ++ } ++ else if (sw_64_tls_gd == 32) ++ { ++ eqv ++ = gen_rtx_UNSPEC (Pmode, ++ gen_rtvec (3, pic_offset_table_rtx, x, seq), ++ UNSPEC_TLSRELGOT); ++ ++ emit_insn (gen_rtx_SET (r16, eqv)); ++ emit_insn (gen_movdi_er_tlsgd (r16, r16, x, seq)); ++ } ++ rtx val = gen_call_value_osf_tlsgd (r0, tga, seq); ++ insn = emit_call_insn (val); ++ RTL_CONST_CALL_P (insn) = 1; ++ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16); ++ ++ insn = get_insns (); ++ end_sequence (); ++ ++ emit_libcall_block (insn, dest, r0, x); ++ return dest; ++ } ++ ++ case TLS_MODEL_LOCAL_DYNAMIC: ++ { ++ start_sequence (); ++ ++ r0 = gen_rtx_REG (Pmode, 0); ++ r16 = gen_rtx_REG (Pmode, 16); ++ tga = get_tls_get_addr (); ++ scratch = gen_reg_rtx (Pmode); ++ seq = GEN_INT (sw_64_next_sequence_number++); ++ if (sw_64_tls_ldm == 16) ++ { ++ emit_insn ( ++ gen_movdi_er_tlsldm (r16, pic_offset_table_rtx, seq)); ++ } ++ else if (sw_64_tls_ldm == 32) ++ { ++ eqv ++ = gen_rtx_UNSPEC (Pmode, ++ gen_rtvec (3, pic_offset_table_rtx, x, seq), ++ UNSPEC_TLSRELGOT); ++ ++ emit_insn (gen_rtx_SET (r16, eqv)); ++ emit_insn (gen_movdi_er_tlsldm (r16, r16, seq)); ++ } ++ rtx val = gen_call_value_osf_tlsldm (r0, tga, seq); ++ insn = emit_call_insn (val); ++ RTL_CONST_CALL_P (insn) = 1; ++ use_reg (&CALL_INSN_FUNCTION_USAGE (insn), r16); ++ ++ insn = get_insns (); ++ end_sequence (); ++ ++ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), ++ UNSPEC_TLSLDM_CALL); ++ emit_libcall_block (insn, scratch, r0, eqv); ++ ++ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPREL); ++ eqv = gen_rtx_CONST (Pmode, eqv); ++ ++ if (sw_64_tls_size == 64) ++ { ++ if (sw_64_tls_gotdtprel == 16) ++ { ++ dest = gen_reg_rtx (Pmode); ++ emit_insn (gen_rtx_SET (dest, eqv)); ++ emit_insn (gen_adddi3 (dest, dest, scratch)); ++ } ++ else if (sw_64_tls_gotdtprel == 32) ++ { ++ seq = GEN_INT (sw_64_next_sequence_number++); ++ eqv = gen_rtx_UNSPEC (Pmode, ++ gen_rtvec (3, pic_offset_table_rtx, x, ++ seq), ++ UNSPEC_TLSRELGOT); ++ dest = gen_reg_rtx (Pmode); ++ emit_insn (gen_rtx_SET (dest, eqv)); ++ ++ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, dest, x, seq), ++ UNSPEC_GOTDTPREL); ++ emit_insn (gen_rtx_SET (dest, eqv)); ++ ++ emit_insn (gen_adddi3 (dest, dest, scratch)); ++ } ++ return dest; ++ } ++ if (sw_64_tls_size == 32) ++ { ++ rtx temp = gen_rtx_HIGH (Pmode, eqv); ++ temp 
= gen_rtx_PLUS (Pmode, scratch, temp); ++ scratch = gen_reg_rtx (Pmode); ++ emit_insn (gen_rtx_SET (scratch, temp)); ++ } ++ return gen_rtx_LO_SUM (Pmode, scratch, eqv); ++ } ++ ++ case TLS_MODEL_INITIAL_EXEC: ++ { ++ if (sw_64_tls_gottprel == 16) ++ { ++ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL); ++ eqv = gen_rtx_CONST (Pmode, eqv); ++ tp = gen_reg_rtx (Pmode); ++ scratch = gen_reg_rtx (Pmode); ++ dest = gen_reg_rtx (Pmode); ++ ++ emit_insn (gen_get_thread_pointerdi (tp)); ++ emit_insn (gen_rtx_SET (scratch, eqv)); ++ emit_insn (gen_adddi3 (dest, tp, scratch)); ++ } ++ else if (sw_64_tls_gottprel == 32) ++ { ++ seq = GEN_INT (sw_64_next_sequence_number++); ++ ++ tp = gen_reg_rtx (Pmode); ++ emit_insn (gen_get_thread_pointerdi (tp)); ++ ++ scratch = gen_reg_rtx (Pmode); ++ eqv ++ = gen_rtx_UNSPEC (Pmode, ++ gen_rtvec (3, pic_offset_table_rtx, x, seq), ++ UNSPEC_TLSRELGOT); ++ emit_insn (gen_rtx_SET (scratch, eqv)); ++ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (3, scratch, x, seq), ++ UNSPEC_TPREL); ++ emit_insn (gen_rtx_SET (scratch, eqv)); ++ ++ dest = gen_reg_rtx (Pmode); ++ emit_insn (gen_adddi3 (dest, tp, scratch)); ++ } ++ return dest; ++ } ++ ++ case TLS_MODEL_LOCAL_EXEC: ++ eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_TPREL); ++ eqv = gen_rtx_CONST (Pmode, eqv); ++ tp = gen_reg_rtx (Pmode); ++ ++ emit_insn (gen_get_thread_pointerdi (tp)); ++ if (sw_64_tls_size == 32) ++ { ++ rtx temp = gen_rtx_HIGH (Pmode, eqv); ++ temp = gen_rtx_PLUS (Pmode, tp, temp); ++ tp = gen_reg_rtx (Pmode); ++ emit_insn (gen_rtx_SET (tp, temp)); ++ } ++ return gen_rtx_LO_SUM (Pmode, tp, eqv); ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (local_symbolic_operand (x, Pmode)) ++ { ++ if (small_symbolic_operand (x, Pmode)) ++ return x; ++ else ++ { ++ if (can_create_pseudo_p ()) ++ scratch = gen_reg_rtx (Pmode); ++ emit_insn (gen_rtx_SET (scratch, gen_rtx_HIGH (Pmode, x))); ++ return gen_rtx_LO_SUM (Pmode, scratch, x); ++ } ++ } ++ } ++ ++ return NULL; ++ ++split_addend: ++ { ++ HOST_WIDE_INT low, high; ++ ++ low = ((addend & 0xffff) ^ 0x8000) - 0x8000; ++ addend -= low; ++ high = ((addend & 0xffffffff) ^ 0x80000000) - 0x80000000; ++ addend -= high; ++ ++ if (addend) ++ x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (addend), ++ (!can_create_pseudo_p () ? scratch : NULL_RTX), ++ 1, OPTAB_LIB_WIDEN); ++ if (high) ++ x = expand_simple_binop (Pmode, PLUS, x, GEN_INT (high), ++ (!can_create_pseudo_p () ? scratch : NULL_RTX), ++ 1, OPTAB_LIB_WIDEN); ++ ++ return plus_constant (Pmode, x, low); ++ } ++} ++ ++/* Try machine-dependent ways of modifying an illegitimate address ++ to be legitimate. Return X or the new, valid address. */ ++ ++static rtx ++sw_64_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, machine_mode mode) ++{ ++ rtx new_x = sw_64_legitimize_address_1 (x, NULL_RTX, mode); ++ return new_x ? new_x : x; ++} ++ ++/* Return true if ADDR has an effect that depends on the machine mode it ++ is used for. On the Sw_64 this is true only for the unaligned modes. ++ We can simplify the test since we know that the address must be valid. */ ++ ++static bool ++sw_64_mode_dependent_address_p (const_rtx addr, ++ addr_space_t as ATTRIBUTE_UNUSED) ++{ ++ return GET_CODE (addr) == AND; ++} ++ ++/* Primarily this is required for TLS symbols, but given that our move ++ patterns *ought* to be able to handle any symbol at any time, we ++ should never be spilling symbolic operands to the constant pool, ever. 
*/ ++ ++static bool ++sw_64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x) ++{ ++ enum rtx_code code = GET_CODE (x); ++ return code == SYMBOL_REF || code == LABEL_REF || code == CONST; ++} ++ ++/* We do not allow indirect calls to be optimized into sibling calls, nor ++ can we allow a call to a function with a different GP to be optimized ++ into a sibcall. */ ++ ++static bool ++sw_64_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED) ++{ ++ /* Can't do indirect tail calls, since we don't know if the target ++ uses the same GP. */ ++ if (!decl) ++ return false; ++ ++ /* Otherwise, we can make a tail call if the target function shares ++ the same GP. */ ++ return decl_has_samegp (decl); ++} ++ ++bool ++some_small_symbolic_operand_int (rtx x) ++{ ++ subrtx_var_iterator::array_type array; ++ FOR_EACH_SUBRTX_VAR (iter, array, x, ALL) ++ { ++ rtx x = *iter; ++ /* Don't re-split. */ ++ if (GET_CODE (x) == LO_SUM) ++ iter.skip_subrtxes (); ++ else if (small_symbolic_operand (x, Pmode)) ++ return true; ++ } ++ return false; ++} ++ ++rtx ++split_small_symbolic_operand (rtx x) ++{ ++ x = copy_insn (x); ++ subrtx_ptr_iterator::array_type array; ++ FOR_EACH_SUBRTX_PTR (iter, array, &x, ALL) ++ { ++ rtx *ptr = *iter; ++ rtx x = *ptr; ++ /* Don't re-split. */ ++ if (GET_CODE (x) == LO_SUM) ++ iter.skip_subrtxes (); ++ else if (small_symbolic_operand (x, Pmode)) ++ { ++ *ptr = gen_rtx_LO_SUM (Pmode, pic_offset_table_rtx, x); ++ iter.skip_subrtxes (); ++ } ++ } ++ return x; ++} ++ ++/* Indicate that INSN cannot be duplicated. This is true for any insn ++ that we've marked with gpdisp relocs, since those have to stay in ++ 1-1 correspondence with one another. ++ ++ Technically we could copy them if we could set up a mapping from one ++ sequence number to another, across the set of insns to be duplicated. ++ This seems overly complicated and error-prone since interblock motion ++ from sched-ebb could move one of the pair of insns to a different block. ++ ++ Also cannot allow call insns to be duplicated. If they throw exceptions, ++ then they'll be in a different block from their ldgp. Which could lead ++ the bb reorder code to think that it would be ok to copy just the block ++ containing the call and branch to the block containing the ldgp. */ ++ ++static bool ++sw_64_cannot_copy_insn_p (rtx_insn *insn) ++{ ++ if (!reload_completed || !TARGET_EXPLICIT_RELOCS) ++ return false; ++ if (recog_memoized (insn) >= 0) ++ return get_attr_cannot_copy (insn); ++ else ++ return false; ++} ++ ++/* Try a machine-dependent way of reloading an illegitimate address ++ operand. If we find one, push the reload and return the new rtx. */ ++ ++rtx ++sw_64_legitimize_reload_address (rtx x, machine_mode mode ATTRIBUTE_UNUSED, ++ int opnum, int type, ++ int ind_levels ATTRIBUTE_UNUSED) ++{ ++ /* We must recognize output that we have already generated ourselves. */ ++ if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS ++ && REG_P (XEXP (XEXP (x, 0), 0)) && CONST_INT_P (XEXP (XEXP (x, 0), 1)) ++ && CONST_INT_P (XEXP (x, 1))) ++ { ++ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, BASE_REG_CLASS, ++ GET_MODE (x), VOIDmode, 0, 0, opnum, ++ (enum reload_type) type); ++ return x; ++ } ++ ++ /* We wish to handle large displacements off a base register by ++ splitting the addend across an ldih and the mem insn. This ++ cuts number of extra insns needed from 3 to 1. 
*/ ++ if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) ++ && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER ++ && REGNO_OK_FOR_BASE_P (REGNO (XEXP (x, 0))) && CONST_INT_P (XEXP (x, 1))) ++ { ++ HOST_WIDE_INT val = INTVAL (XEXP (x, 1)); ++ HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000; ++ HOST_WIDE_INT high ++ = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000; ++ ++ /* Check for 32-bit overflow. */ ++ if (high + low != val) ++ return NULL_RTX; ++ ++ /* Reload the high part into a base reg; leave the low part ++ in the mem directly. */ ++ x = gen_rtx_PLUS (GET_MODE (x), ++ gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0), ++ GEN_INT (high)), ++ GEN_INT (low)); ++ ++ push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL, BASE_REG_CLASS, ++ GET_MODE (x), VOIDmode, 0, 0, opnum, ++ (enum reload_type) type); ++ return x; ++ } ++ ++ return NULL_RTX; ++} ++ ++/* Return the cost of moving between registers of various classes. Moving ++ between FLOAT_REGS and anything else except float regs is expensive. ++ In fact, we make it quite expensive because we really don't want to ++ do these moves unless it is clearly worth it. Optimizations may ++ reduce the impact of not being able to allocate a pseudo to a ++ hard register. */ ++ ++static int ++sw_64_register_move_cost (machine_mode mode, reg_class_t from_i, ++ reg_class_t to_i) ++{ ++ enum reg_class from = (enum reg_class) from_i; ++ enum reg_class to = (enum reg_class) to_i; ++ if (!flag_sw_rtx_cost) ++ { ++ if ((from == FLOAT_REGS) == (to == FLOAT_REGS)) ++ return 2; ++ if (TARGET_FIX) ++ return (from == FLOAT_REGS) ? 6 : 8; ++ return 4 + 2 * sw_64_memory_latency; ++ } ++ if (from == R0_REG || from == R24_REG || from == R25_REG || from == R27_REG) ++ from = GENERAL_REGS; ++ if (to == R0_REG || to == R24_REG || to == R25_REG || to == R27_REG) ++ to = GENERAL_REGS; ++ if (GET_MODE_SIZE (mode) == 32) ++ { ++ if (from == GENERAL_REGS && to == GENERAL_REGS) ++ return 1; ++ else if (from == GENERAL_REGS) ++ return 16; ++ else if (to == GENERAL_REGS) ++ return 16; ++ if (!TARGET_SW_SIMD) ++ return 34; ++ return 2; ++ } ++ if (from == GENERAL_REGS && to == GENERAL_REGS) ++ return 1; ++ else if (from == GENERAL_REGS) ++ return 4; ++ else if (to == GENERAL_REGS) ++ return 4; ++ return 2; ++} ++ ++/* Return the cost of moving data of MODE from a register to ++ or from memory. On the Sw_64, bump this up a bit. */ ++ ++static int ++sw_64_memory_move_cost (machine_mode /*mode. */, reg_class_t /*regclass. */, ++ bool /*in. */) ++{ ++ if (flag_sw_rtx_cost) ++ return sw_64_memory_latency; ++ return 2 * sw_64_memory_latency; ++} ++ ++/* Compute a (partial) cost for rtx X. Return true if the complete ++ cost has been computed, and false if subexpressions should be ++ scanned. In either case, *TOTAL contains the cost result. */ ++ ++static bool ++sw_64_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno, int *total, ++ bool speed) ++{ ++ int code = GET_CODE (x); ++ bool float_mode_p = FLOAT_MODE_P (mode); ++ const struct sw_64_rtx_cost_data *cost_data; ++ ++ if (!speed) ++ cost_data = &sw_64_rtx_cost_size; ++ else if (flag_sw_rtx_cost) ++ cost_data = &sw_64_rtx_cost_data[2]; ++ else ++ cost_data = &sw_64_rtx_cost_data[sw_64_tune]; ++ ++ switch (code) ++ { ++ case CONST_INT: ++ /* If this is an 8-bit constant, return zero since it can be used ++ nearly anywhere with no cost. If it is a valid operand for an ++ ADD or AND, likewise return 0 if we know it will be used in that ++ context. Otherwise, return 2 since it might be used there later. 
++ All other constants take at least two insns. */ ++ if (INTVAL (x) >= 0 && INTVAL (x) < 256) ++ { ++ *total = 0; ++ return true; ++ } ++ /* FALLTHRU */ ++ ++ case CONST_DOUBLE: ++ case CONST_WIDE_INT: ++ if (x == CONST0_RTX (mode)) ++ *total = 0; ++ else if ((outer_code == PLUS && add_operand (x, VOIDmode)) ++ || (outer_code == AND && and_operand (x, VOIDmode))) ++ *total = 0; ++ else if (add_operand (x, VOIDmode) || and_operand (x, VOIDmode)) ++ *total = 2; ++ else ++ *total = COSTS_N_INSNS (2); ++ return true; ++ ++ case CONST: ++ case SYMBOL_REF: ++ case LABEL_REF: ++ if (TARGET_EXPLICIT_RELOCS && small_symbolic_operand (x, VOIDmode)) ++ *total = COSTS_N_INSNS (outer_code != MEM); ++ else if (TARGET_EXPLICIT_RELOCS && local_symbolic_operand (x, VOIDmode)) ++ *total = COSTS_N_INSNS (1 + (outer_code != MEM)); ++ else if (tls_symbolic_operand_type (x)) ++ /* ??? How many insns do we emit here? More than one... */ ++ *total = COSTS_N_INSNS (15); ++ else ++ /* Otherwise we do a load from the GOT. */ ++ *total = COSTS_N_INSNS (!speed ? 1 : sw_64_memory_latency); ++ return true; ++ ++ case HIGH: ++ /* This is effectively an add_operand. */ ++ *total = 2; ++ return true; ++ ++ case PLUS: ++ case MINUS: ++ if (float_mode_p) ++ *total = cost_data->fp_add; ++ else if ((GET_CODE (XEXP (x, 0)) == ASHIFT) ++ || (GET_CODE (XEXP (x, 0)) == MULT) ++ && const48_operand (XEXP (XEXP (x, 0), 1), VOIDmode)) ++ { ++ *total = (rtx_cost (XEXP (XEXP (x, 0), 0), mode, ++ (enum rtx_code) outer_code, opno, speed) ++ + rtx_cost (XEXP (x, 1), mode, (enum rtx_code) outer_code, ++ opno, speed) ++ + COSTS_N_INSNS (1)); ++ return true; ++ } ++ return false; ++ ++ case MULT: ++ if (float_mode_p) ++ *total = cost_data->fp_mult; ++ else if (mode == DImode) ++ *total = cost_data->int_mult_di; ++ else ++ *total = cost_data->int_mult_si; ++ return false; ++ ++ case ASHIFT: ++ if (CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) <= 3) ++ { ++ *total = COSTS_N_INSNS (1); ++ return false; ++ } ++ /* FALLTHRU */ ++ ++ case ASHIFTRT: ++ case LSHIFTRT: ++ *total = cost_data->int_shift; ++ return false; ++ ++ case IF_THEN_ELSE: ++ if (float_mode_p) ++ *total = cost_data->fp_add; ++ else ++ *total = cost_data->int_cmov; ++ if (flag_sw_rtx_cost && float_mode_p) ++ *total = COSTS_N_INSNS (2); ++ return false; ++ ++ case DIV: ++ case UDIV: ++ case MOD: ++ case UMOD: ++ if (!float_mode_p) ++ *total = cost_data->int_div; ++ else if (mode == SFmode) ++ *total = cost_data->fp_div_sf; ++ else ++ *total = cost_data->fp_div_df; ++ return false; ++ ++ case MEM: ++ *total = COSTS_N_INSNS (!speed ? 1 : sw_64_memory_latency); ++ return true; ++ ++ case NEG: ++ if (!float_mode_p) ++ { ++ *total = COSTS_N_INSNS (1); ++ return false; ++ } ++ /* FALLTHRU */ ++ ++ case ABS: ++ if (!float_mode_p) ++ { ++ *total = COSTS_N_INSNS (1) + cost_data->int_cmov; ++ return false; ++ } ++ if (flag_sw_rtx_cost) ++ { ++ *total = COSTS_N_INSNS (2); ++ return false; ++ } ++ /* FALLTHRU */ ++ ++ case FLOAT: ++ case UNSIGNED_FLOAT: ++ case FIX: ++ case UNSIGNED_FIX: ++ if (flag_sw_rtx_cost) ++ { ++ *total = COSTS_N_INSNS (4); ++ return false; ++ } ++ case FLOAT_TRUNCATE: ++ *total = cost_data->fp_add; ++ return false; ++ ++ case FLOAT_EXTEND: ++ if (MEM_P (XEXP (x, 0))) ++ *total = 0; ++ else ++ *total = cost_data->fp_add; ++ return false; ++ ++ default: ++ return false; ++ } ++} ++ ++/* REF is an alignable memory location. Place an aligned SImode ++ reference into *PALIGNED_MEM and the number of bits to shift into ++ *PBITNUM. 
SCRATCH is a free register for use in reloading out ++ of range stack slots. */ ++ ++void ++get_aligned_mem (rtx ref, rtx *paligned_mem, rtx *pbitnum) ++{ ++ rtx base; ++ HOST_WIDE_INT disp, offset; ++ ++ gcc_assert (MEM_P (ref)); ++ ++ if (reload_in_progress) ++ { ++ base = find_replacement (&XEXP (ref, 0)); ++ gcc_assert (memory_address_p (GET_MODE (ref), base)); ++ } ++ else ++ base = XEXP (ref, 0); ++ ++ if (GET_CODE (base) == PLUS) ++ disp = INTVAL (XEXP (base, 1)), base = XEXP (base, 0); ++ else ++ disp = 0; ++ ++ /* Find the byte offset within an aligned word. If the memory itself is ++ claimed to be aligned, believe it. Otherwise, aligned_memory_operand ++ will have examined the base register and determined it is aligned, and ++ thus displacements from it are naturally alignable. */ ++ if (MEM_ALIGN (ref) >= 32) ++ offset = 0; ++ else ++ offset = disp & 3; ++ ++ /* The location should not cross aligned word boundary. */ ++ gcc_assert (offset + GET_MODE_SIZE (GET_MODE (ref)) ++ <= GET_MODE_SIZE (SImode)); ++ ++ /* Access the entire aligned word. */ ++ *paligned_mem = widen_memory_access (ref, SImode, -offset); ++ ++ /* Convert the byte offset within the word to a bit offset. */ ++ offset *= BITS_PER_UNIT; ++ *pbitnum = GEN_INT (offset); ++} ++ ++/* Similar, but just get the address. Handle the two reload cases. ++ Add EXTRA_OFFSET to the address we return. */ ++ ++rtx ++get_unaligned_address (rtx ref) ++{ ++ rtx base; ++ HOST_WIDE_INT offset = 0; ++ ++ gcc_assert (MEM_P (ref)); ++ ++ if (reload_in_progress) ++ { ++ base = find_replacement (&XEXP (ref, 0)); ++ gcc_assert (memory_address_p (GET_MODE (ref), base)); ++ } ++ else ++ base = XEXP (ref, 0); ++ ++ if (GET_CODE (base) == PLUS) ++ offset += INTVAL (XEXP (base, 1)), base = XEXP (base, 0); ++ ++ return plus_constant (Pmode, base, offset); ++} ++ ++/* Compute a value X, such that X & 7 == (ADDR + OFS) & 7. ++ X is always returned in a register. */ ++ ++rtx ++get_unaligned_offset (rtx addr, HOST_WIDE_INT ofs) ++{ ++ if (GET_CODE (addr) == PLUS) ++ { ++ ofs += INTVAL (XEXP (addr, 1)); ++ addr = XEXP (addr, 0); ++ } ++ ++ return expand_simple_binop (Pmode, PLUS, addr, GEN_INT (ofs & 7), NULL_RTX, 1, ++ OPTAB_LIB_WIDEN); ++} ++ ++/* On the Sw_64, all (non-symbolic) constants except zero go into ++ a floating-point register via memory. Note that we cannot ++ return anything that is not a subset of RCLASS, and that some ++ symbolic constants cannot be dropped to memory. */ ++ ++enum reg_class ++sw_64_preferred_reload_class (rtx x, enum reg_class rclass) ++{ ++ /* Zero is present in any register class. */ ++ if (x == CONST0_RTX (GET_MODE (x))) ++ return rclass; ++ ++ /* These sorts of constants we can easily drop to memory. */ ++ if (CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) ++ || GET_CODE (x) == CONST_VECTOR) ++ { ++ if (rclass == FLOAT_REGS) ++ return NO_REGS; ++ if (rclass == ALL_REGS) ++ return GENERAL_REGS; ++ return rclass; ++ } ++ ++ /* All other kinds of constants should not (and in the case of HIGH ++ cannot) be dropped to memory -- instead we use a GENERAL_REGS ++ secondary reload. */ ++ if (CONSTANT_P (x)) ++ return (rclass == ALL_REGS ? GENERAL_REGS : rclass); ++ ++ return rclass; ++} ++ ++/* Inform reload about cases where moving X with a mode MODE to a register in ++ RCLASS requires an extra scratch or immediate register. Return the class ++ needed for the immediate register. 
*/ ++ ++static reg_class_t ++sw_64_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, ++ machine_mode mode, secondary_reload_info *sri) ++{ ++ enum reg_class rclass = (enum reg_class) rclass_i; ++ ++ /* Loading and storing HImode or QImode values to and from memory ++ usually requires a scratch register. */ ++ if (!TARGET_BWX && (mode == QImode || mode == HImode || mode == CQImode)) ++ { ++ if (any_memory_operand (x, mode)) ++ { ++ if (in_p) ++ { ++ if (!aligned_memory_operand (x, mode)) ++ sri->icode = direct_optab_handler (reload_in_optab, mode); ++ } ++ else ++ sri->icode = direct_optab_handler (reload_out_optab, mode); ++ return NO_REGS; ++ } ++ } ++ ++ /* We also cannot do integral arithmetic into FP regs, as might result ++ from register elimination into a DImode fp register. */ ++ if (rclass == FLOAT_REGS) ++ { ++ if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND) ++ return GENERAL_REGS; ++ if (in_p && INTEGRAL_MODE_P (mode) && !MEM_P (x) && !REG_P (x) ++ && !CONST_INT_P (x)) ++ return GENERAL_REGS; ++ } ++ ++ return NO_REGS; ++} ++ ++/* Implement TARGET_SECONDARY_MEMORY_NEEDED. ++ ++ If we are copying between general and FP registers, we need a memory ++ location unless the FIX extension is available. */ ++ ++static bool ++sw_64_secondary_memory_needed (machine_mode, reg_class_t class1, ++ reg_class_t class2) ++{ ++ return (!TARGET_FIX ++ && ((class1 == FLOAT_REGS && class2 != FLOAT_REGS) ++ || (class2 == FLOAT_REGS && class1 != FLOAT_REGS))); ++} ++ ++/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE. If MODE is ++ floating-point, use it. Otherwise, widen to a word like the default. ++ This is needed because we always store integers in FP registers in ++ quadword format. This whole area is very tricky! */ ++ ++static machine_mode ++sw_64_secondary_memory_needed_mode (machine_mode mode) ++{ ++ if (GET_MODE_CLASS (mode) == MODE_FLOAT) ++ return mode; ++ if (GET_MODE_SIZE (mode) >= 4) ++ return mode; ++ return mode_for_size (BITS_PER_WORD, GET_MODE_CLASS (mode), 0).require (); ++} ++ ++/* Given SEQ, which is an INSN list, look for any MEMs in either ++ a SET_DEST or a SET_SRC and copy the in-struct, unchanging, and ++ volatile flags from REF into each of the MEMs found. If REF is not ++ a MEM, don't do anything. */ ++ ++void ++sw_64_set_memflags (rtx seq, rtx ref) ++{ ++ rtx_insn *insn; ++ ++ if (!MEM_P (ref)) ++ return; ++ ++ /* This is only called from sw_64.md, after having had something ++ generated from one of the insn patterns. So if everything is ++ zero, the pattern is already up-to-date. */ ++ if (!MEM_VOLATILE_P (ref) && !MEM_NOTRAP_P (ref) && !MEM_READONLY_P (ref)) ++ return; ++ ++ subrtx_var_iterator::array_type array; ++ for (insn = as_a (seq); insn; insn = NEXT_INSN (insn)) ++ if (INSN_P (insn)) ++ FOR_EACH_SUBRTX_VAR (iter, array, PATTERN (insn), NONCONST) ++ { ++ rtx x = *iter; ++ if (MEM_P (x)) ++ { ++ MEM_VOLATILE_P (x) = MEM_VOLATILE_P (ref); ++ MEM_NOTRAP_P (x) = MEM_NOTRAP_P (ref); ++ MEM_READONLY_P (x) = MEM_READONLY_P (ref); ++ /* Sadly, we cannot use alias sets because the extra ++ aliasing produced by the AND interferes. Given that ++ two-byte quantities are the only thing we would be ++ able to differentiate anyway, there does not seem to ++ be any point in convoluting the early out of the ++ alias check. */ ++ iter.skip_subrtxes (); ++ } ++ } ++ else ++ gcc_unreachable (); ++} ++ ++static rtx ++sw_64_emit_set_const (rtx, machine_mode, HOST_WIDE_INT, int, bool); ++ ++/* Internal routine for sw_64_emit_set_const to check for N or below insns. 
++ If NO_OUTPUT is true, then we only check to see if N insns are possible, ++ and return pc_rtx if successful. */ ++ ++static rtx ++sw_64_emit_set_const_1 (rtx target, machine_mode mode, HOST_WIDE_INT c, int n, ++ bool no_output) ++{ ++ HOST_WIDE_INT new_const; ++ int i, bits; ++ /* Use a pseudo if highly optimizing and still generating RTL. */ ++ rtx subtarget ++ = (flag_expensive_optimizations && can_create_pseudo_p () ? 0 : target); ++ rtx temp, insn; ++ ++ /* If this is a sign-extended 32-bit constant, we can do this in at most ++ three insns, so do it if we have enough insns left. */ ++ ++ if (c >> 31 == -1 || c >> 31 == 0) ++ { ++ HOST_WIDE_INT low = ((c & 0xffff) ^ 0x8000) - 0x8000; ++ HOST_WIDE_INT tmp1 = c - low; ++ HOST_WIDE_INT high = (((tmp1 >> 16) & 0xffff) ^ 0x8000) - 0x8000; ++ HOST_WIDE_INT extra = 0; ++ ++ /* If HIGH will be interpreted as negative but the constant is ++ positive, we must adjust it to do two ldha insns. */ ++ ++ if ((high & 0x8000) != 0 && c >= 0) ++ { ++ extra = 0x4000; ++ tmp1 -= 0x40000000; ++ high = ((tmp1 >> 16) & 0xffff) - 2 * ((tmp1 >> 16) & 0x8000); ++ } ++ ++ if (c == low || (low == 0 && extra == 0)) ++ { ++ /* We used to use copy_to_suggested_reg (GEN_INT (c), target, mode) ++ but that meant that we can't handle INT_MIN on 32-bit machines ++ (like NT/Sw_64), because we recurse indefinitely through ++ emit_move_insn to gen_movdi. So instead, since we know exactly ++ what we want, create it explicitly. */ ++ ++ if (no_output) ++ return pc_rtx; ++ if (target == NULL) ++ target = gen_reg_rtx (mode); ++ emit_insn (gen_rtx_SET (target, GEN_INT (c))); ++ return target; ++ } ++ else if (n >= 2 + (extra != 0)) ++ { ++ if (no_output) ++ return pc_rtx; ++ if (!can_create_pseudo_p ()) ++ { ++ emit_insn (gen_rtx_SET (target, GEN_INT (high << 16))); ++ temp = target; ++ } ++ else ++ temp ++ = copy_to_suggested_reg (GEN_INT (high << 16), subtarget, mode); ++ ++ /* As of 2002-02-23, addsi3 is only available when not optimizing. ++ This means that if we go through expand_binop, we'll try to ++ generate extensions, etc, which will require new pseudos, which ++ will fail during some split phases. The SImode add patterns ++ still exist, but are not named. So build the insns by hand. */ ++ ++ if (extra != 0) ++ { ++ if (!subtarget) ++ subtarget = gen_reg_rtx (mode); ++ insn = gen_rtx_PLUS (mode, temp, GEN_INT (extra << 16)); ++ insn = gen_rtx_SET (subtarget, insn); ++ emit_insn (insn); ++ temp = subtarget; ++ } ++ ++ if (target == NULL) ++ target = gen_reg_rtx (mode); ++ insn = gen_rtx_PLUS (mode, temp, GEN_INT (low)); ++ insn = gen_rtx_SET (target, insn); ++ emit_insn (insn); ++ return target; ++ } ++ } ++ ++ /* If we couldn't do it that way, try some other methods. But if we have ++ no instructions left, don't bother. Likewise, if this is SImode and ++ we can't make pseudos, we can't do anything since the expand_binop ++ and expand_unop calls will widen and try to make pseudos. */ ++ ++ if (n == 1 || (mode == SImode && !can_create_pseudo_p ())) ++ return 0; ++ ++ /* Next, see if we can load a related constant and then shift and possibly ++ negate it to get the constant we want. Try this once each increasing ++ numbers of insns. */ ++ ++ for (i = 1; i < n; i++) ++ { ++ /* First, see if minus some low bits, we've an easy load of ++ high bits. 
*/ ++ ++ new_const = ((c & 0xffff) ^ 0x8000) - 0x8000; ++ if (new_const != 0) ++ { ++ temp = sw_64_emit_set_const (subtarget, mode, c - new_const, i, ++ no_output); ++ if (temp) ++ { ++ if (no_output) ++ return temp; ++ return expand_binop (mode, add_optab, temp, GEN_INT (new_const), ++ target, 0, OPTAB_WIDEN); ++ } ++ } ++ ++ /* Next try complementing. */ ++ temp = sw_64_emit_set_const (subtarget, mode, ~c, i, no_output); ++ if (temp) ++ { ++ if (no_output) ++ return temp; ++ return expand_unop (mode, one_cmpl_optab, temp, target, 0); ++ } ++ ++ /* Next try to form a constant and do a left shift. We can do this ++ if some low-order bits are zero; the exact_log2 call below tells ++ us that information. The bits we are shifting out could be any ++ value, but here we'll just try the 0- and sign-extended forms of ++ the constant. To try to increase the chance of having the same ++ constant in more than one insn, start at the highest number of ++ bits to shift, but try all possibilities in case a ZAPNOT will ++ be useful. */ ++ ++ bits = exact_log2 (c & -c); ++ if (bits > 0) ++ for (; bits > 0; bits--) ++ { ++ new_const = c >> bits; ++ temp ++ = sw_64_emit_set_const (subtarget, mode, new_const, i, no_output); ++ if (!temp && c < 0) ++ { ++ new_const = (unsigned HOST_WIDE_INT) c >> bits; ++ temp = sw_64_emit_set_const (subtarget, mode, new_const, i, ++ no_output); ++ } ++ if (temp) ++ { ++ if (no_output) ++ return temp; ++ return expand_binop (mode, ashl_optab, temp, GEN_INT (bits), ++ target, 0, OPTAB_WIDEN); ++ } ++ } ++ ++ /* Now try high-order zero bits. Here we try the shifted-in bits as ++ all zero and all ones. Be careful to avoid shifting outside the ++ mode and to avoid shifting outside the host wide int size. */ ++ ++ bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8) ++ - floor_log2 (c) - 1); ++ if (bits > 0) ++ for (; bits > 0; bits--) ++ { ++ new_const = c << bits; ++ temp ++ = sw_64_emit_set_const (subtarget, mode, new_const, i, no_output); ++ if (!temp) ++ { ++ new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1); ++ temp = sw_64_emit_set_const (subtarget, mode, new_const, i, ++ no_output); ++ } ++ if (temp) ++ { ++ if (no_output) ++ return temp; ++ return expand_binop (mode, lshr_optab, temp, GEN_INT (bits), ++ target, 1, OPTAB_WIDEN); ++ } ++ } ++ ++ /* Now try high-order 1 bits. We get that with a sign-extension. ++ But one bit isn't enough here. Be careful to avoid shifting outside ++ the mode and to avoid shifting outside the host wide int size. */ ++ ++ bits = (MIN (HOST_BITS_PER_WIDE_INT, GET_MODE_SIZE (mode) * 8) ++ - floor_log2 (~c) - 2); ++ if (bits > 0) ++ for (; bits > 0; bits--) ++ { ++ new_const = c << bits; ++ temp ++ = sw_64_emit_set_const (subtarget, mode, new_const, i, no_output); ++ if (!temp) ++ { ++ new_const = (c << bits) | ((HOST_WIDE_INT_1U << bits) - 1); ++ temp = sw_64_emit_set_const (subtarget, mode, new_const, i, ++ no_output); ++ } ++ if (temp) ++ { ++ if (no_output) ++ return temp; ++ return expand_binop (mode, ashr_optab, temp, GEN_INT (bits), ++ target, 0, OPTAB_WIDEN); ++ } ++ } ++ } ++ ++ /* Finally, see if can load a value into the target that is the same as the ++ constant except that all bytes that are 0 are changed to be 0xff. If we ++ can, then we can do a ZAPNOT to obtain the desired constant. */ ++ ++ new_const = c; ++ for (i = 0; i < 64; i += 8) ++ if ((new_const & ((HOST_WIDE_INT) 0xff << i)) == 0) ++ new_const |= (HOST_WIDE_INT) 0xff << i; ++ ++ /* We are only called for SImode and DImode. 
If this is SImode, ensure that ++ we are sign extended to a full word. */ ++ ++ if (mode == SImode) ++ new_const = ((new_const & 0xffffffff) ^ 0x80000000) - 0x80000000; ++ ++ if (new_const != c) ++ { ++ temp ++ = sw_64_emit_set_const (subtarget, mode, new_const, n - 1, no_output); ++ if (temp) ++ { ++ if (no_output) ++ return temp; ++ return expand_binop (mode, and_optab, temp, GEN_INT (c | ~new_const), ++ target, 0, OPTAB_WIDEN); ++ } ++ } ++ ++ return 0; ++} ++ ++/* Try to output insns to set TARGET equal to the constant C if it can be ++ done in less than N insns. Do all computations in MODE. Returns the place ++ where the output has been placed if it can be done and the insns have been ++ emitted. If it would take more than N insns, zero is returned and no ++ insns and emitted. */ ++ ++static rtx ++sw_64_emit_set_const (rtx target, machine_mode mode, HOST_WIDE_INT c, int n, ++ bool no_output) ++{ ++ machine_mode orig_mode = mode; ++ rtx orig_target = target; ++ rtx result = 0; ++ int i; ++ ++ /* If we can't make any pseudos, TARGET is an SImode hard register, we ++ can't load this constant in one insn, do this in DImode. */ ++ if (!can_create_pseudo_p () && mode == SImode && REG_P (target) ++ && REGNO (target) < FIRST_PSEUDO_REGISTER) ++ { ++ result = sw_64_emit_set_const_1 (target, mode, c, 1, no_output); ++ if (result) ++ return result; ++ ++ target = no_output ? NULL : gen_lowpart (DImode, target); ++ mode = DImode; ++ } ++ else if (mode == V8QImode || mode == V4HImode || mode == V2SImode) ++ { ++ target = no_output ? NULL : gen_lowpart (DImode, target); ++ mode = DImode; ++ } ++ ++ /* Try 1 insn, then 2, then up to N. */ ++ for (i = 1; i <= n; i++) ++ { ++ result = sw_64_emit_set_const_1 (target, mode, c, i, no_output); ++ if (result) ++ { ++ rtx_insn *insn; ++ rtx set; ++ ++ if (no_output) ++ return result; ++ ++ insn = get_last_insn (); ++ set = single_set (insn); ++ if (!CONSTANT_P (SET_SRC (set))) ++ set_unique_reg_note (get_last_insn (), REG_EQUAL, GEN_INT (c)); ++ break; ++ } ++ } ++ ++ /* Allow for the case where we changed the mode of TARGET. */ ++ if (result) ++ { ++ if (result == target) ++ result = orig_target; ++ else if (mode != orig_mode) ++ result = gen_lowpart (orig_mode, result); ++ } ++ ++ return result; ++} ++ ++/* Having failed to find a 3 insn sequence in sw_64_emit_set_const, ++ fall back to a straight forward decomposition. We do this to avoid ++ exponential run times encountered when looking for longer sequences ++ with sw_64_emit_set_const. */ ++ ++static rtx ++sw_64_emit_set_long_const (rtx target, HOST_WIDE_INT c1) ++{ ++ HOST_WIDE_INT d1, d2, d3, d4; ++ ++ /* Decompose the entire word. */ ++ ++ d1 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; ++ c1 -= d1; ++ d2 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; ++ c1 = (c1 - d2) >> 32; ++ d3 = ((c1 & 0xffff) ^ 0x8000) - 0x8000; ++ c1 -= d3; ++ d4 = ((c1 & 0xffffffff) ^ 0x80000000) - 0x80000000; ++ gcc_assert (c1 == d4); ++ ++ /* Construct the high word. */ ++ if (d4) ++ { ++ emit_move_insn (target, GEN_INT (d4)); ++ if (d3) ++ emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d3))); ++ } ++ else ++ emit_move_insn (target, GEN_INT (d3)); ++ ++ /* Shift it into place. */ ++ emit_move_insn (target, gen_rtx_ASHIFT (DImode, target, GEN_INT (32))); ++ ++ /* Add in the low bits. 
*/ ++ if (d2) ++ emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d2))); ++ if (d1) ++ emit_move_insn (target, gen_rtx_PLUS (DImode, target, GEN_INT (d1))); ++ ++ return target; ++} ++ ++/* Given an integral CONST_INT or CONST_VECTOR, return the low 64 bits. */ ++ ++static HOST_WIDE_INT ++sw_64_extract_integer (rtx x) ++{ ++ if (GET_CODE (x) == CONST_VECTOR) ++ x = simplify_subreg (DImode, x, GET_MODE (x), 0); ++ ++ gcc_assert (CONST_INT_P (x)); ++ ++ return INTVAL (x); ++} ++ ++/* Implement TARGET_LEGITIMATE_CONSTANT_P. This is all constants for which ++ we are willing to load the value into a register via a move pattern. ++ Normally this is all symbolic constants, integral constants that ++ take three or fewer instructions, and floating-point zero. */ ++ ++bool ++sw_64_legitimate_constant_p (machine_mode mode, rtx x) ++{ ++ HOST_WIDE_INT i0; ++ ++ switch (GET_CODE (x)) ++ { ++ case LABEL_REF: ++ case HIGH: ++ return true; ++ ++ case CONST: ++ if (GET_CODE (XEXP (x, 0)) == PLUS && CONST_INT_P (XEXP (XEXP (x, 0), 1))) ++ x = XEXP (XEXP (x, 0), 0); ++ else ++ return true; ++ ++ if (GET_CODE (x) != SYMBOL_REF) ++ return true; ++ /* FALLTHRU */ ++ ++ case SYMBOL_REF: ++ /* TLS symbols are never valid. */ ++ return SYMBOL_REF_TLS_MODEL (x) == 0; ++ ++ case CONST_WIDE_INT: ++ if (TARGET_BUILD_CONSTANTS) ++ return true; ++ if (x == CONST0_RTX (mode)) ++ return true; ++ mode = DImode; ++ gcc_assert (CONST_WIDE_INT_NUNITS (x) == 2); ++ i0 = CONST_WIDE_INT_ELT (x, 1); ++ if (sw_64_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) == NULL) ++ return false; ++ i0 = CONST_WIDE_INT_ELT (x, 0); ++ goto do_integer; ++ ++ case CONST_DOUBLE: ++ if (x == CONST0_RTX (mode)) ++ return true; ++ return false; ++ ++ case CONST_VECTOR: ++ if (x == CONST0_RTX (mode)) ++ return true; ++ if (GET_MODE_CLASS (mode) != MODE_VECTOR_INT) ++ return false; ++ if (GET_MODE_SIZE (mode) != 8) ++ return false; ++ /* FALLTHRU */ ++ ++ case CONST_INT: ++ if (TARGET_BUILD_CONSTANTS) ++ return true; ++ i0 = sw_64_extract_integer (x); ++ do_integer: ++ return sw_64_emit_set_const_1 (NULL_RTX, mode, i0, 3, true) != NULL; ++ ++ default: ++ return false; ++ } ++} ++ ++/* Operand 1 is known to be a constant, and should require more than one ++ instruction to load. Emit that multi-part load. */ ++ ++bool ++sw_64_split_const_mov (machine_mode mode, rtx *operands) ++{ ++ HOST_WIDE_INT i0; ++ rtx temp = NULL_RTX; ++ ++ i0 = sw_64_extract_integer (operands[1]); ++ ++ temp = sw_64_emit_set_const (operands[0], mode, i0, 3, false); ++ ++ if (!temp && TARGET_BUILD_CONSTANTS) ++ temp = sw_64_emit_set_long_const (operands[0], i0); ++ ++ if (temp) ++ { ++ if (!rtx_equal_p (operands[0], temp)) ++ emit_move_insn (operands[0], temp); ++ return true; ++ } ++ ++ return false; ++} ++ ++/* Expand a move instruction; return true if all work is done. ++ We don't handle non-bwx subword loads here. */ ++ ++bool ++sw_64_expand_mov (machine_mode mode, rtx *operands) ++{ ++ rtx tmp; ++ ++ /* If the output is not a register, the input must be. */ ++ if (MEM_P (operands[0]) && !reg_or_0_operand (operands[1], mode)) ++ operands[1] = force_reg (mode, operands[1]); ++ ++ /* Allow legitimize_address to perform some simplifications. */ ++ if (mode == Pmode && symbolic_operand (operands[1], mode)) ++ { ++ tmp = sw_64_legitimize_address_1 (operands[1], operands[0], mode); ++ if (tmp) ++ { ++ if (tmp == operands[0]) ++ return true; ++ operands[1] = tmp; ++ return false; ++ } ++ } ++ ++ /* Early out for non-constants and valid constants. 
*/ ++ if (!CONSTANT_P (operands[1]) || input_operand (operands[1], mode)) ++ return false; ++ ++ /* Split large integers. */ ++ if (CONST_INT_P (operands[1]) || GET_CODE (operands[1]) == CONST_VECTOR) ++ { ++ if (sw_64_split_const_mov (mode, operands)) ++ return true; ++ } ++ ++ /* Otherwise we've nothing left but to drop the thing to memory. */ ++ tmp = force_const_mem (mode, operands[1]); ++ ++ if (tmp == NULL_RTX) ++ return false; ++ ++ if (reload_in_progress) ++ { ++ emit_move_insn (operands[0], XEXP (tmp, 0)); ++ operands[1] = replace_equiv_address (tmp, operands[0]); ++ } ++ else ++ operands[1] = validize_mem (tmp); ++ return false; ++} ++ ++/* Expand a non-bwx QImode or HImode move instruction; ++ return true if all work is done. */ ++ ++bool ++sw_64_expand_mov_nobwx (machine_mode mode, rtx *operands) ++{ ++ rtx seq; ++ ++ /* If the output is not a register, the input must be. */ ++ if (MEM_P (operands[0])) ++ operands[1] = force_reg (mode, operands[1]); ++ ++ /* Handle four memory cases, unaligned and aligned for either the input ++ or the output. The only case where we can be called during reload is ++ for aligned loads; all other cases require temporaries. */ ++ ++ if (any_memory_operand (operands[1], mode)) ++ { ++ if (aligned_memory_operand (operands[1], mode)) ++ { ++ if (reload_in_progress) ++ { ++ seq = gen_reload_in_aligned (mode, operands[0], operands[1]); ++ emit_insn (seq); ++ } ++ else ++ { ++ rtx aligned_mem, bitnum; ++ rtx scratch = gen_reg_rtx (SImode); ++ rtx subtarget; ++ bool copyout; ++ ++ get_aligned_mem (operands[1], &aligned_mem, &bitnum); ++ ++ subtarget = operands[0]; ++ if (REG_P (subtarget)) ++ subtarget = gen_lowpart (DImode, subtarget), copyout = false; ++ else ++ subtarget = gen_reg_rtx (DImode), copyout = true; ++ ++ if (mode == QImode) ++ seq = gen_aligned_loadqi (subtarget, aligned_mem, bitnum, ++ scratch); ++ else ++ seq = gen_aligned_loadhi (subtarget, aligned_mem, bitnum, ++ scratch); ++ emit_insn (seq); ++ ++ if (copyout) ++ emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); ++ } ++ } ++ else ++ { ++ /* Don't pass these as parameters since that makes the generated ++ code depend on parameter evaluation order which will cause ++ bootstrap failures. 
*/ ++ ++ rtx temp1, temp2, subtarget, ua; ++ bool copyout; ++ ++ temp1 = gen_reg_rtx (DImode); ++ temp2 = gen_reg_rtx (DImode); ++ ++ subtarget = operands[0]; ++ if (REG_P (subtarget)) ++ subtarget = gen_lowpart (DImode, subtarget), copyout = false; ++ else ++ subtarget = gen_reg_rtx (DImode), copyout = true; ++ ++ ua = get_unaligned_address (operands[1]); ++ if (mode == QImode) ++ seq = gen_unaligned_loadqi (subtarget, ua, temp1, temp2); ++ else ++ seq = gen_unaligned_loadhi (subtarget, ua, temp1, temp2); ++ ++ sw_64_set_memflags (seq, operands[1]); ++ emit_insn (seq); ++ ++ if (copyout) ++ emit_move_insn (operands[0], gen_lowpart (mode, subtarget)); ++ } ++ return true; ++ } ++ ++ if (any_memory_operand (operands[0], mode)) ++ { ++ if (aligned_memory_operand (operands[0], mode)) ++ { ++ rtx aligned_mem, bitnum; ++ rtx temp1 = gen_reg_rtx (SImode); ++ rtx temp2 = gen_reg_rtx (SImode); ++ ++ get_aligned_mem (operands[0], &aligned_mem, &bitnum); ++ ++ emit_insn ( ++ gen_aligned_store (aligned_mem, operands[1], bitnum, temp1, temp2)); ++ } ++ else ++ { ++ rtx temp1 = gen_reg_rtx (DImode); ++ rtx temp2 = gen_reg_rtx (DImode); ++ rtx temp3 = gen_reg_rtx (DImode); ++ rtx ua = get_unaligned_address (operands[0]); ++ ++ seq ++ = gen_unaligned_store (mode, ua, operands[1], temp1, temp2, temp3); ++ ++ sw_64_set_memflags (seq, operands[0]); ++ emit_insn (seq); ++ } ++ return true; ++ } ++ ++ return false; ++} ++ ++/* Implement the movmisalign patterns. One of the operands is a memory ++ that is not naturally aligned. Emit instructions to load it. */ ++ ++void ++sw_64_expand_movmisalign (machine_mode mode, rtx *operands) ++{ ++ /* Honor misaligned loads, for those we promised to do so. */ ++ if (MEM_P (operands[1])) ++ { ++ rtx tmp; ++ ++ if (register_operand (operands[0], mode)) ++ tmp = operands[0]; ++ else ++ tmp = gen_reg_rtx (mode); ++ ++ sw_64_expand_unaligned_load (tmp, operands[1], 8, 0, 0); ++ if (tmp != operands[0]) ++ emit_move_insn (operands[0], tmp); ++ } ++ else if (MEM_P (operands[0])) ++ { ++ if (!reg_or_0_operand (operands[1], mode)) ++ operands[1] = force_reg (mode, operands[1]); ++ sw_64_expand_unaligned_store (operands[0], operands[1], 8, 0); ++ } ++ else ++ gcc_unreachable (); ++} ++ ++/* Generate an unsigned DImode to FP conversion. This is the same code ++ optabs would emit if we didn't have TFmode patterns. ++ ++ For SFmode, this is the only construction I've found that can pass ++ gcc.c-torture/execute/ieee/rbug.c. No scenario that uses DFmode ++ intermediates will work, because you'll get intermediate rounding ++ that ruins the end result. Some of this could be fixed by turning ++ on round-to-positive-infinity, but that requires diddling the fpsr, ++ which kills performance. I tried turning this around and converting ++ to a negative number, so that I could turn on /m, but either I did ++ it wrong or there's something else cause I wound up with the exact ++ same single-bit error. There is a branch-less form of this same code: ++ ++ srl $16,1,$1 ++ and $16,1,$2 ++ cmplt $16,0,$3 ++ or $1,$2,$2 ++ selge $16,$16,$2 ++ ifmovd $3,$f10 ++ ifmovd $2,$f11 ++ fcvtlf $f11,$f11 ++ fadds $f11,$f11,$f0 ++ fseleq $f10,$f11,$f0 ++ ++ I'm not using it because it's the same number of instructions as ++ this branch-full form, and it has more serialized long latency ++ instructions on the critical path. 
++ ++ For DFmode, we can avoid rounding errors by breaking up the word ++ into two pieces, converting them separately, and adding them back: ++ ++ LC0: .long 0,0x5f800000 ++ ++ ifmovd $16,$f11 ++ ldi $2,LC0 ++ cmplt $16,0,$1 ++ fcpyse $f11,$f31,$f10 ++ fcpyse $f31,$f11,$f11 ++ s4addw $1,$2,$1 ++ lds $f12,0($1) ++ fcvtls $f10,$f10 ++ fcvtls $f11,$f11 ++ faddd $f12,$f10,$f0 ++ faddd $f0,$f11,$f0 ++ ++ This doesn't seem to be a clear-cut win over the optabs form. ++ It probably all depends on the distribution of numbers being ++ converted -- in the optabs form, all but high-bit-set has a ++ much lower minimum execution time. */ ++ ++void ++sw_64_emit_floatuns (rtx operands[2]) ++{ ++ rtx neglab, donelab, i0, i1, f0, in, out; ++ machine_mode mode; ++ ++ out = operands[0]; ++ in = force_reg (DImode, operands[1]); ++ mode = GET_MODE (out); ++ neglab = gen_label_rtx (); ++ donelab = gen_label_rtx (); ++ i0 = gen_reg_rtx (DImode); ++ i1 = gen_reg_rtx (DImode); ++ f0 = gen_reg_rtx (mode); ++ ++ emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab); ++ ++ emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in))); ++ emit_jump_insn (gen_jump (donelab)); ++ emit_barrier (); ++ ++ emit_label (neglab); ++ ++ emit_insn (gen_lshrdi3 (i0, in, const1_rtx)); ++ emit_insn (gen_anddi3 (i1, in, const1_rtx)); ++ emit_insn (gen_iordi3 (i0, i0, i1)); ++ emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0))); ++ emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0))); ++ ++ emit_label (donelab); ++} ++ ++/* Generate the comparison for a conditional branch. */ ++ ++void ++sw_64_emit_conditional_branch (rtx operands[], machine_mode cmp_mode) ++{ ++ enum rtx_code cmp_code, branch_code; ++ machine_mode branch_mode = VOIDmode; ++ enum rtx_code code = GET_CODE (operands[0]); ++ rtx op0 = operands[1], op1 = operands[2]; ++ rtx tem; ++ ++ if (cmp_mode == TFmode) ++ { ++ op0 = sw_64_emit_xfloating_compare (&code, op0, op1); ++ op1 = const0_rtx; ++ cmp_mode = DImode; ++ } ++ ++ /* The general case: fold the comparison code to the types of compares ++ that we have, choosing the branch as necessary. */ ++ switch (code) ++ { ++ case EQ: ++ case LE: ++ case LT: ++ case LEU: ++ case LTU: ++ case UNORDERED: ++ /* We have these compares. */ ++ cmp_code = code, branch_code = NE; ++ break; ++ ++ case NE: ++ case ORDERED: ++ /* These must be reversed. */ ++ cmp_code = reverse_condition (code), branch_code = EQ; ++ break; ++ ++ case GE: ++ case GT: ++ case GEU: ++ case GTU: ++ /* For FP, we swap them, for INT, we reverse them. */ ++ if (cmp_mode == DFmode || (cmp_mode == SFmode && flag_sw_sf_cmpsel)) ++ { ++ cmp_code = swap_condition (code); ++ branch_code = NE; ++ std::swap (op0, op1); ++ } ++ else ++ { ++ cmp_code = reverse_condition (code); ++ branch_code = EQ; ++ } ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (cmp_mode == DFmode) ++ { ++ if (flag_unsafe_math_optimizations && cmp_code != UNORDERED) ++ { ++ /* When we are not as concerned about non-finite values, and we ++ are comparing against zero, we can branch directly. */ ++ if (op1 == CONST0_RTX (DFmode)) ++ cmp_code = UNKNOWN, branch_code = code; ++ else if (op0 == CONST0_RTX (DFmode)) ++ { ++ /* Undo the swap we probably did just above. */ ++ std::swap (op0, op1); ++ branch_code = swap_condition (cmp_code); ++ cmp_code = UNKNOWN; ++ } ++ } ++ else ++ { ++ /* ??? We mark the branch mode to be CCmode to prevent the ++ compare and branch from being combined, since the compare ++ insn follows IEEE rules that the branch does not. 
*/ ++ branch_mode = CCmode; ++ } ++ } ++ else if (cmp_mode == SFmode && flag_sw_sf_cmpsel) ++ { ++ if (flag_unsafe_math_optimizations && cmp_code != UNORDERED) ++ { ++ /* When we are not as concerned about non-finite values, and we ++ are comparing against zero, we can branch directly. */ ++ if (op1 == CONST0_RTX (SFmode)) ++ cmp_code = UNKNOWN, branch_code = code; ++ else if (op0 == CONST0_RTX (SFmode)) ++ { ++ /* Undo the swap we probably did just above. */ ++ std::swap (op0, op1); ++ branch_code = swap_condition (cmp_code); ++ cmp_code = UNKNOWN; ++ } ++ } ++ else ++ { ++ /* ??? We mark the branch mode to be CCmode to prevent the ++ compare and branch from being combined, since the compare ++ insn follows IEEE rules that the branch does not. */ ++ branch_mode = CCmode; ++ } ++ } ++ else ++ { ++ /* The following optimizations are only for signed compares. */ ++ if (code != LEU && code != LTU && code != GEU && code != GTU) ++ { ++ /* Whee. Compare and branch against 0 directly. */ ++ if (op1 == const0_rtx) ++ cmp_code = UNKNOWN, branch_code = code; ++ ++ /* If the constants doesn't fit into an immediate, but can ++ be generated by ldi/ldih, we adjust the argument and ++ compare against zero, so we can use beq/bne directly. */ ++ /* ??? Don't do this when comparing against symbols, otherwise ++ we'll reduce (&x == 0x1234) to (&x-0x1234 == 0), which will ++ be declared false out of hand (at least for non-weak). */ ++ else if (CONST_INT_P (op1) && (code == EQ || code == NE) ++ && !(symbolic_operand (op0, VOIDmode) ++ || (REG_P (op0) && REG_POINTER (op0)))) ++ { ++ rtx n_op1 = GEN_INT (-INTVAL (op1)); ++ ++ if (!satisfies_constraint_I (op1) ++ && (satisfies_constraint_K (n_op1) ++ || satisfies_constraint_L (n_op1))) ++ cmp_code = PLUS, branch_code = code, op1 = n_op1; ++ } ++ } ++ ++ if (!reg_or_0_operand (op0, DImode)) ++ op0 = force_reg (DImode, op0); ++ if (cmp_code != PLUS && !reg_or_8bit_operand (op1, DImode)) ++ op1 = force_reg (DImode, op1); ++ } ++ ++ /* Emit an initial compare instruction, if necessary. */ ++ tem = op0; ++ if (cmp_code != UNKNOWN) ++ { ++ tem = gen_reg_rtx (cmp_mode); ++ emit_move_insn (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1)); ++ } ++ ++ /* Emit the branch instruction. */ ++ tem = gen_rtx_SET ( ++ pc_rtx, ++ gen_rtx_IF_THEN_ELSE (VOIDmode, ++ gen_rtx_fmt_ee (branch_code, branch_mode, tem, ++ CONST0_RTX (cmp_mode)), ++ gen_rtx_LABEL_REF (VOIDmode, operands[3]), pc_rtx)); ++ emit_jump_insn (tem); ++} ++ ++/* Certain simplifications can be done to make invalid setcc operations ++ valid. Return the final comparison, or NULL if we can't work. */ ++ ++bool ++sw_64_emit_setcc (rtx operands[], machine_mode cmp_mode) ++{ ++ enum rtx_code cmp_code; ++ enum rtx_code code = GET_CODE (operands[1]); ++ rtx op0 = operands[2], op1 = operands[3]; ++ rtx tmp; ++ ++ if (cmp_mode == TFmode) ++ { ++ op0 = sw_64_emit_xfloating_compare (&code, op0, op1); ++ op1 = const0_rtx; ++ cmp_mode = DImode; ++ } ++ ++ if (cmp_mode == DFmode && !TARGET_FIX) ++ return 0; ++ ++ /* The general case: fold the comparison code to the types of compares ++ that we have, choosing the branch as necessary. */ ++ ++ cmp_code = UNKNOWN; ++ switch (code) ++ { ++ case EQ: ++ case LE: ++ case LT: ++ case LEU: ++ case LTU: ++ case UNORDERED: ++ /* We have these compares. 
*/ ++ if (cmp_mode == DFmode) ++ cmp_code = code, code = NE; ++ break; ++ ++ case NE: ++ if (cmp_mode == DImode && op1 == const0_rtx) ++ break; ++ /* FALLTHRU */ ++ ++ case ORDERED: ++ cmp_code = reverse_condition (code); ++ code = EQ; ++ break; ++ ++ case GE: ++ case GT: ++ case GEU: ++ case GTU: ++ /* These normally need swapping, but for integer zero we have ++ special patterns that recognize swapped operands. */ ++ if (cmp_mode == DImode && op1 == const0_rtx) ++ break; ++ code = swap_condition (code); ++ if (cmp_mode == DFmode) ++ cmp_code = code, code = NE; ++ std::swap (op0, op1); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (cmp_mode == DImode) ++ { ++ if (!register_operand (op0, DImode)) ++ op0 = force_reg (DImode, op0); ++ if (!reg_or_8bit_operand (op1, DImode)) ++ op1 = force_reg (DImode, op1); ++ } ++ ++ /* Emit an initial compare instruction, if necessary. */ ++ if (cmp_code != UNKNOWN) ++ { ++ tmp = gen_reg_rtx (cmp_mode); ++ emit_insn ( ++ gen_rtx_SET (tmp, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1))); ++ ++ op0 = cmp_mode != DImode ? gen_lowpart (DImode, tmp) : tmp; ++ op1 = const0_rtx; ++ } ++ ++ /* Emit the setcc instruction. */ ++ emit_insn ( ++ gen_rtx_SET (operands[0], gen_rtx_fmt_ee (code, DImode, op0, op1))); ++ return true; ++} ++ ++/* Rewrite a comparison against zero CMP of the form ++ (CODE (cc0) (const_int 0)) so it can be written validly in ++ a conditional move (if_then_else CMP ...). ++ If both of the operands that set cc0 are nonzero we must emit ++ an insn to perform the compare (it can't be done within ++ the conditional move). */ ++ ++rtx ++sw_64_emit_conditional_move (rtx cmp, machine_mode mode) ++{ ++ enum rtx_code code = GET_CODE (cmp); ++ enum rtx_code cmov_code = NE; ++ rtx op0 = XEXP (cmp, 0); ++ rtx op1 = XEXP (cmp, 1); ++ machine_mode cmp_mode ++ = (GET_MODE (op0) == VOIDmode ? DImode : GET_MODE (op0)); ++ machine_mode cmov_mode = VOIDmode; ++ int local_fast_math = flag_unsafe_math_optimizations; ++ rtx tem; ++ ++ if (cmp_mode == TFmode) ++ { ++ op0 = sw_64_emit_xfloating_compare (&code, op0, op1); ++ op1 = const0_rtx; ++ cmp_mode = DImode; ++ } ++ ++ gcc_assert (cmp_mode == DFmode || cmp_mode == DImode || cmp_mode == SFmode); ++ ++ if (FLOAT_MODE_P (cmp_mode) != FLOAT_MODE_P (mode)) ++ { ++ enum rtx_code cmp_code; ++ ++ if (!TARGET_FIX) ++ return 0; ++ ++ /* If we have fp<->int register move instructions, do a cmov by ++ performing the comparison in fp registers, and move the ++ zero/nonzero value to integer registers, where we can then ++ use a normal cmov, or vice-versa. */ ++ ++ switch (code) ++ { ++ case EQ: ++ case LE: ++ case LT: ++ case LEU: ++ case LTU: ++ case UNORDERED: ++ /* We have these compares. */ ++ cmp_code = code, code = NE; ++ break; ++ ++ case NE: ++ case ORDERED: ++ /* These must be reversed. */ ++ cmp_code = reverse_condition (code), code = EQ; ++ break; ++ ++ case GE: ++ case GT: ++ case GEU: ++ case GTU: ++ /* These normally need swapping, but for integer zero we have ++ special patterns that recognize swapped operands. 
*/ ++ if (cmp_mode == DImode && op1 == const0_rtx) ++ cmp_code = code, code = NE; ++ else ++ { ++ cmp_code = swap_condition (code); ++ code = NE; ++ std::swap (op0, op1); ++ } ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (cmp_mode == DImode) ++ { ++ if (!reg_or_0_operand (op0, DImode)) ++ op0 = force_reg (DImode, op0); ++ if (!reg_or_8bit_operand (op1, DImode)) ++ op1 = force_reg (DImode, op1); ++ } ++ ++ tem = gen_reg_rtx (cmp_mode); ++ emit_insn ( ++ gen_rtx_SET (tem, gen_rtx_fmt_ee (cmp_code, cmp_mode, op0, op1))); ++ ++ cmp_mode = cmp_mode == DImode ? E_DFmode : E_DImode; ++ op0 = gen_lowpart (cmp_mode, tem); ++ op1 = CONST0_RTX (cmp_mode); ++ cmp = gen_rtx_fmt_ee (code, VOIDmode, op0, op1); ++ local_fast_math = 1; ++ } ++ ++ if (cmp_mode == DImode) ++ { ++ if (!reg_or_0_operand (op0, DImode)) ++ op0 = force_reg (DImode, op0); ++ if (!reg_or_8bit_operand (op1, DImode)) ++ op1 = force_reg (DImode, op1); ++ } ++ ++ /* We may be able to use a conditional move directly. ++ This avoids emitting spurious compares. */ ++ if (signed_comparison_operator (cmp, VOIDmode) ++ && (cmp_mode == DImode || local_fast_math) ++ && (op0 == CONST0_RTX (cmp_mode) || op1 == CONST0_RTX (cmp_mode))) ++ return gen_rtx_fmt_ee (code, VOIDmode, op0, op1); ++ ++ /* We can't put the comparison inside the conditional move; ++ emit a compare instruction and put that inside the ++ conditional move. Make sure we emit only comparisons we have; ++ swap or reverse as necessary. */ ++ ++ if (!can_create_pseudo_p ()) ++ return NULL_RTX; ++ ++ switch (code) ++ { ++ case EQ: ++ case LE: ++ case LT: ++ case LEU: ++ case LTU: ++ case UNORDERED: ++ /* We have these compares: */ ++ break; ++ ++ case NE: ++ case ORDERED: ++ /* These must be reversed. */ ++ code = reverse_condition (code); ++ cmov_code = EQ; ++ break; ++ ++ case GE: ++ case GT: ++ case GEU: ++ case GTU: ++ /* These normally need swapping, but for integer zero we have ++ special patterns that recognize swapped operands. */ ++ if (cmp_mode == DImode && op1 == const0_rtx) ++ break; ++ code = swap_condition (code); ++ std::swap (op0, op1); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (cmp_mode == DImode) ++ { ++ if (!reg_or_0_operand (op0, DImode)) ++ op0 = force_reg (DImode, op0); ++ if (!reg_or_8bit_operand (op1, DImode)) ++ op1 = force_reg (DImode, op1); ++ } ++ ++ /* ??? We mark the branch mode to be CCmode to prevent the compare ++ and cmov from being combined, since the compare insn follows IEEE ++ rules that the cmov does not. */ ++ if (cmp_mode == DFmode && !local_fast_math) ++ cmov_mode = CCmode; ++ ++ tem = gen_reg_rtx (cmp_mode); ++ emit_move_insn (tem, gen_rtx_fmt_ee (code, cmp_mode, op0, op1)); ++ return gen_rtx_fmt_ee (cmov_code, cmov_mode, tem, CONST0_RTX (cmp_mode)); ++} ++ ++/* Simplify a conditional move of two constants into a setcc with ++ arithmetic. This is done with a splitter since combine would ++ just undo the work if done during code generation. It also catches ++ cases we wouldn't have before cse. 
*/ ++ ++int ++sw_64_split_conditional_move (enum rtx_code code, rtx dest, rtx cond, rtx t_rtx, ++ rtx f_rtx) ++{ ++ HOST_WIDE_INT t, f, diff; ++ machine_mode mode; ++ rtx target, subtarget, tmp; ++ ++ mode = GET_MODE (dest); ++ t = INTVAL (t_rtx); ++ f = INTVAL (f_rtx); ++ diff = t - f; ++ ++ if (((code == NE || code == EQ) && diff < 0) || (code == GE || code == GT)) ++ { ++ code = reverse_condition (code); ++ std::swap (t, f); ++ diff = -diff; ++ } ++ ++ subtarget = target = dest; ++ if (mode != DImode) ++ { ++ target = gen_lowpart (DImode, dest); ++ if (can_create_pseudo_p ()) ++ subtarget = gen_reg_rtx (DImode); ++ else ++ subtarget = target; ++ } ++ /* Below, we must be careful to use copy_rtx on target and subtarget ++ in intermediate insns, as they may be a subreg rtx, which may not ++ be shared. */ ++ ++ if (f == 0 ++ && exact_log2 (diff) > 0 ++ /* On SW6, we've got enough shifters to make non-arithmetic shifts ++ viable over a longer latency cmove. */ ++ && (diff <= 8 || sw_64_tune == PROCESSOR_SW6 ++ || sw_64_tune == PROCESSOR_SW8)) ++ { ++ tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); ++ emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp)); ++ ++ tmp = gen_rtx_ASHIFT (DImode, copy_rtx (subtarget), ++ GEN_INT (exact_log2 (t))); ++ emit_insn (gen_rtx_SET (target, tmp)); ++ } ++ else if (f == 0 && t == -1) ++ { ++ tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); ++ emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp)); ++ ++ emit_insn (gen_negdi2 (target, copy_rtx (subtarget))); ++ } ++ else if (diff == 1 || diff == 4 || diff == 8) ++ { ++ rtx add_op; ++ ++ tmp = gen_rtx_fmt_ee (code, DImode, cond, const0_rtx); ++ emit_insn (gen_rtx_SET (copy_rtx (subtarget), tmp)); ++ ++ if (diff == 1) ++ emit_insn (gen_adddi3 (target, copy_rtx (subtarget), GEN_INT (f))); ++ else ++ { ++ add_op = GEN_INT (f); ++ if (sext_add_operand (add_op, mode)) ++ { ++ // in sw_64 sxsubw is ra*x + rb; ++ tmp = gen_rtx_MULT (DImode, copy_rtx (subtarget), GEN_INT (diff)); ++ tmp = gen_rtx_PLUS (DImode, tmp, add_op); ++ emit_insn (gen_rtx_SET (target, tmp)); ++ } ++ else ++ return 0; ++ } ++ } ++ else ++ return 0; ++ ++ return 1; ++} ++ ++/* Look up the function X_floating library function name for the ++ given operation. 
*/ ++ ++struct GTY (()) xfloating_op ++{ ++ const enum rtx_code code; ++ const char *const GTY ((skip)) osf_func; ++ const char *const GTY ((skip)) vms_func; ++ rtx libcall; ++}; ++ ++static GTY (()) struct xfloating_op xfloating_ops[] ++ = {{PLUS, "_OtsAddX", "OTS$ADD_X", 0}, ++ {MINUS, "_OtsSubX", "OTS$SUB_X", 0}, ++ {MULT, "_OtsMulX", "OTS$MUL_X", 0}, ++ {DIV, "_OtsDivX", "OTS$DIV_X", 0}, ++ {EQ, "_OtsEqlX", "OTS$EQL_X", 0}, ++ {NE, "_OtsNeqX", "OTS$NEQ_X", 0}, ++ {LT, "_OtsLssX", "OTS$LSS_X", 0}, ++ {LE, "_OtsLeqX", "OTS$LEQ_X", 0}, ++ {GT, "_OtsGtrX", "OTS$GTR_X", 0}, ++ {GE, "_OtsGeqX", "OTS$GEQ_X", 0}, ++ {FIX, "_OtsCvtXQ", "OTS$CVTXQ", 0}, ++ {FLOAT, "_OtsCvtQX", "OTS$CVTQX", 0}, ++ {UNSIGNED_FLOAT, "_OtsCvtQUX", "OTS$CVTQUX", 0}, ++ {FLOAT_EXTEND, "_OtsConvertFloatTX", "OTS$CVT_FLOAT_T_X", 0}, ++ {FLOAT_TRUNCATE, "_OtsConvertFloatXT", "OTS$CVT_FLOAT_X_T", 0}}; ++ ++static GTY (()) struct xfloating_op vax_cvt_ops[] ++ = {{FLOAT_EXTEND, "_OtsConvertFloatGX", "OTS$CVT_FLOAT_G_X", 0}, ++ {FLOAT_TRUNCATE, "_OtsConvertFloatXG", "OTS$CVT_FLOAT_X_G", 0}}; ++ ++static rtx ++sw_64_lookup_xfloating_lib_func (enum rtx_code code) ++{ ++ struct xfloating_op *ops = xfloating_ops; ++ long n = ARRAY_SIZE (xfloating_ops); ++ long i; ++ ++ gcc_assert (TARGET_HAS_XFLOATING_LIBS); ++ ++ /* How irritating. Nothing to key off for the main table. */ ++ if (TARGET_FLOAT_VAX && (code == FLOAT_EXTEND || code == FLOAT_TRUNCATE)) ++ { ++ ops = vax_cvt_ops; ++ n = ARRAY_SIZE (vax_cvt_ops); ++ } ++ ++ for (i = 0; i < n; ++i, ++ops) ++ if (ops->code == code) ++ { ++ rtx func = ops->libcall; ++ if (!func) ++ { ++ func = init_one_libfunc (ops->osf_func); ++ ops->libcall = func; ++ } ++ return func; ++ } ++ ++ gcc_unreachable (); ++} ++ ++/* Most X_floating operations take the rounding mode as an argument. ++ Compute that here. */ ++ ++static int ++sw_64_compute_xfloating_mode_arg (enum rtx_code code, ++ enum sw_64_fp_rounding_mode round) ++{ ++ int mode; ++ ++ switch (round) ++ { ++ case SW_64_FPRM_NORM: ++ mode = 2; ++ break; ++ case SW_64_FPRM_MINF: ++ mode = 1; ++ break; ++ case SW_64_FPRM_CHOP: ++ mode = 0; ++ break; ++ case SW_64_FPRM_DYN: ++ mode = 4; ++ break; ++ default: ++ gcc_unreachable (); ++ ++ /* XXX For reference, round to +inf is mode = 3. */ ++ } ++ ++ if (code == FLOAT_TRUNCATE && sw_64_fptm == SW_64_FPTM_N) ++ mode |= 0x10000; ++ ++ return mode; ++} ++ ++/* Emit an X_floating library function call. ++ ++ Note that these functions do not follow normal calling conventions: ++ TFmode arguments are passed in two integer registers (as opposed to ++ indirect); TFmode return values appear in R16+R17. ++ ++ FUNC is the function to call. ++ TARGET is where the output belongs. ++ OPERANDS are the inputs. ++ NOPERANDS is the count of inputs. ++ EQUIV is the expression equivalent for the function. 
++*/ ++ ++static void ++sw_64_emit_xfloating_libcall (rtx func, rtx target, rtx operands[], ++ int noperands, rtx equiv) ++{ ++ rtx usage = NULL_RTX, reg; ++ int regno = 16, i; ++ ++ start_sequence (); ++ ++ for (i = 0; i < noperands; ++i) ++ { ++ switch (GET_MODE (operands[i])) ++ { ++ case E_TFmode: ++ reg = gen_rtx_REG (TFmode, regno); ++ regno += 2; ++ break; ++ ++ case E_DFmode: ++ reg = gen_rtx_REG (DFmode, regno + 32); ++ regno += 1; ++ break; ++ ++ case E_VOIDmode: ++ gcc_assert (CONST_INT_P (operands[i])); ++ /* FALLTHRU */ ++ case E_DImode: ++ reg = gen_rtx_REG (DImode, regno); ++ regno += 1; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ emit_move_insn (reg, operands[i]); ++ use_reg (&usage, reg); ++ } ++ ++ switch (GET_MODE (target)) ++ { ++ case E_TFmode: ++ reg = gen_rtx_REG (TFmode, 16); ++ break; ++ case E_DFmode: ++ reg = gen_rtx_REG (DFmode, 32); ++ break; ++ case E_DImode: ++ reg = gen_rtx_REG (DImode, 0); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ rtx mem = gen_rtx_MEM (QImode, func); ++ rtx_insn *tmp = emit_call_insn ( ++ gen_call_value (reg, mem, const0_rtx, const0_rtx, const0_rtx)); ++ CALL_INSN_FUNCTION_USAGE (tmp) = usage; ++ RTL_CONST_CALL_P (tmp) = 1; ++ ++ tmp = get_insns (); ++ end_sequence (); ++ ++ emit_libcall_block (tmp, target, reg, equiv); ++} ++ ++/* Emit an X_floating library function call for arithmetic (+,-,*,/). */ ++ ++void ++sw_64_emit_xfloating_arith (enum rtx_code code, rtx operands[]) ++{ ++ rtx func; ++ int mode; ++ rtx out_operands[3]; ++ ++ func = sw_64_lookup_xfloating_lib_func (code); ++ mode = sw_64_compute_xfloating_mode_arg (code, sw_64_fprm); ++ ++ out_operands[0] = operands[1]; ++ out_operands[1] = operands[2]; ++ out_operands[2] = GEN_INT (mode); ++ sw_64_emit_xfloating_libcall (func, operands[0], out_operands, 3, ++ gen_rtx_fmt_ee (code, TFmode, operands[1], ++ operands[2])); ++} ++ ++/* Emit an X_floating library function call for a comparison. */ ++ ++static rtx ++sw_64_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1) ++{ ++ enum rtx_code cmp_code, res_code; ++ rtx func, out, operands[2], note; ++ ++ /* X_floating library comparison functions return ++ -1 unordered ++ 0 false ++ 1 true ++ Convert the compare against the raw return value. */ ++ ++ cmp_code = *pcode; ++ switch (cmp_code) ++ { ++ case UNORDERED: ++ cmp_code = EQ; ++ res_code = LT; ++ break; ++ case ORDERED: ++ cmp_code = EQ; ++ res_code = GE; ++ break; ++ case NE: ++ res_code = NE; ++ break; ++ case EQ: ++ case LT: ++ case GT: ++ case LE: ++ case GE: ++ res_code = GT; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ *pcode = res_code; ++ ++ func = sw_64_lookup_xfloating_lib_func (cmp_code); ++ ++ operands[0] = op0; ++ operands[1] = op1; ++ out = gen_reg_rtx (DImode); ++ ++ /* What's actually returned is -1,0,1, not a proper boolean value. */ ++ note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1); ++ note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE); ++ sw_64_emit_xfloating_libcall (func, out, operands, 2, note); ++ ++ return out; ++} ++ ++/* Emit an X_floating library function call for a conversion. 
*/ ++ ++void ++sw_64_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[]) ++{ ++ int noperands = 1, mode; ++ rtx out_operands[2]; ++ rtx func; ++ enum rtx_code code = orig_code; ++ ++ if (code == UNSIGNED_FIX) ++ code = FIX; ++ ++ func = sw_64_lookup_xfloating_lib_func (code); ++ ++ out_operands[0] = operands[1]; ++ ++ switch (code) ++ { ++ case FIX: ++ mode = sw_64_compute_xfloating_mode_arg (code, SW_64_FPRM_CHOP); ++ out_operands[1] = GEN_INT (mode); ++ noperands = 2; ++ break; ++ case FLOAT_TRUNCATE: ++ mode = sw_64_compute_xfloating_mode_arg (code, sw_64_fprm); ++ out_operands[1] = GEN_INT (mode); ++ noperands = 2; ++ break; ++ default: ++ break; ++ } ++ ++ sw_64_emit_xfloating_libcall (func, operands[0], out_operands, noperands, ++ gen_rtx_fmt_e (orig_code, ++ GET_MODE (operands[0]), ++ operands[1])); ++} ++ ++/* Split a TImode or TFmode move from OP[1] to OP[0] into a pair of ++ DImode moves from OP[2,3] to OP[0,1]. If FIXUP_OVERLAP is true, ++ guarantee that the sequence ++ set (OP[0] OP[2]) ++ set (OP[1] OP[3]) ++ is valid. Naturally, output operand ordering is little-endian. ++ This is used by *movtf_internal and *movti_internal. */ ++ ++void ++sw_64_split_tmode_pair (rtx operands[4], machine_mode mode, bool fixup_overlap) ++{ ++ switch (GET_CODE (operands[1])) ++ { ++ case REG: ++ operands[3] = gen_rtx_REG (DImode, REGNO (operands[1]) + 1); ++ operands[2] = gen_rtx_REG (DImode, REGNO (operands[1])); ++ break; ++ ++ case MEM: ++ operands[3] = adjust_address (operands[1], DImode, 8); ++ operands[2] = adjust_address (operands[1], DImode, 0); ++ break; ++ ++ CASE_CONST_SCALAR_INT: ++ case CONST_DOUBLE: ++ gcc_assert (operands[1] == CONST0_RTX (mode)); ++ operands[2] = operands[3] = const0_rtx; ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ switch (GET_CODE (operands[0])) ++ { ++ case REG: ++ operands[1] = gen_rtx_REG (DImode, REGNO (operands[0]) + 1); ++ operands[0] = gen_rtx_REG (DImode, REGNO (operands[0])); ++ break; ++ ++ case MEM: ++ operands[1] = adjust_address (operands[0], DImode, 8); ++ operands[0] = adjust_address (operands[0], DImode, 0); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (fixup_overlap && reg_overlap_mentioned_p (operands[0], operands[3])) ++ { ++ std::swap (operands[0], operands[1]); ++ std::swap (operands[2], operands[3]); ++ } ++} ++ ++/* Implement negtf2 or abstf2. Op0 is destination, op1 is source, ++ op2 is a register containing the sign bit, operation is the ++ logical operation to be performed. */ ++ ++void ++sw_64_split_tfmode_frobsign (rtx operands[3], rtx (*operation) (rtx, rtx, rtx)) ++{ ++ rtx high_bit = operands[2]; ++ rtx scratch; ++ int move; ++ ++ sw_64_split_tmode_pair (operands, TFmode, false); ++ ++ /* Detect three flavors of operand overlap. */ ++ move = 1; ++ if (rtx_equal_p (operands[0], operands[2])) ++ move = 0; ++ else if (rtx_equal_p (operands[1], operands[2])) ++ { ++ if (rtx_equal_p (operands[0], high_bit)) ++ move = 2; ++ else ++ move = -1; ++ } ++ ++ if (move < 0) ++ emit_move_insn (operands[0], operands[2]); ++ ++ /* ??? If the destination overlaps both source tf and high_bit, then ++ assume source tf is dead in its entirety and use the other half ++ for a scratch register. Otherwise "scratch" is just the proper ++ destination register. */ ++ scratch = operands[move < 2 ? 
1 : 3]; ++ ++ emit_insn ((*operation) (scratch, high_bit, operands[3])); ++ ++ if (move > 0) ++ { ++ emit_move_insn (operands[0], operands[2]); ++ if (move > 1) ++ emit_move_insn (operands[1], scratch); ++ } ++} ++ ++/* Use ext[wlq][lh] as the Architecture Handbook describes for extracting ++ unaligned data: ++ ++ unsigned: signed: ++ word: ldl_u r1,X(r11) ldl_u r1,X(r11) ++ ldl_u r2,X+1(r11) ldl_u r2,X+1(r11) ++ ldi r3,X(r11) ldi r3,X+2(r11) ++ exthl r1,r3,r1 extll r1,r3,r1 ++ exthh r2,r3,r2 extlh r2,r3,r2 ++ or r1.r2.r1 or r1,r2,r1 ++ sra r1,48,r1 ++ ++ long: ldl_u r1,X(r11) ldl_u r1,X(r11) ++ ldl_u r2,X+3(r11) ldl_u r2,X+3(r11) ++ ldi r3,X(r11) ldi r3,X(r11) ++ extll r1,r3,r1 extll r1,r3,r1 ++ extlh r2,r3,r2 extlh r2,r3,r2 ++ or r1.r2.r1 addl r1,r2,r1 ++ ++ quad: ldl_u r1,X(r11) ++ ldl_u r2,X+7(r11) ++ ldi r3,X(r11) ++ extll r1,r3,r1 ++ extlh r2,r3,r2 ++ or r1.r2.r1 ++*/ ++ ++void ++sw_64_expand_unaligned_load (rtx tgt, rtx mem, HOST_WIDE_INT size, ++ HOST_WIDE_INT ofs, int sign) ++{ ++ rtx meml, memh, addr, extl, exth, tmp, mema; ++ machine_mode mode; ++ ++ if (TARGET_BWX && size == 2) ++ { ++ meml = adjust_address (mem, QImode, ofs); ++ memh = adjust_address (mem, QImode, ofs + 1); ++ extl = gen_reg_rtx (DImode); ++ exth = gen_reg_rtx (DImode); ++ emit_insn (gen_zero_extendqidi2 (extl, meml)); ++ emit_insn (gen_zero_extendqidi2 (exth, memh)); ++ exth = expand_simple_binop (DImode, ASHIFT, exth, GEN_INT (8), NULL, 1, ++ OPTAB_LIB_WIDEN); ++ addr = expand_simple_binop (DImode, IOR, extl, exth, NULL, 1, ++ OPTAB_LIB_WIDEN); ++ ++ if (sign && GET_MODE (tgt) != HImode) ++ { ++ addr = gen_lowpart (HImode, addr); ++ emit_insn (gen_extend_insn (tgt, addr, GET_MODE (tgt), HImode, 0)); ++ } ++ else ++ { ++ if (GET_MODE (tgt) != DImode) ++ addr = gen_lowpart (GET_MODE (tgt), addr); ++ emit_move_insn (tgt, addr); ++ } ++ return; ++ } ++ ++ meml = gen_reg_rtx (Pmode); ++ memh = gen_reg_rtx (Pmode); ++ addr = gen_reg_rtx (Pmode); ++ extl = gen_reg_rtx (Pmode); ++ exth = gen_reg_rtx (Pmode); ++ ++ mema = XEXP (mem, 0); ++ rtx mema_const, mema_ptr; ++ if (GET_CODE (mema) == LO_SUM) ++ mema = force_reg (Pmode, mema); ++ ++ // TODO: split const ptr ++ if (GET_CODE (mema) == PLUS) ++ { ++ mema_ptr = XEXP (mema, 0); ++ mema_const = XEXP (mema, 1); ++ } ++ /* AND addresses cannot be in any alias set, since they may implicitly ++ alias surrounding code. Ideally we'd have some alias set that ++ covered all types except those with alignment 8 or higher. */ ++ ++ tmp = change_address (mem, Pmode, ++ gen_rtx_AND (Pmode, plus_constant (Pmode, mema, ofs), ++ GEN_INT (-8))); ++ set_mem_alias_set (tmp, 0); ++ emit_move_insn (meml, tmp); ++ ++ tmp ++ = change_address (mem, Pmode, ++ gen_rtx_AND (Pmode, ++ plus_constant (Pmode, mema, ofs + size - 1), ++ GEN_INT (-8))); ++ set_mem_alias_set (tmp, 0); ++ emit_move_insn (memh, tmp); ++ ++ if (sign && size == 2) ++ { ++ emit_move_insn (addr, plus_constant (Pmode, mema, ofs + 2)); ++ ++ emit_insn (gen_extql (extl, meml, addr)); ++ emit_insn (gen_extqh (exth, memh, addr)); ++ ++ /* We must use tgt here for the target. Sw_64 port fails if we use ++ addr for the target, because addr is marked as a pointer and combine ++ knows that pointers are always sign-extended 32-bit values. 
*/ ++ addr = expand_binop (Pmode, ior_optab, extl, exth, tgt, 1, OPTAB_WIDEN); ++ addr = expand_binop (Pmode, ashr_optab, addr, GEN_INT (48), addr, 1, ++ OPTAB_WIDEN); ++ } ++ else ++ { ++ if (GET_CODE (mema) == PLUS && CONST_INT_P (mema_const) && ++ // (INTVAL (mema_const) > 32767 || INTVAL (mema_const) < -32767)) ++ // { ++ (!add_operand (mema_const, VOIDmode))) ++ { ++ rtx tmpreg = gen_reg_rtx (DImode); ++ tmpreg = sw_64_emit_set_const ( ++ tmpreg, DImode, INTVAL (plus_constant (Pmode, mema_const, ofs)), 2, ++ false); ++ emit_insn (gen_adddi3 (addr, mema_ptr, tmpreg)); ++ } ++ else ++ { ++ emit_move_insn (addr, plus_constant (Pmode, mema, ofs)); ++ } ++ emit_insn (gen_extxl (extl, meml, GEN_INT (size * 8), addr)); ++ switch ((int) size) ++ { ++ case 2: ++ emit_insn (gen_extwh (exth, memh, addr)); ++ mode = HImode; ++ break; ++ case 4: ++ emit_insn (gen_extlh (exth, memh, addr)); ++ mode = SImode; ++ break; ++ case 8: ++ emit_insn (gen_extqh (exth, memh, addr)); ++ mode = DImode; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ addr = expand_binop (mode, ior_optab, gen_lowpart (mode, extl), ++ gen_lowpart (mode, exth), gen_lowpart (mode, tgt), ++ sign, OPTAB_WIDEN); ++ } ++ ++ if (addr != tgt) ++ emit_move_insn (tgt, gen_lowpart (GET_MODE (tgt), addr)); ++} ++ ++/* Similarly, use ins and msk instructions to perform unaligned stores. */ ++ ++void ++sw_64_expand_unaligned_store (rtx dst, rtx src, HOST_WIDE_INT size, ++ HOST_WIDE_INT ofs) ++{ ++ rtx dstl, dsth, addr, insl, insh, meml, memh, dsta; ++ ++ if (TARGET_BWX && size == 2) ++ { ++ if (src != const0_rtx) ++ { ++ dstl = gen_lowpart (QImode, src); ++ dsth = expand_simple_binop (DImode, LSHIFTRT, src, GEN_INT (8), NULL, ++ 1, OPTAB_LIB_WIDEN); ++ dsth = gen_lowpart (QImode, dsth); ++ } ++ else ++ dstl = dsth = const0_rtx; ++ ++ meml = adjust_address (dst, QImode, ofs); ++ memh = adjust_address (dst, QImode, ofs + 1); ++ ++ emit_move_insn (meml, dstl); ++ emit_move_insn (memh, dsth); ++ return; ++ } ++ ++ dstl = gen_reg_rtx (Pmode); ++ dsth = gen_reg_rtx (Pmode); ++ insl = gen_reg_rtx (Pmode); ++ insh = gen_reg_rtx (Pmode); ++ ++ dsta = XEXP (dst, 0); ++ if (GET_CODE (dsta) == LO_SUM) ++ dsta = force_reg (Pmode, dsta); ++ ++ /* AND addresses cannot be in any alias set, since they may implicitly ++ alias surrounding code. Ideally we'd have some alias set that ++ covered all types except those with alignment 8 or higher. 
*/ ++ ++ meml = change_address (dst, Pmode, ++ gen_rtx_AND (Pmode, plus_constant (Pmode, dsta, ofs), ++ GEN_INT (-8))); ++ set_mem_alias_set (meml, 0); ++ ++ memh ++ = change_address (dst, Pmode, ++ gen_rtx_AND (Pmode, ++ plus_constant (Pmode, dsta, ofs + size - 1), ++ GEN_INT (-8))); ++ set_mem_alias_set (memh, 0); ++ ++ emit_move_insn (dsth, memh); ++ emit_move_insn (dstl, meml); ++ ++ addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs)); ++ ++ if (src != CONST0_RTX (GET_MODE (src))) ++ { ++ emit_insn ( ++ gen_insxh (insh, gen_lowpart (DImode, src), GEN_INT (size * 8), addr)); ++ ++ switch ((int) size) ++ { ++ case 2: ++ emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr)); ++ break; ++ case 4: ++ emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr)); ++ break; ++ case 8: ++ emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr)); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ ++ emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size * 8), addr)); ++ ++ switch ((int) size) ++ { ++ case 2: ++ emit_insn (gen_mskwl (dstl, dstl, addr)); ++ break; ++ case 4: ++ emit_insn (gen_mskll (dstl, dstl, addr)); ++ break; ++ case 8: ++ emit_insn (gen_mskql (dstl, dstl, addr)); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ if (src != CONST0_RTX (GET_MODE (src))) ++ { ++ dsth = expand_binop (DImode, ior_optab, insh, dsth, dsth, 0, OPTAB_WIDEN); ++ dstl = expand_binop (DImode, ior_optab, insl, dstl, dstl, 0, OPTAB_WIDEN); ++ } ++ ++ /* Must store high before low for degenerate case of aligned. */ ++ emit_move_insn (memh, dsth); ++ emit_move_insn (meml, dstl); ++} ++ ++/* The block move code tries to maximize speed by separating loads and ++ stores at the expense of register pressure: we load all of the data ++ before we store it back out. There are two secondary effects worth ++ mentioning, that this speeds copying to/from aligned and unaligned ++ buffers, and that it makes the code significantly easier to write. */ ++ ++#define MAX_MOVE_WORDS 8 ++ ++/* Load an integral number of consecutive unaligned quadwords. */ ++ ++static void ++sw_64_expand_unaligned_load_words (rtx *out_regs, rtx smem, HOST_WIDE_INT words, ++ HOST_WIDE_INT ofs) ++{ ++ rtx const im8 = GEN_INT (-8); ++ rtx ext_tmps[MAX_MOVE_WORDS], data_regs[MAX_MOVE_WORDS + 1]; ++ rtx sreg, areg, tmp, smema; ++ HOST_WIDE_INT i; ++ ++ smema = XEXP (smem, 0); ++ if (GET_CODE (smema) == LO_SUM) ++ smema = force_reg (Pmode, smema); ++ ++ /* Generate all the tmp registers we need. */ ++ for (i = 0; i < words; ++i) ++ { ++ data_regs[i] = out_regs[i]; ++ ext_tmps[i] = gen_reg_rtx (DImode); ++ } ++ data_regs[words] = gen_reg_rtx (DImode); ++ ++ if (ofs != 0) ++ smem = adjust_address (smem, GET_MODE (smem), ofs); ++ ++ /* Load up all of the source data. */ ++ for (i = 0; i < words; ++i) ++ { ++ tmp = change_address (smem, Pmode, ++ gen_rtx_AND (Pmode, ++ plus_constant (Pmode, smema, 8 * i), ++ im8)); ++ set_mem_alias_set (tmp, 0); ++ emit_move_insn (data_regs[i], tmp); ++ } ++ ++ tmp = change_address ( ++ smem, Pmode, ++ gen_rtx_AND (Pmode, plus_constant (Pmode, smema, 8 * words - 1), im8)); ++ set_mem_alias_set (tmp, 0); ++ emit_move_insn (data_regs[words], tmp); ++ ++ /* Extract the half-word fragments. Unfortunately decided to make ++ extxh with offset zero a noop instead of zeroing the register, so ++ we must take care of that edge condition ourselves with cmov. 
*/ ++ ++ sreg = copy_addr_to_reg (smema); ++ areg ++ = expand_binop (DImode, and_optab, sreg, GEN_INT (7), NULL, 1, OPTAB_WIDEN); ++ for (i = 0; i < words; ++i) ++ { ++ emit_insn (gen_extql (data_regs[i], data_regs[i], sreg)); ++ emit_insn (gen_extqh (ext_tmps[i], data_regs[i + 1], sreg)); ++ emit_insn (gen_rtx_SET ( ++ ext_tmps[i], ++ gen_rtx_IF_THEN_ELSE (DImode, gen_rtx_EQ (DImode, areg, const0_rtx), ++ const0_rtx, ext_tmps[i]))); ++ } ++ ++ /* Merge the half-words into whole words. */ ++ for (i = 0; i < words; ++i) ++ { ++ out_regs[i] = expand_binop (DImode, ior_optab, data_regs[i], ext_tmps[i], ++ data_regs[i], 1, OPTAB_WIDEN); ++ } ++} ++ ++/* Store an integral number of consecutive unaligned quadwords. DATA_REGS ++ may be NULL to store zeros. */ ++ ++static void ++sw_64_expand_unaligned_store_words (rtx *data_regs, rtx dmem, ++ HOST_WIDE_INT words, HOST_WIDE_INT ofs) ++{ ++ rtx const im8 = GEN_INT (-8); ++ rtx ins_tmps[MAX_MOVE_WORDS]; ++ rtx st_tmp_1, st_tmp_2, dreg; ++ rtx st_addr_1, st_addr_2, dmema; ++ HOST_WIDE_INT i; ++ ++ dmema = XEXP (dmem, 0); ++ if (GET_CODE (dmema) == LO_SUM) ++ dmema = force_reg (Pmode, dmema); ++ ++ /* Generate all the tmp registers we need. */ ++ if (data_regs != NULL) ++ for (i = 0; i < words; ++i) ++ ins_tmps[i] = gen_reg_rtx (DImode); ++ st_tmp_1 = gen_reg_rtx (DImode); ++ st_tmp_2 = gen_reg_rtx (DImode); ++ ++ if (ofs != 0) ++ dmem = adjust_address (dmem, GET_MODE (dmem), ofs); ++ ++ st_addr_2 = change_address ( ++ dmem, Pmode, ++ gen_rtx_AND (Pmode, plus_constant (Pmode, dmema, words * 8 - 1), im8)); ++ set_mem_alias_set (st_addr_2, 0); ++ ++ st_addr_1 = change_address (dmem, Pmode, gen_rtx_AND (Pmode, dmema, im8)); ++ set_mem_alias_set (st_addr_1, 0); ++ ++ /* Load up the destination end bits. */ ++ emit_move_insn (st_tmp_2, st_addr_2); ++ emit_move_insn (st_tmp_1, st_addr_1); ++ ++ /* Shift the input data into place. */ ++ dreg = copy_addr_to_reg (dmema); ++ if (data_regs != NULL) ++ { ++ for (i = words - 1; i >= 0; --i) ++ { ++ emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg)); ++ emit_insn (gen_insql (data_regs[i], data_regs[i], dreg)); ++ } ++ for (i = words - 1; i > 0; --i) ++ { ++ ins_tmps[i - 1] ++ = expand_binop (DImode, ior_optab, data_regs[i], ins_tmps[i - 1], ++ ins_tmps[i - 1], 1, OPTAB_WIDEN); ++ } ++ } ++ ++ /* Split and merge the ends with the destination data. */ ++ emit_insn (gen_mskqh (st_tmp_2, st_tmp_2, dreg)); ++ emit_insn (gen_mskql (st_tmp_1, st_tmp_1, dreg)); ++ ++ if (data_regs != NULL) ++ { ++ st_tmp_2 = expand_binop (DImode, ior_optab, st_tmp_2, ins_tmps[words - 1], ++ st_tmp_2, 1, OPTAB_WIDEN); ++ st_tmp_1 = expand_binop (DImode, ior_optab, st_tmp_1, data_regs[0], ++ st_tmp_1, 1, OPTAB_WIDEN); ++ } ++ ++ /* Store it all. */ ++ emit_move_insn (st_addr_2, st_tmp_2); ++ for (i = words - 1; i > 0; --i) ++ { ++ rtx tmp = change_address ( ++ dmem, Pmode, ++ gen_rtx_AND (Pmode, plus_constant (Pmode, dmema, i * 8), im8)); ++ set_mem_alias_set (tmp, 0); ++ emit_move_insn (tmp, data_regs ? ins_tmps[i - 1] : const0_rtx); ++ } ++ emit_move_insn (st_addr_1, st_tmp_1); ++} ++ ++/* Expand string/block move operations. ++ ++ operands[0] is the pointer to the destination. ++ operands[1] is the pointer to the source. ++ operands[2] is the number of bytes to move. ++ operands[3] is the alignment. 
*/ ++ ++int ++sw_64_expand_block_move (rtx operands[]) ++{ ++ rtx bytes_rtx = operands[2]; ++ rtx align_rtx = operands[3]; ++ HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); ++ HOST_WIDE_INT bytes = orig_bytes; ++ HOST_WIDE_INT src_align = INTVAL (align_rtx) * BITS_PER_UNIT; ++ HOST_WIDE_INT dst_align = src_align; ++ rtx orig_src = operands[1]; ++ rtx orig_dst = operands[0]; ++ rtx data_regs[2 * MAX_MOVE_WORDS + 16]; ++ rtx tmp; ++ unsigned int i, words, ofs, nregs = 0; ++ ++ if (orig_bytes <= 0) ++ return 1; ++ else if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) ++ return 0; ++ ++ /* Look for additional alignment information from recorded register info. */ ++ ++ tmp = XEXP (orig_src, 0); ++ if (REG_P (tmp)) ++ src_align = MAX (src_align, REGNO_POINTER_ALIGN (REGNO (tmp))); ++ else if (GET_CODE (tmp) == PLUS && REG_P (XEXP (tmp, 0)) ++ && CONST_INT_P (XEXP (tmp, 1))) ++ { ++ unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); ++ unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); ++ ++ if (a > src_align) ++ { ++ if (a >= 64 && c % 8 == 0) ++ src_align = 64; ++ else if (a >= 32 && c % 4 == 0) ++ src_align = 32; ++ else if (a >= 16 && c % 2 == 0) ++ src_align = 16; ++ } ++ } ++ ++ tmp = XEXP (orig_dst, 0); ++ if (REG_P (tmp)) ++ dst_align = MAX (dst_align, REGNO_POINTER_ALIGN (REGNO (tmp))); ++ else if (GET_CODE (tmp) == PLUS && REG_P (XEXP (tmp, 0)) ++ && CONST_INT_P (XEXP (tmp, 1))) ++ { ++ unsigned HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); ++ unsigned int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); ++ ++ if (a > dst_align) ++ { ++ if (a >= 64 && c % 8 == 0) ++ dst_align = 64; ++ else if (a >= 32 && c % 4 == 0) ++ dst_align = 32; ++ else if (a >= 16 && c % 2 == 0) ++ dst_align = 16; ++ } ++ } ++ ++ ofs = 0; ++ if (src_align >= 64 && bytes >= 8) ++ { ++ words = bytes / 8; ++ ++ for (i = 0; i < words; ++i) ++ data_regs[nregs + i] = gen_reg_rtx (DImode); ++ ++ for (i = 0; i < words; ++i) ++ emit_move_insn (data_regs[nregs + i], ++ adjust_address (orig_src, DImode, ofs + i * 8)); ++ ++ nregs += words; ++ bytes -= words * 8; ++ ofs += words * 8; ++ } ++ ++ if (src_align >= 32 && bytes >= 4) ++ { ++ words = bytes / 4; ++ ++ for (i = 0; i < words; ++i) ++ data_regs[nregs + i] = gen_reg_rtx (SImode); ++ ++ for (i = 0; i < words; ++i) ++ emit_move_insn (data_regs[nregs + i], ++ adjust_address (orig_src, SImode, ofs + i * 4)); ++ ++ nregs += words; ++ bytes -= words * 4; ++ ofs += words * 4; ++ } ++ ++ if (bytes >= 8) ++ { ++ words = bytes / 8; ++ ++ for (i = 0; i < words + 1; ++i) ++ data_regs[nregs + i] = gen_reg_rtx (DImode); ++ ++ sw_64_expand_unaligned_load_words (data_regs + nregs, orig_src, words, ++ ofs); ++ ++ nregs += words; ++ bytes -= words * 8; ++ ofs += words * 8; ++ } ++ ++ if (!TARGET_BWX && bytes >= 4) ++ { ++ data_regs[nregs++] = tmp = gen_reg_rtx (SImode); ++ sw_64_expand_unaligned_load (tmp, orig_src, 4, ofs, 0); ++ bytes -= 4; ++ ofs += 4; ++ } ++ ++ if (bytes >= 2) ++ { ++ if (src_align >= 16) ++ { ++ do ++ { ++ data_regs[nregs++] = tmp = gen_reg_rtx (HImode); ++ emit_move_insn (tmp, adjust_address (orig_src, HImode, ofs)); ++ bytes -= 2; ++ ofs += 2; ++ } ++ while (bytes >= 2); ++ } ++ else if (!TARGET_BWX) ++ { ++ data_regs[nregs++] = tmp = gen_reg_rtx (HImode); ++ sw_64_expand_unaligned_load (tmp, orig_src, 2, ofs, 0); ++ bytes -= 2; ++ ofs += 2; ++ } ++ } ++ ++ while (bytes > 0) ++ { ++ data_regs[nregs++] = tmp = gen_reg_rtx (QImode); ++ emit_move_insn (tmp, adjust_address (orig_src, QImode, ofs)); ++ bytes -= 1; ++ ofs += 1; ++ } ++ ++ gcc_assert (nregs <= 
ARRAY_SIZE (data_regs)); ++ ++ /* Now save it back out again. */ ++ ++ i = 0, ofs = 0; ++ ++ /* Write out the data in whatever chunks reading the source allowed. */ ++ if (dst_align >= 64) ++ { ++ while (i < nregs && GET_MODE (data_regs[i]) == DImode) ++ { ++ emit_move_insn (adjust_address (orig_dst, DImode, ofs), data_regs[i]); ++ ofs += 8; ++ i++; ++ } ++ } ++ ++ if (dst_align >= 32) ++ { ++ /* If the source has remaining DImode regs, write them out in ++ two pieces. */ ++ while (i < nregs && GET_MODE (data_regs[i]) == DImode) ++ { ++ tmp = expand_binop (DImode, lshr_optab, data_regs[i], GEN_INT (32), ++ NULL_RTX, 1, OPTAB_WIDEN); ++ ++ emit_move_insn (adjust_address (orig_dst, SImode, ofs), ++ gen_lowpart (SImode, data_regs[i])); ++ emit_move_insn (adjust_address (orig_dst, SImode, ofs + 4), ++ gen_lowpart (SImode, tmp)); ++ ofs += 8; ++ i++; ++ } ++ ++ while (i < nregs && GET_MODE (data_regs[i]) == SImode) ++ { ++ emit_move_insn (adjust_address (orig_dst, SImode, ofs), data_regs[i]); ++ ofs += 4; ++ i++; ++ } ++ } ++ ++ if (i < nregs && GET_MODE (data_regs[i]) == DImode) ++ { ++ /* Write out a remaining block of words using unaligned methods. */ ++ ++ for (words = 1; i + words < nregs; words++) ++ if (GET_MODE (data_regs[i + words]) != DImode) ++ break; ++ ++ if (words == 1) ++ sw_64_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs); ++ else ++ sw_64_expand_unaligned_store_words (data_regs + i, orig_dst, words, ++ ofs); ++ ++ i += words; ++ ofs += words * 8; ++ } ++ ++ /* Due to the above, this won't be aligned. */ ++ /* ??? If we have more than one of these, consider constructing full ++ words in registers and using sw_64_expand_unaligned_store_words. */ ++ while (i < nregs && GET_MODE (data_regs[i]) == SImode) ++ { ++ sw_64_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs); ++ ofs += 4; ++ i++; ++ } ++ ++ if (dst_align >= 16) ++ while (i < nregs && GET_MODE (data_regs[i]) == HImode) ++ { ++ emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]); ++ i++; ++ ofs += 2; ++ } ++ else ++ while (i < nregs && GET_MODE (data_regs[i]) == HImode) ++ { ++ sw_64_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs); ++ i++; ++ ofs += 2; ++ } ++ ++ /* The remainder must be byte copies. */ ++ while (i < nregs) ++ { ++ gcc_assert (GET_MODE (data_regs[i]) == QImode); ++ emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]); ++ i++; ++ ofs += 1; ++ } ++ ++ return 1; ++} ++ ++int ++sw_64_expand_block_clear (rtx operands[]) ++{ ++ rtx bytes_rtx = operands[1]; ++ rtx align_rtx = operands[3]; ++ HOST_WIDE_INT orig_bytes = INTVAL (bytes_rtx); ++ HOST_WIDE_INT bytes = orig_bytes; ++ HOST_WIDE_INT align = INTVAL (align_rtx) * BITS_PER_UNIT; ++ HOST_WIDE_INT alignofs = 0; ++ rtx orig_dst = operands[0]; ++ rtx tmp; ++ int i, words, ofs = 0; ++ ++ if (orig_bytes <= 0) ++ return 1; ++ if (orig_bytes > MAX_MOVE_WORDS * UNITS_PER_WORD) ++ return 0; ++ ++ /* Look for stricter alignment. */ ++ tmp = XEXP (orig_dst, 0); ++ if (REG_P (tmp)) ++ align = MAX (align, REGNO_POINTER_ALIGN (REGNO (tmp))); ++ else if (GET_CODE (tmp) == PLUS && REG_P (XEXP (tmp, 0)) ++ && CONST_INT_P (XEXP (tmp, 1))) ++ { ++ HOST_WIDE_INT c = INTVAL (XEXP (tmp, 1)); ++ int a = REGNO_POINTER_ALIGN (REGNO (XEXP (tmp, 0))); ++ ++ if (a > align) ++ { ++ if (a >= 64) ++ align = a, alignofs = 8 - c % 8; ++ else if (a >= 32) ++ align = a, alignofs = 4 - c % 4; ++ else if (a >= 16) ++ align = a, alignofs = 2 - c % 2; ++ } ++ } ++ ++ /* Handle an unaligned prefix first. 
*/ ++ ++ if (alignofs > 0) ++ { ++ /* Given that alignofs is bounded by align, the only time BWX could ++ generate three stores is for a 7 byte fill. Prefer two individual ++ stores over a load/mask/store sequence. */ ++ if ((!TARGET_BWX || alignofs == 7) && align >= 32 ++ && !(alignofs == 4 && bytes >= 4)) ++ { ++ machine_mode mode = (align >= 64 ? DImode : SImode); ++ int inv_alignofs = (align >= 64 ? 8 : 4) - alignofs; ++ rtx mem, tmp; ++ HOST_WIDE_INT mask; ++ ++ mem = adjust_address (orig_dst, mode, ofs - inv_alignofs); ++ set_mem_alias_set (mem, 0); ++ ++ mask = ~(HOST_WIDE_INT_M1U << (inv_alignofs * 8)); ++ if (bytes < alignofs) ++ { ++ mask |= HOST_WIDE_INT_M1U << ((inv_alignofs + bytes) * 8); ++ ofs += bytes; ++ bytes = 0; ++ } ++ else ++ { ++ bytes -= alignofs; ++ ofs += alignofs; ++ } ++ alignofs = 0; ++ ++ tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask), NULL_RTX, 1, ++ OPTAB_WIDEN); ++ ++ emit_move_insn (mem, tmp); ++ } ++ ++ if (TARGET_BWX && (alignofs & 1) && bytes >= 1) ++ { ++ emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); ++ bytes -= 1; ++ ofs += 1; ++ alignofs -= 1; ++ } ++ if (TARGET_BWX && align >= 16 && (alignofs & 3) == 2 && bytes >= 2) ++ { ++ emit_move_insn (adjust_address (orig_dst, HImode, ofs), const0_rtx); ++ bytes -= 2; ++ ofs += 2; ++ alignofs -= 2; ++ } ++ if (alignofs == 4 && bytes >= 4) ++ { ++ emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx); ++ bytes -= 4; ++ ofs += 4; ++ alignofs = 0; ++ } ++ ++ /* If we've not used the extra lead alignment information by now, ++ we won't be able to. Downgrade align to match what's left over. */ ++ if (alignofs > 0) ++ { ++ alignofs = alignofs & -alignofs; ++ align = MIN (align, alignofs * BITS_PER_UNIT); ++ } ++ } ++ ++ /* Handle a block of contiguous long-words. */ ++ ++ if (align >= 64 && bytes >= 8) ++ { ++ words = bytes / 8; ++ ++ for (i = 0; i < words; ++i) ++ emit_move_insn (adjust_address (orig_dst, DImode, ofs + i * 8), ++ const0_rtx); ++ ++ bytes -= words * 8; ++ ofs += words * 8; ++ } ++ ++ /* If the block is large and appropriately aligned, emit a single ++ store followed by a sequence of stl_u insns. */ ++ ++ if (align >= 32 && bytes > 16) ++ { ++ rtx orig_dsta; ++ ++ emit_move_insn (adjust_address (orig_dst, SImode, ofs), const0_rtx); ++ bytes -= 4; ++ ofs += 4; ++ ++ orig_dsta = XEXP (orig_dst, 0); ++ if (GET_CODE (orig_dsta) == LO_SUM) ++ orig_dsta = force_reg (Pmode, orig_dsta); ++ ++ words = bytes / 8; ++ for (i = 0; i < words; ++i) ++ { ++ rtx mem = change_address ( ++ orig_dst, Pmode, ++ gen_rtx_AND (Pmode, plus_constant (Pmode, orig_dsta, ofs + i * 8), ++ GEN_INT (-8))); ++ set_mem_alias_set (mem, 0); ++ emit_move_insn (mem, const0_rtx); ++ } ++ ++ /* Depending on the alignment, the first stl_u may have overlapped ++ with the initial stl, which means that the last stl_u didn't ++ write as much as it would appear. Leave those questionable bytes ++ unaccounted for. */ ++ bytes -= words * 8 - 4; ++ ofs += words * 8 - 4; ++ } ++ ++ /* Handle a smaller block of aligned words. */ ++ ++ if ((align >= 64 && bytes == 4) || (align == 32 && bytes >= 4)) ++ { ++ words = bytes / 4; ++ ++ for (i = 0; i < words; ++i) ++ emit_move_insn (adjust_address (orig_dst, SImode, ofs + i * 4), ++ const0_rtx); ++ ++ bytes -= words * 4; ++ ofs += words * 4; ++ } ++ ++ /* An unaligned block uses stl_u stores for as many as possible. 
*/ ++ ++ if (bytes >= 8) ++ { ++ words = bytes / 8; ++ ++ sw_64_expand_unaligned_store_words (NULL, orig_dst, words, ofs); ++ ++ bytes -= words * 8; ++ ofs += words * 8; ++ } ++ ++ /* Next clean up any trailing pieces. */ ++ ++ /* Count the number of bits in BYTES for which aligned stores could ++ be emitted. */ ++ words = 0; ++ for (i = (TARGET_BWX ? 1 : 4); i * BITS_PER_UNIT <= align; i <<= 1) ++ if (bytes & i) ++ words += 1; ++ ++ /* If we have appropriate alignment (and it wouldn't take too many ++ instructions otherwise), mask out the bytes we need. */ ++ if (TARGET_BWX ? words > 2 : bytes > 0) ++ { ++ if (align >= 64) ++ { ++ rtx mem, tmp; ++ HOST_WIDE_INT mask; ++ ++ mem = adjust_address (orig_dst, DImode, ofs); ++ set_mem_alias_set (mem, 0); ++ ++ mask = HOST_WIDE_INT_M1U << (bytes * 8); ++ ++ tmp = expand_binop (DImode, and_optab, mem, GEN_INT (mask), NULL_RTX, ++ 1, OPTAB_WIDEN); ++ ++ emit_move_insn (mem, tmp); ++ return 1; ++ } ++ else if (align >= 32 && bytes < 4) ++ { ++ rtx mem, tmp; ++ HOST_WIDE_INT mask; ++ ++ mem = adjust_address (orig_dst, SImode, ofs); ++ set_mem_alias_set (mem, 0); ++ ++ mask = HOST_WIDE_INT_M1U << (bytes * 8); ++ ++ tmp = expand_binop (SImode, and_optab, mem, GEN_INT (mask), NULL_RTX, ++ 1, OPTAB_WIDEN); ++ ++ emit_move_insn (mem, tmp); ++ return 1; ++ } ++ } ++ ++ if (!TARGET_BWX && bytes >= 4) ++ { ++ sw_64_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs); ++ bytes -= 4; ++ ofs += 4; ++ } ++ ++ if (bytes >= 2) ++ { ++ if (align >= 16) ++ { ++ do ++ { ++ emit_move_insn (adjust_address (orig_dst, HImode, ofs), ++ const0_rtx); ++ bytes -= 2; ++ ofs += 2; ++ } ++ while (bytes >= 2); ++ } ++ else if (!TARGET_BWX) ++ { ++ sw_64_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs); ++ bytes -= 2; ++ ofs += 2; ++ } ++ } ++ ++ while (bytes > 0) ++ { ++ emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); ++ bytes -= 1; ++ ofs += 1; ++ } ++ ++ return 1; ++} ++ ++/* Returns a mask so that zap(x, value) == x & mask. */ ++ ++rtx ++sw_64_expand_zap_mask (HOST_WIDE_INT value) ++{ ++ rtx result; ++ int i; ++ HOST_WIDE_INT mask = 0; ++ ++ for (i = 7; i >= 0; --i) ++ { ++ mask <<= 8; ++ if (!((value >> i) & 1)) ++ mask |= 0xff; ++ } ++ ++ result = gen_int_mode (mask, DImode); ++ return result; ++} ++ ++void ++sw_64_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx), ++ machine_mode mode, rtx op0, rtx op1, rtx op2) ++{ ++ op0 = gen_lowpart (mode, op0); ++ ++ if (op1 == const0_rtx) ++ op1 = CONST0_RTX (mode); ++ else ++ op1 = gen_lowpart (mode, op1); ++ ++ if (op2 == const0_rtx) ++ op2 = CONST0_RTX (mode); ++ else ++ op2 = gen_lowpart (mode, op2); ++ ++ emit_insn ((*gen) (op0, op1, op2)); ++} ++ ++/* A subroutine of the atomic operation splitters. Jump to LABEL if ++ COND is true. Mark the jump as unlikely to be taken. */ ++ ++static void ++emit_unlikely_jump (rtx cond, rtx label) ++{ ++ rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx); ++ rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x)); ++ add_reg_br_prob_note (insn, profile_probability::very_unlikely ()); ++} ++ ++/* Subroutines of the atomic operation splitters. Emit barriers ++ as needed for the memory MODEL. 
*/ ++ ++static void ++sw_64_pre_atomic_barrier (enum memmodel model) ++{ ++ if (need_atomic_barrier_p (model, true)) ++ emit_insn (gen_memory_barrier ()); ++} ++ ++static void ++sw_64_post_atomic_barrier (enum memmodel model) ++{ ++ if (need_atomic_barrier_p (model, false)) ++ emit_insn (gen_memory_barrier ()); ++} ++ ++/* A subroutine of the atomic operation splitters. Emit an insxl ++ instruction in MODE. */ ++ ++static rtx ++emit_insxl (machine_mode mode, rtx op1, rtx op2) ++{ ++ rtx ret = gen_reg_rtx (DImode); ++ rtx (*fn) (rtx, rtx, rtx); ++ ++ switch (mode) ++ { ++ case E_QImode: ++ fn = gen_insbl; ++ break; ++ case E_HImode: ++ fn = gen_inswl; ++ break; ++ case E_SImode: ++ fn = gen_insll; ++ break; ++ case E_DImode: ++ fn = gen_insql; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ op1 = force_reg (mode, op1); ++ emit_insn (fn (ret, op1, op2)); ++ ++ return ret; ++} ++ ++/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation ++ to perform. MEM is the memory on which to operate. VAL is the second ++ operand of the binary operator. BEFORE and AFTER are optional locations to ++ return the value of MEM either before of after the operation. SCRATCH is ++ a scratch register. */ ++ ++void ++sw_64_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before, ++ rtx after, rtx scratch, enum memmodel model) ++{ ++ machine_mode mode = GET_MODE (mem); ++ rtx label, x, cond = gen_rtx_REG (DImode, REGNO (scratch)); ++ ++ label = gen_label_rtx (); ++ emit_label (label); ++ label = gen_rtx_LABEL_REF (DImode, label); ++ ++ if (before == NULL) ++ before = scratch; ++ emit_insn (gen_load_locked (mode, before, mem)); ++ ++ if (!TARGET_SW8A) ++ { ++ if (after) ++ { ++ rtx cond1 = gen_rtx_REG (DImode, REGNO (after)); ++ emit_insn (gen_rtx_SET (cond1, const1_rtx)); ++ emit_insn (gen_builtin_wr_f (cond1)); ++ } ++ else ++ { ++ rtx cond2 = gen_rtx_REG (DImode, 28); ++ emit_insn (gen_rtx_SET (cond2, const1_rtx)); ++ emit_insn (gen_builtin_wr_f (cond2)); ++ } ++ } ++ if (code == NOT) ++ { ++ x = gen_rtx_AND (mode, before, val); ++ emit_insn (gen_rtx_SET (val, x)); ++ ++ x = gen_rtx_NOT (mode, val); ++ } ++ else ++ x = gen_rtx_fmt_ee (code, mode, before, val); ++ if (after) ++ emit_insn (gen_rtx_SET (after, copy_rtx (x))); ++ emit_insn (gen_rtx_SET (scratch, x)); ++ ++ emit_insn (gen_store_conditional (mode, cond, mem, scratch)); ++ if (!TARGET_SW8A) ++ emit_insn (gen_builtin_rd_f (cond)); ++ ++ x = gen_rtx_EQ (DImode, cond, const0_rtx); ++ emit_unlikely_jump (x, label); ++} ++ ++/* Expand a compare and swap operation. 
*/ ++ ++void ++sw_64_split_compare_and_swap (rtx operands[]) ++{ ++ rtx cond, retval, mem, oldval, newval; ++ bool is_weak; ++ enum memmodel mod_s, mod_f; ++ machine_mode mode; ++ rtx label1, label2, x; ++ ++ rtx imust = operands[8]; ++ cond = operands[0]; ++ retval = operands[1]; ++ mem = operands[2]; ++ oldval = operands[3]; ++ newval = operands[4]; ++ is_weak = (operands[5] != const0_rtx); ++ mod_s = memmodel_from_int (INTVAL (operands[6])); ++ mod_f = memmodel_from_int (INTVAL (operands[7])); ++ mode = GET_MODE (mem); ++ ++ label1 = NULL_RTX; ++ if (!is_weak) ++ { ++ label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); ++ emit_label (XEXP (label1, 0)); ++ } ++ label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); ++ ++ emit_insn (gen_load_locked (mode, retval, mem)); ++ x = gen_lowpart (DImode, retval); ++ ++ rtx imust1; ++ if (TARGET_SW8A) ++ { ++ if (oldval == const0_rtx) ++ { ++ emit_move_insn (cond, const0_rtx); ++ x = gen_rtx_NE (DImode, x, const0_rtx); ++ } ++ else ++ { ++ x = gen_rtx_EQ (DImode, x, oldval); ++ emit_insn (gen_rtx_SET (cond, x)); ++ x = gen_rtx_EQ (DImode, cond, const0_rtx); ++ } ++ emit_unlikely_jump (x, label2); ++ } ++ else ++ { ++ x = gen_rtx_EQ (DImode, x, oldval); ++ imust1 = gen_lowpart (DImode, imust); ++ emit_insn (gen_rtx_SET (imust1, x)); ++ emit_insn (gen_builtin_wr_f (imust1)); ++ } ++ ++ emit_move_insn (cond, newval); ++ emit_insn (gen_store_conditional (mode, cond, mem, gen_lowpart (mode, cond))); ++ ++ if (!TARGET_SW8A) ++ { ++ emit_insn (gen_builtin_rd_f (cond)); ++ imust1 = gen_rtx_EQ (DImode, imust1, const0_rtx); ++ emit_unlikely_jump (imust1, label2); ++ } ++ if (!is_weak) ++ { ++ x = gen_rtx_EQ (DImode, cond, const0_rtx); ++ emit_unlikely_jump (x, label1); ++ } ++ ++ if (!is_mm_relaxed (mod_f)) ++ emit_label (XEXP (label2, 0)); ++ ++ if (is_mm_relaxed (mod_f)) ++ emit_label (XEXP (label2, 0)); ++} ++ ++void ++sw_64_expand_compare_and_swap_12 (rtx operands[]) ++{ ++ rtx cond, dst, mem, oldval, newval, is_weak, mod_s, mod_f; ++ machine_mode mode; ++ rtx addr, align, wdst; ++ rtx imust; ++ ++ cond = operands[0]; ++ dst = operands[1]; ++ mem = operands[2]; ++ oldval = operands[3]; ++ newval = operands[4]; ++ is_weak = operands[5]; ++ mod_s = operands[6]; ++ mod_f = operands[7]; ++ mode = GET_MODE (mem); ++ bool use_cas = GET_MODE_SIZE (mode) >= 32 && flag_sw_use_cas; ++ if (!use_cas) ++ imust = operands[8]; ++ ++ /* We forced the address into a register via mem_noofs_operand. 
*/ ++ addr = XEXP (mem, 0); ++ gcc_assert (register_operand (addr, DImode)); ++ ++ align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8), NULL_RTX, 1, ++ OPTAB_DIRECT); ++ if (oldval != const0_rtx && TARGET_SW8A && use_cas) ++ oldval = emit_insxl (mode, oldval, addr); ++ oldval = convert_modes (DImode, mode, oldval, 1); ++ ++ if (newval != const0_rtx) ++ newval = emit_insxl (mode, newval, addr); ++ ++ wdst = gen_reg_rtx (DImode); ++ if (TARGET_SW8A && use_cas) ++ emit_insn (gen_atomic_compare_and_swap_1_target_sw8a ( ++ mode, cond, wdst, mem, oldval, newval, align, is_weak, mod_s, mod_f)); ++ else ++ emit_insn (gen_atomic_compare_and_swap_1 (mode, cond, wdst, mem, oldval, ++ newval, align, is_weak, mod_s, ++ mod_f, imust)); ++ emit_move_insn (dst, gen_lowpart (mode, wdst)); ++} ++ ++void ++sw_64_split_compare_and_swap_12 (rtx operands[]) ++{ ++ rtx cond, dest, orig_mem, oldval, newval, align, scratch; ++ machine_mode mode; ++ bool is_weak; ++ enum memmodel mod_s, mod_f; ++ rtx label1, label2, mem, addr, width, mask, x; ++ rtx imust; ++ ++ cond = operands[0]; ++ dest = operands[1]; ++ orig_mem = operands[2]; ++ oldval = operands[3]; ++ newval = operands[4]; ++ align = operands[5]; ++ is_weak = (operands[6] != const0_rtx); ++ mod_s = memmodel_from_int (INTVAL (operands[7])); ++ mod_f = memmodel_from_int (INTVAL (operands[8])); ++ imust = operands[9]; ++ scratch = operands[10]; ++ mode = GET_MODE (orig_mem); ++ addr = XEXP (orig_mem, 0); ++ ++ mem = gen_rtx_MEM (DImode, align); ++ MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); ++ if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) ++ set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); ++ ++ label1 = NULL_RTX; ++ if (!is_weak) ++ { ++ label1 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); ++ emit_label (XEXP (label1, 0)); ++ } ++ label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); ++ ++ emit_insn (gen_load_locked (DImode, scratch, mem)); ++ ++ width = GEN_INT (GET_MODE_BITSIZE (mode)); ++ mask = GEN_INT (mode == QImode ? 0xff : 0xffff); ++ emit_insn (gen_extxl (dest, scratch, width, addr)); ++ ++ rtx imust1; ++ if (TARGET_SW8A) ++ { ++ if (oldval == const0_rtx) ++ { ++ emit_move_insn (cond, const0_rtx); ++ x = gen_rtx_NE (DImode, dest, const0_rtx); ++ } ++ else ++ { ++ x = gen_rtx_EQ (DImode, dest, oldval); ++ emit_insn (gen_rtx_SET (cond, x)); ++ x = gen_rtx_EQ (DImode, cond, const0_rtx); ++ } ++ emit_unlikely_jump (x, label2); ++ } ++ else ++ { ++ x = gen_rtx_EQ (DImode, dest, oldval); ++ imust1 = gen_lowpart (DImode, imust); ++ emit_insn (gen_rtx_SET (imust1, x)); ++ emit_insn (gen_builtin_wr_f (imust1)); ++ } ++ ++ emit_insn (gen_mskxl (cond, scratch, mask, addr)); ++ ++ if (newval != const0_rtx) ++ emit_insn (gen_iordi3 (cond, cond, newval)); ++ ++ emit_insn (gen_store_conditional (DImode, cond, mem, cond)); ++ if (!TARGET_SW8A) ++ { ++ emit_insn (gen_builtin_rd_f (cond)); ++ imust1 = gen_rtx_EQ (DImode, imust1, const0_rtx); ++ emit_unlikely_jump (imust1, label2); ++ } ++ ++ if (!is_weak) ++ { ++ x = gen_rtx_EQ (DImode, cond, const0_rtx); ++ emit_unlikely_jump (x, label1); ++ } ++ ++ if (!is_mm_relaxed (mod_f)) ++ emit_label (XEXP (label2, 0)); ++ ++ if (is_mm_relaxed (mod_f)) ++ emit_label (XEXP (label2, 0)); ++} ++ ++/* Expand an atomic exchange operation. 
*/ ++ ++void ++sw_64_split_atomic_exchange (rtx operands[]) ++{ ++ rtx retval, mem, val, scratch; ++ enum memmodel model; ++ machine_mode mode; ++ rtx label, x, cond; ++ ++ retval = operands[0]; ++ mem = operands[1]; ++ val = operands[2]; ++ model = (enum memmodel) INTVAL (operands[3]); ++ scratch = operands[4]; ++ mode = GET_MODE (mem); ++ cond = gen_lowpart (DImode, scratch); ++ ++ label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); ++ emit_label (XEXP (label, 0)); ++ ++ emit_insn (gen_load_locked (mode, retval, mem)); ++ if (!TARGET_SW8A) ++ { ++ emit_insn (gen_rtx_SET (cond, const1_rtx)); ++ emit_insn (gen_builtin_wr_f (cond)); ++ } ++ emit_move_insn (scratch, val); ++ emit_insn (gen_store_conditional (mode, cond, mem, scratch)); ++ if (!TARGET_SW8A) ++ emit_insn (gen_builtin_rd_f (cond)); ++ ++ x = gen_rtx_EQ (DImode, cond, const0_rtx); ++ emit_unlikely_jump (x, label); ++} ++ ++void ++sw_64_expand_atomic_exchange_12 (rtx operands[]) ++{ ++ rtx dst, mem, val, model; ++ machine_mode mode; ++ rtx addr, align, wdst; ++ ++ dst = operands[0]; ++ mem = operands[1]; ++ val = operands[2]; ++ model = operands[3]; ++ mode = GET_MODE (mem); ++ ++ /* We forced the address into a register via mem_noofs_operand. */ ++ addr = XEXP (mem, 0); ++ gcc_assert (register_operand (addr, DImode)); ++ ++ align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-8), NULL_RTX, 1, ++ OPTAB_DIRECT); ++ ++ /* Insert val into the correct byte location within the word. */ ++ if (val != const0_rtx) ++ val = emit_insxl (mode, val, addr); ++ ++ wdst = gen_reg_rtx (DImode); ++ emit_insn (gen_atomic_exchange_1 (mode, wdst, mem, val, align, model)); ++ emit_move_insn (dst, gen_lowpart (mode, wdst)); ++} ++ ++void ++sw_64_split_atomic_exchange_12 (rtx operands[]) ++{ ++ rtx dest, orig_mem, addr, val, align, scratch; ++ rtx label, mem, width, mask, x; ++ machine_mode mode; ++ enum memmodel model; ++ ++ dest = operands[0]; ++ orig_mem = operands[1]; ++ val = operands[2]; ++ align = operands[3]; ++ model = (enum memmodel) INTVAL (operands[4]); ++ scratch = operands[5]; ++ mode = GET_MODE (orig_mem); ++ addr = XEXP (orig_mem, 0); ++ ++ mem = gen_rtx_MEM (DImode, align); ++ MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); ++ if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) ++ set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); ++ ++ label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); ++ emit_label (XEXP (label, 0)); ++ ++ emit_insn (gen_load_locked (DImode, scratch, mem)); ++ if (!TARGET_SW8A) ++ { ++ emit_insn (gen_rtx_SET (dest, const1_rtx)); ++ emit_insn (gen_builtin_wr_f (dest)); ++ } ++ ++ width = GEN_INT (GET_MODE_BITSIZE (mode)); ++ mask = GEN_INT (mode == QImode ? 0xff : 0xffff); ++ emit_insn (gen_extxl (dest, scratch, width, addr)); ++ emit_insn (gen_mskxl (scratch, scratch, mask, addr)); ++ if (val != const0_rtx) ++ emit_insn (gen_iordi3 (scratch, scratch, val)); ++ ++ emit_insn (gen_store_conditional (DImode, scratch, mem, scratch)); ++ if (!TARGET_SW8A) ++ emit_insn (gen_builtin_rd_f (scratch)); ++ ++ x = gen_rtx_EQ (DImode, scratch, const0_rtx); ++ emit_unlikely_jump (x, label); ++} ++ ++/* Emit an atomic compare-and-swap operation. SI and larger modes. 
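++ This form maps onto the sw_64_atomic_cas{si,di} patterns: RETVAL is
++ first set to NEWVAL, a single CAS pattern is emitted on MEM with OLDVAL
++ as the expected value, and COND is computed afterwards by comparing
++ RETVAL against OLDVAL. 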
*/ ++ ++void ++sw_64_split_atomic_cas (rtx operands[]) ++{ ++ rtx cond, retval, mem, oldval, newval; ++ rtx (*gen) (rtx, rtx, rtx); ++ enum memmodel mod_s; ++ machine_mode mode; ++ ++ cond = operands[0]; ++ retval = operands[1]; ++ mem = operands[2]; ++ oldval = operands[3]; ++ newval = operands[4]; ++ ++ mod_s = memmodel_from_int (INTVAL (operands[6])); ++ mode = GET_MODE (mem); ++ ++ if (GET_MODE (mem) == SImode && GET_MODE (oldval) == DImode ++ && GET_MODE (newval) == DImode) ++ { ++ oldval = gen_rtx_REG (SImode, REGNO (oldval)); ++ newval = gen_rtx_REG (SImode, REGNO (newval)); ++ } ++ ++ switch (mode) ++ { ++ case E_SImode: ++ gen = gen_sw_64_atomic_cassi; ++ break; ++ case E_DImode: ++ gen = gen_sw_64_atomic_casdi; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ ++ emit_insn (gen_rtx_SET (retval, newval)); ++ emit_insn (gen (oldval, mem, retval)); ++ ++ rtx x = gen_lowpart (DImode, retval); ++ rtx x1 = gen_lowpart (DImode, oldval); ++ x = gen_rtx_EQ (DImode, x, x1); ++ emit_insn (gen_rtx_SET (cond, x)); ++} ++ ++/* Emit an atomic compare-and-swap operation. HI and smaller modes. */ ++ ++void ++sw_64_split_atomic_cas_12 (rtx operands[]) ++{ ++ rtx cond, dest, orig_mem, oldval, newval, align, scratch; ++ machine_mode mode; ++ bool is_weak; ++ enum memmodel mod_s, mod_f; ++ rtx label1, label2, mem, addr, width, mask, x; ++ ++ cond = operands[0]; ++ dest = operands[1]; ++ orig_mem = operands[2]; ++ oldval = operands[3]; ++ newval = operands[4]; ++ align = operands[5]; ++ is_weak = (operands[6] != const0_rtx); ++ mod_s = memmodel_from_int (INTVAL (operands[7])); ++ mod_f = memmodel_from_int (INTVAL (operands[8])); ++ scratch = operands[9]; ++ mode = GET_MODE (orig_mem); ++ addr = XEXP (orig_mem, 0); ++ ++ mem = gen_rtx_MEM (DImode, align); ++ MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem); ++ if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER) ++ set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER); ++ ++ emit_move_insn (scratch, mem); ++ ++ width = GEN_INT (GET_MODE_BITSIZE (mode)); ++ mask = GEN_INT (mode == QImode ? 0xff : 0xffff); ++ emit_insn (gen_extxl (dest, scratch, width, addr)); ++ emit_insn (gen_mskxl (cond, scratch, mask, addr)); ++ ++ rtx scratch2 = operands[10]; ++ if (newval != const0_rtx) ++ emit_insn (gen_iordi3 (scratch2, cond, newval)); ++ if (oldval == const0_rtx) ++ { ++ emit_move_insn (cond, const0_rtx); ++ x = gen_rtx_NE (DImode, dest, const0_rtx); ++ } ++ else ++ { ++ emit_insn (gen_iordi3 (scratch, cond, oldval)); ++ emit_insn (gen_sw_64_atomic_casdi (scratch, mem, scratch2)); ++ ++ x = gen_rtx_EQ (DImode, scratch2, scratch); ++ emit_insn (gen_rtx_SET (cond, x)); ++ x = gen_rtx_EQ (DImode, cond, const0_rtx); ++ } ++} ++ ++/* Adjust the cost of a scheduling dependency. Return the new cost of ++ a dependency LINK or INSN on DEP_INSN. COST is the current cost. */ ++ ++static int ++sw_64_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost, ++ unsigned int) ++{ ++ enum attr_type dep_insn_type; ++ ++ /* If the dependence is an anti-dependence, there is no cost. For an ++ output dependence, there is sometimes a cost, but it doesn't seem ++ worth handling those few cases. */ ++ if (dep_type != 0) ++ return cost; ++ ++ /* If we can't recognize the insns, we can't really do anything. */ ++ if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0) ++ return cost; ++ ++ dep_insn_type = get_attr_type (dep_insn); ++ ++ /* Bring in the user-defined memory latency. 
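++ Loads (TYPE_ILD, TYPE_FLD and TYPE_LDSYM producers) are charged
++ sw_64_memory_latency - 1 extra cycles on top of the base cost. 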
*/
++ if (dep_insn_type == TYPE_ILD || dep_insn_type == TYPE_FLD
++ || dep_insn_type == TYPE_LDSYM)
++ cost += sw_64_memory_latency - 1;
++
++ /* Everything else handled in DFA bypasses now. */
++
++ return cost;
++}
++
++/* The number of instructions that can be issued per cycle. */
++
++static int
++sw_64_issue_rate (void)
++{
++ return ((sw_64_tune == PROCESSOR_SW6 || sw_64_tune == PROCESSOR_SW8) ? 4 : 2);
++}
++
++/* How many alternative schedules to try. This should be as wide as the
++ scheduling freedom in the DFA, but no wider. Making this value too
++ large results in extra work for the scheduler. */
++
++static int
++sw_64_multipass_dfa_lookahead (void)
++{
++ return ((sw_64_tune == PROCESSOR_SW6 || sw_64_tune == PROCESSOR_SW8) ? 4 : 2);
++}
++
++/* Machine-specific function data. */
++
++struct GTY (()) sw_64_links;
++
++/* Information about a function's frame layout. */
++struct GTY (()) sw_64_frame_info
++{
++ /* The size of the frame in bytes. */
++ HOST_WIDE_INT frame_size;
++
++ /* Bit X is set if the function saves or restores GPR X. */
++ unsigned HOST_WIDE_INT sa_mask;
++
++ /* The size of the saved callee-save int/FP registers. */
++ HOST_WIDE_INT saved_regs_size;
++
++ /* The number of extra stack bytes taken up by register varargs. */
++ HOST_WIDE_INT saved_varargs_size;
++
++ /* Offset of virtual frame pointer from stack pointer/frame bottom. */
++ HOST_WIDE_INT callee_offset;
++
++ /* Offset of hard frame pointer from stack pointer/frame bottom. */
++ HOST_WIDE_INT hard_frame_pointer_offset;
++
++ HOST_WIDE_INT local_offset;
++
++ /* The offset of arg_pointer_rtx from the bottom of the frame. */
++ HOST_WIDE_INT arg_pointer_offset;
++
++ bool emit_frame_pointer;
++};
++
++struct GTY (()) machine_function
++{
++ unsigned HOST_WIDE_INT sa_mask;
++ HOST_WIDE_INT sa_size;
++ HOST_WIDE_INT frame_size;
++
++ /* For flag_reorder_blocks_and_partition. */
++ rtx gp_save_rtx;
++
++ /* For VMS condition handlers. */
++ bool uses_condition_handler;
++
++ struct sw_64_frame_info frame;
++
++ /* Linkage entries. */
++ hash_map<nofree_string_hash, sw_64_links *> *links;
++};
++
++/* How to allocate a 'struct machine_function'. */
++
++static struct machine_function *
++sw_64_init_machine_status (void)
++{
++ return ggc_cleared_alloc<machine_function> ();
++}
++
++/* Start the ball rolling with RETURN_ADDR_RTX. */
++
++rtx
++sw_64_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
++{
++ if (count != 0)
++ return const0_rtx;
++
++ return get_hard_reg_initial_val (Pmode, REG_RA);
++}
++
++/* Return or create a memory slot containing the gp value for the current
++ function. Needed only if TARGET_LD_BUGGY_LDGP. */
++
++rtx
++sw_64_gp_save_rtx (void)
++{
++ rtx_insn *seq;
++ rtx m = cfun->machine->gp_save_rtx;
++
++ if (m == NULL)
++ {
++ start_sequence ();
++
++ m = assign_stack_local (Pmode, UNITS_PER_WORD, BITS_PER_WORD);
++ m = validize_mem (m);
++ emit_move_insn (m, pic_offset_table_rtx);
++
++ seq = get_insns ();
++ end_sequence ();
++
++ /* We used to simply emit the sequence after entry_of_function.
++ However this breaks the CFG if the first instruction in the
++ first block is not the NOTE_INSN_BASIC_BLOCK, for example a
++ label. Emit the sequence properly on the edge. We are only
++ invoked from dw2_build_landing_pads and finish_eh_generation
++ will call commit_edge_insertions thanks to a kludge. 
*/ ++ insert_insn_on_edge (seq, ++ single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun))); ++ ++ cfun->machine->gp_save_rtx = m; ++ } ++ ++ return m; ++} ++ ++static void ++sw_64_instantiate_decls (void) ++{ ++ if (cfun->machine->gp_save_rtx != NULL_RTX) ++ instantiate_decl_rtl (cfun->machine->gp_save_rtx); ++} ++ ++static int ++sw_64_ra_ever_killed (void) ++{ ++ rtx_insn *top; ++ ++ if (!has_hard_reg_initial_val (Pmode, REG_RA)) ++ return (int) df_regs_ever_live_p (REG_RA); ++ ++ push_topmost_sequence (); ++ top = get_insns (); ++ pop_topmost_sequence (); ++ ++ return reg_set_between_p (gen_rtx_REG (Pmode, REG_RA), top, NULL); ++} ++ ++/* Return the trap mode suffix applicable to the current ++ instruction, or NULL. */ ++ ++static const char * ++get_trap_mode_suffix (void) ++{ ++ enum attr_trap_suffix s = get_attr_trap_suffix (current_output_insn); ++ ++ switch (s) ++ { ++ case TRAP_SUFFIX_NONE: ++ return NULL; ++ ++ case TRAP_SUFFIX_SU: ++ if (sw_64_fptm >= SW_64_FPTM_SU) ++ return "su"; ++ return NULL; ++ ++ case TRAP_SUFFIX_SUI: ++ if (sw_64_fptm >= SW_64_FPTM_SUI) ++ return "sui"; ++ return NULL; ++ ++ case TRAP_SUFFIX_V_SV: ++ switch (sw_64_fptm) ++ { ++ case SW_64_FPTM_N: ++ return NULL; ++ case SW_64_FPTM_U: ++ return "v"; ++ case SW_64_FPTM_SU: ++ case SW_64_FPTM_SUI: ++ return "sv"; ++ default: ++ gcc_unreachable (); ++ } ++ ++ case TRAP_SUFFIX_V_SV_SVI: ++ switch (sw_64_fptm) ++ { ++ case SW_64_FPTM_N: ++ return NULL; ++ case SW_64_FPTM_U: ++ return "v"; ++ case SW_64_FPTM_SU: ++ return "sv"; ++ case SW_64_FPTM_SUI: ++ return "svi"; ++ default: ++ gcc_unreachable (); ++ } ++ break; ++ ++ case TRAP_SUFFIX_U_SU_SUI: ++ switch (sw_64_fptm) ++ { ++ case SW_64_FPTM_N: ++ return NULL; ++ case SW_64_FPTM_U: ++ return "u"; ++ case SW_64_FPTM_SU: ++ return "su"; ++ case SW_64_FPTM_SUI: ++ return "sui"; ++ default: ++ gcc_unreachable (); ++ } ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ gcc_unreachable (); ++} ++ ++/* Return the rounding mode suffix applicable to the current ++ instruction, or NULL. */ ++ ++static const char * ++get_round_mode_suffix (void) ++{ ++ enum attr_round_suffix s = get_attr_round_suffix (current_output_insn); ++ ++ switch (s) ++ { ++ case ROUND_SUFFIX_NONE: ++ return NULL; ++ case ROUND_SUFFIX_NORMAL: ++ switch (sw_64_fprm) ++ { ++ case SW_64_FPRM_NORM: ++ return NULL; ++ case SW_64_FPRM_MINF: ++ return "m"; ++ case SW_64_FPRM_CHOP: ++ return "c"; ++ case SW_64_FPRM_DYN: ++ return "d"; ++ default: ++ gcc_unreachable (); ++ } ++ break; ++ ++ case ROUND_SUFFIX_C: ++ return "c"; ++ ++ default: ++ gcc_unreachable (); ++ } ++ gcc_unreachable (); ++} ++ ++/* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */ ++ ++static bool ++sw_64_print_operand_punct_valid_p (unsigned char code) ++{ ++ return (code == '/' || code == ',' || code == '-' || code == '~' ++ || code == '#' || code == '*' || code == '&'); ++} ++ ++/* Implement TARGET_PRINT_OPERAND. The sw_64-specific ++ operand codes are documented below. 
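++ Briefly: %r and %R print a register, or $31/$f31 for a zero constant;
++ %N, %P, %h, %L, %m and %s print transformed integer constants; %M and
++ %U print b/w/l/q width suffixes; %C, %D, %c and %d print (possibly
++ reversed or swapped) comparison names; %E prints the divide or modulus
++ mnemonic; and %A marks unaligned memory accesses. 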
*/ ++ ++static const char * ++get_round_mode_suffix_sw (void) ++{ ++ enum attr_round_suffix s = get_attr_round_suffix (current_output_insn); ++ ++ switch (s) ++ { ++ case ROUND_SUFFIX_NONE: ++ return NULL; ++ case ROUND_SUFFIX_NORMAL: ++ switch (sw_64_fprm) ++ { ++ case SW_64_FPRM_NORM: ++ return "_g"; ++ case SW_64_FPRM_MINF: ++ return "_p"; ++ case SW_64_FPRM_CHOP: ++ return "_z"; ++ case SW_64_FPRM_DYN: ++ return "_n"; ++ default: ++ gcc_unreachable (); ++ } ++ break; ++ ++ case ROUND_SUFFIX_C: ++ return "_z"; ++ ++ default: ++ gcc_unreachable (); ++ } ++ gcc_unreachable (); ++} ++static void ++sw_64_print_operand (FILE *file, rtx x, int code) ++{ ++ int i; ++ ++ switch (code) ++ { ++ case '~': ++ /* Print the assembler name of the current function. */ ++ assemble_name (file, sw_64_fnname); ++ break; ++ ++ case '&': ++ if (const char *name = get_some_local_dynamic_name ()) ++ assemble_name (file, name); ++ else ++ output_operand_lossage ("'%%&' used without any " ++ "local dynamic TLS references"); ++ break; ++ ++ case '/': ++ /* Generates the instruction suffix. The TRAP_SUFFIX and ROUND_SUFFIX ++ attributes are examined to determine what is appropriate. */ ++ { ++ const char *trap = get_trap_mode_suffix (); ++ const char *round = get_round_mode_suffix (); ++ ++ break; ++ } ++ ++ case 'T': ++ { ++ const char *round_sw = get_round_mode_suffix_sw (); ++ ++ if (round_sw) ++ fprintf (file, "%s", (round_sw ? round_sw : "")); ++ break; ++ } ++ case ',': ++ /* Generates single precision suffix for floating point ++ instructions (s for IEEE, f for VAX). */ ++ fputc ((TARGET_FLOAT_VAX ? 'f' : 's'), file); ++ break; ++ ++ case '-': ++ /* Generates double precision suffix for floating point ++ instructions (t for IEEE, g for VAX). */ ++ fputc ((TARGET_FLOAT_VAX ? 'g' : 'd'), file); ++ break; ++ ++ case '#': ++ if (sw_64_this_literal_sequence_number == 0) ++ sw_64_this_literal_sequence_number = sw_64_next_sequence_number++; ++ fprintf (file, "%d", sw_64_this_literal_sequence_number); ++ break; ++ ++ case '*': ++ if (sw_64_this_gpdisp_sequence_number == 0) ++ sw_64_this_gpdisp_sequence_number = sw_64_next_sequence_number++; ++ fprintf (file, "%d", sw_64_this_gpdisp_sequence_number); ++ break; ++ ++ case 'J': ++ { ++ const char *lituse; ++ ++ if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD_CALL) ++ { ++ x = XVECEXP (x, 0, 0); ++ lituse = "lituse_tlsgd"; ++ } ++ else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM_CALL) ++ { ++ x = XVECEXP (x, 0, 0); ++ lituse = "lituse_tlsldm"; ++ } ++ else if (CONST_INT_P (x)) ++ lituse = "lituse_jsr"; ++ else ++ { ++ output_operand_lossage ("invalid %%J value"); ++ break; ++ } ++ ++ if (x != const0_rtx) ++ fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x)); ++ } ++ break; ++ ++ case 'j': ++ { ++ const char *lituse; ++ ++#ifdef HAVE_AS_JSRDIRECT_RELOCS ++ lituse = "lituse_jsrdirect"; ++#else ++ lituse = "lituse_jsr"; ++#endif ++ ++ gcc_assert (INTVAL (x) != 0); ++ fprintf (file, "\t\t!%s!%d", lituse, (int) INTVAL (x)); ++ } ++ break; ++ case 'r': ++ /* If this operand is the constant zero, write it as "$31". */ ++ if (REG_P (x)) ++ fprintf (file, "%s", reg_names[REGNO (x)]); ++ else if (x == CONST0_RTX (GET_MODE (x))) ++ fprintf (file, "$31"); ++ else ++ output_operand_lossage ("invalid %%r value"); ++ break; ++ ++ case 'R': ++ /* Similar, but for floating-point. 
*/ ++ if (REG_P (x)) ++ fprintf (file, "%s", reg_names[REGNO (x)]); ++ else if (x == CONST0_RTX (GET_MODE (x))) ++ fprintf (file, "$f31"); ++ else ++ output_operand_lossage ("invalid %%R value"); ++ break; ++ ++ case 'N': ++ /* Write the 1's complement of a constant. */ ++ if (!CONST_INT_P (x)) ++ output_operand_lossage ("invalid %%N value"); ++ ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~INTVAL (x)); ++ break; ++ ++ case 'P': ++ /* Write 1 << C, for a constant C. */ ++ if (!CONST_INT_P (x)) ++ output_operand_lossage ("invalid %%P value"); ++ ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, HOST_WIDE_INT_1 << INTVAL (x)); ++ break; ++ ++ case 'h': ++ /* Write the high-order 16 bits of a constant, sign-extended. */ ++ if (!CONST_INT_P (x)) ++ output_operand_lossage ("invalid %%h value"); ++ ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) >> 16); ++ break; ++ ++ case 'L': ++ /* Write the low-order 16 bits of a constant, sign-extended. */ ++ if (!CONST_INT_P (x)) ++ output_operand_lossage ("invalid %%L value"); ++ ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, ++ (INTVAL (x) & 0xffff) - 2 * (INTVAL (x) & 0x8000)); ++ break; ++ ++ case 'm': ++ /* Write mask for ZAP insn. */ ++ if (CONST_INT_P (x)) ++ { ++ HOST_WIDE_INT mask = 0, value = INTVAL (x); ++ ++ for (i = 0; i < 8; i++, value >>= 8) ++ if (value & 0xff) ++ mask |= (1 << i); ++ ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, mask); ++ } ++ else ++ output_operand_lossage ("invalid %%m value"); ++ break; ++ ++ case 'M': ++ /* 'b', 'w', 'l', or 'q' as the value of the constant. */ ++ if (!mode_width_operand (x, VOIDmode)) ++ output_operand_lossage ("invalid %%M value"); ++ ++ fprintf (file, "%s", ++ (INTVAL (x) == 8 ++ ? "b" ++ : INTVAL (x) == 16 ? "w" : INTVAL (x) == 32 ? "l" : "q")); ++ break; ++ ++ case 'U': ++ /* Similar, except do it from the mask. */ ++ if (CONST_INT_P (x)) ++ { ++ HOST_WIDE_INT value = INTVAL (x); ++ ++ if (value == 0xff) ++ { ++ fputc ('b', file); ++ break; ++ } ++ if (value == 0xffff) ++ { ++ fputc ('w', file); ++ break; ++ } ++ if (value == 0xffffffff) ++ { ++ fputc ('l', file); ++ break; ++ } ++ if (value == -1) ++ { ++ fputc ('q', file); ++ break; ++ } ++ } ++ /* Write "_a" for AUTO_INC_DEC access. */ ++ if (MEM_P (x) ++ && (GET_CODE (XEXP (x, 0)) == POST_INC ++ || GET_CODE (XEXP (x, 0)) == POST_DEC ++ || GET_CODE (XEXP (x, 0)) == POST_MODIFY)) ++ { ++ fprintf (file, "_a"); ++ break; ++ } ++ break; ++ ++ case 's': ++ /* Write the constant value divided by 8. */ ++ if (!CONST_INT_P (x) || (unsigned HOST_WIDE_INT) INTVAL (x) >= 64 ++ || (INTVAL (x) & 7) != 0) ++ output_operand_lossage ("invalid %%s value"); ++ ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) / 8); ++ break; ++ ++ case 'C': ++ case 'D': ++ case 'c': ++ case 'd': ++ /* Write out comparison name. */ ++ { ++ enum rtx_code c = GET_CODE (x); ++ ++ if (!COMPARISON_P (x)) ++ output_operand_lossage ("invalid %%C value"); ++ ++ else if (code == 'D') ++ c = reverse_condition (c); ++ else if (code == 'c') ++ c = swap_condition (c); ++ else if (code == 'd') ++ c = swap_condition (reverse_condition (c)); ++ ++ if (c == LEU) ++ fprintf (file, "ule"); ++ else if (c == LTU) ++ fprintf (file, "ult"); ++ else if (c == UNORDERED) ++ fprintf (file, "un"); ++ else ++ fprintf (file, "%s", GET_RTX_NAME (c)); ++ } ++ break; ++ ++ case 'E': ++ /* Write the divide or modulus operator. */ ++ switch (GET_CODE (x)) ++ { ++ case DIV: ++ fprintf (file, "div%s", GET_MODE (x) == SImode ? "w" : "l"); ++ break; ++ case UDIV: ++ fprintf (file, "div%su", GET_MODE (x) == SImode ? 
"w" : "l"); ++ break; ++ case MOD: ++ fprintf (file, "rem%s", GET_MODE (x) == SImode ? "w" : "l"); ++ break; ++ case UMOD: ++ fprintf (file, "rem%su", GET_MODE (x) == SImode ? "w" : "l"); ++ break; ++ default: ++ output_operand_lossage ("invalid %%E value"); ++ break; ++ } ++ break; ++ ++ case 'A': ++ /* Write "_u" for unaligned access. */ ++ if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == AND) ++ fprintf (file, "_u"); ++ break; ++ ++ case 0: ++ if (REG_P (x)) ++ fprintf (file, "%s", reg_names[REGNO (x)]); ++ else if (MEM_P (x)) ++ { ++ if (GET_CODE (XEXP (x, 0)) == POST_INC) ++ fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)), ++ reg_names[REGNO (XEXP (XEXP (x, 0), 0))]); ++ else if (GET_CODE (XEXP (x, 0)) == POST_DEC) ++ fprintf (file, "%d(%s)", -GET_MODE_SIZE (GET_MODE (x)), ++ reg_names[REGNO (XEXP (XEXP (x, 0), 0))]); ++ else if (GET_CODE (XEXP (x, 0)) == POST_MODIFY) ++ output_address (GET_MODE (x), XEXP (XEXP (x, 0), 1)); ++ else ++ output_address (GET_MODE (x), XEXP (x, 0)); ++ } ++ else if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == UNSPEC) ++ { ++ switch (XINT (XEXP (x, 0), 1)) ++ { ++ case UNSPEC_DTPREL: ++ case UNSPEC_TPREL: ++ output_addr_const (file, XVECEXP (XEXP (x, 0), 0, 0)); ++ break; ++ default: ++ output_operand_lossage ("unknown relocation unspec"); ++ break; ++ } ++ } ++ else ++ output_addr_const (file, x); ++ break; ++ ++ default: ++ output_operand_lossage ("invalid %%xn code"); ++ } ++} ++ ++/* Implement TARGET_PRINT_OPERAND_ADDRESS. */ ++ ++static void ++sw_64_print_operand_address (FILE *file, machine_mode /*mode. */, rtx addr) ++{ ++ int basereg = 31; ++ HOST_WIDE_INT offset = 0; ++ ++ if (GET_CODE (addr) == AND) ++ addr = XEXP (addr, 0); ++ ++ if (GET_CODE (addr) == PLUS && CONST_INT_P (XEXP (addr, 1))) ++ { ++ offset = INTVAL (XEXP (addr, 1)); ++ addr = XEXP (addr, 0); ++ } ++ ++ if (GET_CODE (addr) == LO_SUM) ++ { ++ const char *reloc16, *reloclo; ++ rtx op1 = XEXP (addr, 1); ++ ++ if (GET_CODE (op1) == CONST && GET_CODE (XEXP (op1, 0)) == UNSPEC) ++ { ++ op1 = XEXP (op1, 0); ++ switch (XINT (op1, 1)) ++ { ++ case UNSPEC_DTPREL: ++ reloc16 = NULL; ++ reloclo = (sw_64_tls_size == 16 ? "dtprel" : "dtprello"); ++ break; ++ case UNSPEC_TPREL: ++ reloc16 = NULL; ++ reloclo = (sw_64_tls_size == 16 ? "tprel" : "tprello"); ++ break; ++ default: ++ output_operand_lossage ("unknown relocation unspec"); ++ return; ++ } ++ ++ output_addr_const (file, XVECEXP (op1, 0, 0)); ++ } ++ else ++ { ++ reloc16 = "gprel"; ++ reloclo = "gprellow"; ++ output_addr_const (file, op1); ++ } ++ ++ if (offset) ++ fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset); ++ ++ addr = XEXP (addr, 0); ++ switch (GET_CODE (addr)) ++ { ++ case REG: ++ basereg = REGNO (addr); ++ break; ++ ++ case SUBREG: ++ basereg = subreg_regno (addr); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ fprintf (file, "($%d)\t\t!%s", basereg, ++ (basereg == 29 ? 
reloc16 : reloclo)); ++ return; ++ } ++ ++ switch (GET_CODE (addr)) ++ { ++ case REG: ++ basereg = REGNO (addr); ++ break; ++ ++ case SUBREG: ++ basereg = subreg_regno (addr); ++ break; ++ ++ case CONST_INT: ++ offset = INTVAL (addr); ++ break; ++ ++ case SYMBOL_REF: ++ gcc_assert (this_is_asm_operands); ++ fprintf (file, "%s", XSTR (addr, 0)); ++ return; ++ ++ case CONST: ++ gcc_assert (this_is_asm_operands); ++ gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS ++ && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF); ++ fprintf (file, "%s+" HOST_WIDE_INT_PRINT_DEC, ++ XSTR (XEXP (XEXP (addr, 0), 0), 0), ++ INTVAL (XEXP (XEXP (addr, 0), 1))); ++ return; ++ ++ default: ++ output_operand_lossage ("invalid operand address"); ++ return; ++ } ++ ++ fprintf (file, HOST_WIDE_INT_PRINT_DEC "($%d)", offset, basereg); ++} ++ ++/* Emit RTL insns to initialize the variable parts of a trampoline at ++ M_TRAMP. FNDECL is target function's decl. CHAIN_VALUE is an rtx ++ for the static chain value for the function. */ ++ ++static void ++sw_64_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) ++{ ++ rtx fnaddr, mem, word1, word2; ++ ++ fnaddr = XEXP (DECL_RTL (fndecl), 0); ++ ++#ifdef POINTERS_EXTEND_UNSIGNED ++ fnaddr = convert_memory_address (Pmode, fnaddr); ++ chain_value = convert_memory_address (Pmode, chain_value); ++#endif ++ ++ /* These 4 instructions are: ++ ldq $1,24($27) ++ ldq $27,16($27) ++ jmp $31,($27),0 ++ nop ++ We don't bother setting the HINT field of the jump; the nop ++ is merely there for padding. */ ++ word1 = GEN_INT (HOST_WIDE_INT_C (0x8f7b00108c3b0018)); ++ word2 = GEN_INT (HOST_WIDE_INT_C (0x43ff075f0ffb0000)); ++ ++ /* Store the first two words, as computed above. */ ++ mem = adjust_address (m_tramp, DImode, 0); ++ emit_move_insn (mem, word1); ++ mem = adjust_address (m_tramp, DImode, 8); ++ emit_move_insn (mem, word2); ++ ++ /* Store function address and static chain value. */ ++ mem = adjust_address (m_tramp, Pmode, 16); ++ emit_move_insn (mem, fnaddr); ++ mem = adjust_address (m_tramp, Pmode, 24); ++ emit_move_insn (mem, chain_value); ++ ++ emit_insn (gen_imb ()); ++#ifdef HAVE_ENABLE_EXECUTE_STACK ++ emit_library_call (init_one_libfunc ("__enable_execute_stack"), ++ LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode); ++#endif ++} ++ ++/* Determine where to put an argument to a function. ++ Value is zero to push the argument on the stack, ++ or a hard register in which to store the argument. ++ ++ CUM is a variable of type CUMULATIVE_ARGS which gives info about ++ the preceding args and about the function being called. ++ ++ ARG is a description of the argument. ++ On Sw_64 the first 6 words of args are normally in registers ++ and the rest are pushed. */ ++ ++static rtx ++sw_64_function_arg (cumulative_args_t cum_v, const function_arg_info &arg) ++{ ++ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); ++ int basereg; ++ int num_args; ++ ++ /* Don't get confused and pass small structures in FP registers. */ ++ if (arg.aggregate_type_p ()) ++ basereg = 16; ++ else ++ { ++ /* With sw_64_split_complex_arg, we shouldn't see any raw complex ++ values here. */ ++ gcc_checking_assert (!COMPLEX_MODE_P (arg.mode)); ++ ++ /* Set up defaults for FP operands passed in FP registers, and ++ integral operands passed in integer registers. */ ++ if (TARGET_FPREGS && GET_MODE_CLASS (arg.mode) == MODE_FLOAT) ++ basereg = 32 + 16; ++ else ++ basereg = 16; ++ } ++ ++ /* ??? 
Irritatingly, the definition of CUMULATIVE_ARGS is different for ++ the two platforms, so we can't avoid conditional compilation. */ ++ { ++ if (*cum >= 6) ++ return NULL_RTX; ++ num_args = *cum; ++ ++ if (arg.end_marker_p ()) ++ basereg = 16; ++ else if (targetm.calls.must_pass_in_stack (arg)) ++ return NULL_RTX; ++ } ++ ++ return gen_rtx_REG (arg.mode, num_args + basereg); ++} ++ ++/* Update the data in CUM to advance over an argument ARG. */ ++ ++static void ++sw_64_function_arg_advance (cumulative_args_t cum_v, ++ const function_arg_info &arg) ++{ ++ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v); ++ bool onstack = targetm.calls.must_pass_in_stack (arg); ++ int increment = onstack ? 6 : SW_64_ARG_SIZE (arg.mode, arg.type); ++ ++ *cum += increment; ++} ++ ++static int ++sw_64_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg) ++{ ++ int words = 0; ++ CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED = get_cumulative_args (cum_v); ++ ++ if (*cum < 6 && 6 < *cum + SW_64_ARG_SIZE (arg.mode, arg.type)) ++ words = 6 - *cum; ++ ++ return words * UNITS_PER_WORD; ++} ++ ++/* Return true if ARG must be returned in memory, instead of in registers. */ ++ ++static bool ++sw_64_return_in_memory (const_tree type, const_tree fndecl ATTRIBUTE_UNUSED) ++{ ++ machine_mode mode = VOIDmode; ++ int size; ++ ++ if (type) ++ { ++ mode = TYPE_MODE (type); ++ ++ /* All aggregates are returned in memory, except on OpenVMS where ++ records that fit 64 bits should be returned by immediate value ++ as required by section 3.8.7.1 of the OpenVMS Calling Standard. */ ++ if (AGGREGATE_TYPE_P (type)) ++ return true; ++ } ++ ++ size = GET_MODE_SIZE (mode); ++ switch (GET_MODE_CLASS (mode)) ++ { ++ case MODE_VECTOR_FLOAT: ++ /* Pass all float vectors in memory, like an aggregate. */ ++ return true; ++ ++ case MODE_COMPLEX_FLOAT: ++ /* We judge complex floats on the size of their element, ++ not the size of the whole type. */ ++ size = GET_MODE_UNIT_SIZE (mode); ++ break; ++ ++ case MODE_INT: ++ case MODE_FLOAT: ++ case MODE_COMPLEX_INT: ++ case MODE_VECTOR_INT: ++ break; ++ ++ default: ++ /* ??? We get called on all sorts of random stuff from ++ aggregate_value_p. We must return something, but it's not ++ clear what's safe to return. Pretend it's a struct I ++ guess. */ ++ return true; ++ } ++ ++ /* Otherwise types must fit in one register. */ ++ return size > UNITS_PER_WORD; ++} ++ ++/* Return true if TYPE should be passed by invisible reference. */ ++ ++static bool ++sw_64_pass_by_reference (cumulative_args_t, const function_arg_info &arg) ++{ ++ /* Pass float and _Complex float variable arguments by reference. ++ This avoids 64-bit store from a FP register to a pretend args save area ++ and subsequent 32-bit load from the saved location to a FP register. ++ ++ Note that 32-bit loads and stores to/from a FP register on sw_64 reorder ++ bits to form a canonical 64-bit value in the FP register. This fact ++ invalidates compiler assumption that 32-bit FP value lives in the lower ++ 32-bits of the passed 64-bit FP value, so loading the 32-bit value from ++ the stored 64-bit location using 32-bit FP load is invalid on sw_64. ++ ++ This introduces sort of ABI incompatibility, but until _Float32 was ++ introduced, C-family languages promoted 32-bit float variable arg to ++ a 64-bit double, and it was not allowed to pass float as a varible ++ argument. Passing _Complex float as a variable argument never ++ worked on sw_64. 
Thus, we have no backward compatibility issues ++ to worry about, and passing unpromoted _Float32 and _Complex float ++ as a variable argument will actually work in the future. */ ++ ++ if (arg.mode == SFmode || arg.mode == SCmode) ++ return !arg.named; ++ ++ return arg.mode == TFmode || arg.mode == TCmode; ++} ++ ++/* Define how to find the value returned by a function. VALTYPE is the ++ data type of the value (as a tree). If the precise function being ++ called is known, FUNC is its FUNCTION_DECL; otherwise, FUNC is 0. ++ MODE is set instead of VALTYPE for libcalls. ++ ++ On Sw_64 the value is found in $0 for integer functions and ++ $f0 for floating-point functions. */ ++ ++static rtx ++sw_64_function_value_1 (const_tree valtype, const_tree func ATTRIBUTE_UNUSED, ++ machine_mode mode) ++{ ++ unsigned int regnum, dummy ATTRIBUTE_UNUSED; ++ enum mode_class mclass; ++ ++ gcc_assert (!valtype || !sw_64_return_in_memory (valtype, func)); ++ ++ if (valtype) ++ mode = TYPE_MODE (valtype); ++ ++ mclass = GET_MODE_CLASS (mode); ++ switch (mclass) ++ { ++ case MODE_INT: ++ /* Do the same thing as PROMOTE_MODE except for libcalls on VMS, ++ where we have them returning both SImode and DImode. */ ++ PROMOTE_MODE (mode, dummy, valtype); ++ /* FALLTHRU */ ++ ++ case MODE_COMPLEX_INT: ++ case MODE_VECTOR_INT: ++ regnum = 0; ++ break; ++ ++ case MODE_FLOAT: ++ regnum = 32; ++ break; ++ ++ case MODE_COMPLEX_FLOAT: ++ { ++ machine_mode cmode = GET_MODE_INNER (mode); ++ ++ return gen_rtx_PARALLEL ( ++ VOIDmode, ++ gen_rtvec (2, ++ gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 32), ++ const0_rtx), ++ gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (cmode, 33), ++ GEN_INT (GET_MODE_SIZE (cmode))))); ++ } ++ ++ case MODE_RANDOM: ++ default: ++ gcc_unreachable (); ++ } ++ ++ return gen_rtx_REG (mode, regnum); ++} ++ ++/* Implement TARGET_FUNCTION_VALUE. */ ++ ++static rtx ++sw_64_function_value (const_tree valtype, const_tree fn_decl_or_type, ++ bool /* outgoing */) ++{ ++ return sw_64_function_value_1 (valtype, fn_decl_or_type, VOIDmode); ++} ++ ++/* Implement TARGET_LIBCALL_VALUE. */ ++ ++static rtx ++sw_64_libcall_value (machine_mode mode, const_rtx /* fun */) ++{ ++ return sw_64_function_value_1 (NULL_TREE, NULL_TREE, mode); ++} ++ ++/* Implement TARGET_FUNCTION_VALUE_REGNO_P. ++ ++ On the Sw_64, $0 $1 and $f0 $f1 are the only register thus used. */ ++ ++static bool ++sw_64_function_value_regno_p (const unsigned int regno) ++{ ++ return (regno == 0 || regno == 1 || regno == 32 || regno == 33); ++} ++ ++/* TCmode complex values are passed by invisible reference. We ++ should not split these values. */ ++ ++static bool ++sw_64_split_complex_arg (const_tree type) ++{ ++ return TYPE_MODE (type) != TCmode; ++} ++ ++static tree ++sw_64_build_builtin_va_list (void) ++{ ++ tree base, ofs, space, record, type_decl; ++ ++ record = (*lang_hooks.types.make_type) (RECORD_TYPE); ++ type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL, ++ get_identifier ("__va_list_tag"), record); ++ TYPE_STUB_DECL (record) = type_decl; ++ TYPE_NAME (record) = type_decl; ++ ++ /* C++? SET_IS_AGGR_TYPE (record, 1); */ ++ ++ /* Dummy field to prevent alignment warnings. 
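++ The two user-visible fields built below are __base, the start of the
++ argument save area, and __offset, the byte offset of the next argument
++ relative to __base. 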
*/ ++ space ++ = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE, integer_type_node); ++ DECL_FIELD_CONTEXT (space) = record; ++ DECL_ARTIFICIAL (space) = 1; ++ DECL_IGNORED_P (space) = 1; ++ ++ ofs = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("__offset"), ++ integer_type_node); ++ DECL_FIELD_CONTEXT (ofs) = record; ++ DECL_CHAIN (ofs) = space; ++ ++ base = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("__base"), ++ ptr_type_node); ++ DECL_FIELD_CONTEXT (base) = record; ++ DECL_CHAIN (base) = ofs; ++ ++ TYPE_FIELDS (record) = base; ++ layout_type (record); ++ ++ va_list_gpr_counter_field = ofs; ++ return record; ++} ++ ++/* Helper function for sw_64_stdarg_optimize_hook. Skip over casts ++ and constant additions. */ ++ ++static gimple * ++va_list_skip_additions (tree lhs) ++{ ++ gimple *stmt; ++ ++ for (;;) ++ { ++ enum tree_code code; ++ ++ stmt = SSA_NAME_DEF_STMT (lhs); ++ ++ if (gimple_code (stmt) == GIMPLE_PHI) ++ return stmt; ++ ++ if (!is_gimple_assign (stmt) || gimple_assign_lhs (stmt) != lhs) ++ return NULL; ++ ++ if (TREE_CODE (gimple_assign_rhs1 (stmt)) != SSA_NAME) ++ return stmt; ++ code = gimple_assign_rhs_code (stmt); ++ if (!CONVERT_EXPR_CODE_P (code) ++ && ((code != PLUS_EXPR && code != POINTER_PLUS_EXPR) ++ || TREE_CODE (gimple_assign_rhs2 (stmt)) != INTEGER_CST ++ || !tree_fits_uhwi_p (gimple_assign_rhs2 (stmt)))) ++ return stmt; ++ ++ lhs = gimple_assign_rhs1 (stmt); ++ } ++} ++ ++/* Check if LHS = RHS statement is ++ LHS = *(ap.__base + ap.__offset + cst) ++ or ++ LHS = *(ap.__base ++ + ((ap.__offset + cst <= 47) ++ ? ap.__offset + cst - 48 : ap.__offset + cst) + cst2). ++ If the former, indicate that GPR registers are needed, ++ if the latter, indicate that FPR registers are needed. ++ ++ Also look for LHS = (*ptr).field, where ptr is one of the forms ++ listed above. ++ ++ On sw_64, cfun->va_list_gpr_size is used as size of the needed ++ regs and cfun->va_list_fpr_size is a bitmask, bit 0 set if GPR ++ registers are needed and bit 1 set if FPR registers are needed. ++ Return true if va_list references should not be scanned for the ++ current statement. 
*/ ++ ++static bool ++sw_64_stdarg_optimize_hook (struct stdarg_info *si, const gimple *stmt) ++{ ++ tree base, offset, rhs; ++ int offset_arg = 1; ++ gimple *base_stmt; ++ ++ if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) != GIMPLE_SINGLE_RHS) ++ return false; ++ ++ rhs = gimple_assign_rhs1 (stmt); ++ while (handled_component_p (rhs)) ++ rhs = TREE_OPERAND (rhs, 0); ++ if (TREE_CODE (rhs) != MEM_REF ++ || TREE_CODE (TREE_OPERAND (rhs, 0)) != SSA_NAME) ++ return false; ++ ++ stmt = va_list_skip_additions (TREE_OPERAND (rhs, 0)); ++ if (stmt == NULL || !is_gimple_assign (stmt) ++ || gimple_assign_rhs_code (stmt) != POINTER_PLUS_EXPR) ++ return false; ++ ++ base = gimple_assign_rhs1 (stmt); ++ if (TREE_CODE (base) == SSA_NAME) ++ { ++ base_stmt = va_list_skip_additions (base); ++ if (base_stmt && is_gimple_assign (base_stmt) ++ && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) ++ base = gimple_assign_rhs1 (base_stmt); ++ } ++ ++ if (TREE_CODE (base) != COMPONENT_REF ++ || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) ++ { ++ base = gimple_assign_rhs2 (stmt); ++ if (TREE_CODE (base) == SSA_NAME) ++ { ++ base_stmt = va_list_skip_additions (base); ++ if (base_stmt && is_gimple_assign (base_stmt) ++ && gimple_assign_rhs_code (base_stmt) == COMPONENT_REF) ++ base = gimple_assign_rhs1 (base_stmt); ++ } ++ ++ if (TREE_CODE (base) != COMPONENT_REF ++ || TREE_OPERAND (base, 1) != TYPE_FIELDS (va_list_type_node)) ++ return false; ++ ++ offset_arg = 0; ++ } ++ ++ base = get_base_address (base); ++ if (TREE_CODE (base) != VAR_DECL ++ || !bitmap_bit_p (si->va_list_vars, DECL_UID (base) + num_ssa_names)) ++ return false; ++ ++ offset = gimple_op (stmt, 1 + offset_arg); ++ if (TREE_CODE (offset) == SSA_NAME) ++ { ++ gimple *offset_stmt = va_list_skip_additions (offset); ++ ++ if (offset_stmt && gimple_code (offset_stmt) == GIMPLE_PHI) ++ { ++ HOST_WIDE_INT sub; ++ gimple *arg1_stmt, *arg2_stmt; ++ tree arg1, arg2; ++ enum tree_code code1, code2; ++ ++ if (gimple_phi_num_args (offset_stmt) != 2) ++ goto escapes; ++ ++ arg1_stmt ++ = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 0)); ++ arg2_stmt ++ = va_list_skip_additions (gimple_phi_arg_def (offset_stmt, 1)); ++ if (arg1_stmt == NULL || !is_gimple_assign (arg1_stmt) ++ || arg2_stmt == NULL || !is_gimple_assign (arg2_stmt)) ++ goto escapes; ++ ++ code1 = gimple_assign_rhs_code (arg1_stmt); ++ code2 = gimple_assign_rhs_code (arg2_stmt); ++ if (code1 == COMPONENT_REF ++ && (code2 == MINUS_EXPR || code2 == PLUS_EXPR)) ++ /* Do nothing. */; ++ else if (code2 == COMPONENT_REF ++ && (code1 == MINUS_EXPR || code1 == PLUS_EXPR)) ++ { ++ std::swap (arg1_stmt, arg2_stmt); ++ code2 = code1; ++ } ++ else ++ goto escapes; ++ ++ if (!tree_fits_shwi_p (gimple_assign_rhs2 (arg2_stmt))) ++ goto escapes; ++ ++ sub = tree_to_shwi (gimple_assign_rhs2 (arg2_stmt)); ++ if (code2 == MINUS_EXPR) ++ sub = -sub; ++ if (sub < -48 || sub > -32) ++ goto escapes; ++ ++ arg1 = gimple_assign_rhs1 (arg1_stmt); ++ arg2 = gimple_assign_rhs1 (arg2_stmt); ++ if (TREE_CODE (arg2) == SSA_NAME) ++ { ++ arg2_stmt = va_list_skip_additions (arg2); ++ if (arg2_stmt == NULL || !is_gimple_assign (arg2_stmt) ++ || gimple_assign_rhs_code (arg2_stmt) != COMPONENT_REF) ++ goto escapes; ++ arg2 = gimple_assign_rhs1 (arg2_stmt); ++ } ++ if (arg1 != arg2) ++ goto escapes; ++ ++ if (TREE_CODE (arg1) != COMPONENT_REF ++ || TREE_OPERAND (arg1, 1) != va_list_gpr_counter_field ++ || get_base_address (arg1) != base) ++ goto escapes; ++ ++ /* Need floating point regs. 
*/ ++ cfun->va_list_fpr_size |= 2; ++ return false; ++ } ++ if (offset_stmt && is_gimple_assign (offset_stmt) ++ && gimple_assign_rhs_code (offset_stmt) == COMPONENT_REF) ++ offset = gimple_assign_rhs1 (offset_stmt); ++ } ++ if (TREE_CODE (offset) != COMPONENT_REF ++ || TREE_OPERAND (offset, 1) != va_list_gpr_counter_field ++ || get_base_address (offset) != base) ++ goto escapes; ++ else ++ /* Need general regs. */ ++ cfun->va_list_fpr_size |= 1; ++ return false; ++ ++escapes: ++ si->va_list_escapes = true; ++ return false; ++} ++ ++/* Perform any needed actions needed for a function that is receiving a ++ variable number of arguments. */ ++ ++static void ++sw_64_setup_incoming_varargs (cumulative_args_t pcum, ++ const function_arg_info &arg, int *pretend_size, ++ int no_rtl) ++{ ++ CUMULATIVE_ARGS cum = *get_cumulative_args (pcum); ++ ++ /* Skip the current argument. */ ++ targetm.calls.function_arg_advance (pack_cumulative_args (&cum), arg); ++ ++ /* On SYSV and friends, we allocate space for all 12 arg registers, but ++ only push those that are remaining. However, if NO registers need to ++ be saved, don't allocate any space. This is not only because we won't ++ need the space, but because AP includes the current_pretend_args_size ++ and we don't want to mess up any ap-relative addresses already made. ++ ++ If we are not to use the floating-point registers, save the integer ++ registers where we would put the floating-point registers. This is ++ not the most efficient way to implement varargs with just one register ++ class, but it isn't worth doing anything more efficient in this rare ++ case. */ ++ if (cum >= 6) ++ return; ++ ++ if (!no_rtl) ++ { ++ int count; ++ alias_set_type set = get_varargs_alias_set (); ++ rtx tmp; ++ ++ count = cfun->va_list_gpr_size / UNITS_PER_WORD; ++ if (count > 6 - cum) ++ count = 6 - cum; ++ ++ /* Detect whether integer registers or floating-point registers ++ are needed by the detected va_arg statements. See above for ++ how these values are computed. Note that the "escape" value ++ is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of ++ these bits set. */ ++ gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3); ++ ++ if (cfun->va_list_fpr_size & 1) ++ { ++ tmp = gen_rtx_MEM (BLKmode, ++ plus_constant (Pmode, virtual_incoming_args_rtx, ++ (cum + 6) * UNITS_PER_WORD)); ++ MEM_NOTRAP_P (tmp) = 1; ++ set_mem_alias_set (tmp, set); ++ move_block_from_reg (16 + cum, tmp, count); ++ } ++ ++ if (cfun->va_list_fpr_size & 2) ++ { ++ tmp = gen_rtx_MEM (BLKmode, ++ plus_constant (Pmode, virtual_incoming_args_rtx, ++ cum * UNITS_PER_WORD)); ++ MEM_NOTRAP_P (tmp) = 1; ++ set_mem_alias_set (tmp, set); ++ move_block_from_reg (16 + cum + TARGET_FPREGS * 32, tmp, count); ++ } ++ } ++#ifdef SW_64_ENABLE_FULL_ASAN ++ cfun->machine->frame.saved_varargs_size = 12 * UNITS_PER_WORD; ++#else ++ *pretend_size = 12 * UNITS_PER_WORD; ++#endif ++} ++ ++static void ++sw_64_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED) ++{ ++ HOST_WIDE_INT offset; ++ tree t, offset_field, base_field; ++ ++ if (TREE_CODE (TREE_TYPE (valist)) == ERROR_MARK) ++ return; ++ ++ /* For Unix, TARGET_SETUP_INCOMING_VARARGS moves the starting address base ++ up by 48, storing fp arg registers in the first 48 bytes, and the ++ integer arg registers in the next 48 bytes. This is only done, ++ however, if any integer registers need to be stored. 
++ ++ If no integer registers need be stored, then we must subtract 48 ++ in order to account for the integer arg registers which are counted ++ in argsize above, but which are not actually stored on the stack. ++ Must further be careful here about structures straddling the last ++ integer argument register; that futzes with pretend_args_size, ++ which changes the meaning of AP. */ ++ ++ if (NUM_ARGS < 6) ++ offset = 6 * UNITS_PER_WORD; ++ else ++#ifdef SW_64_ENABLE_FULL_ASAN ++ offset = -6 * UNITS_PER_WORD + cfun->machine->frame.saved_varargs_size ++ + crtl->args.pretend_args_size; ++#else ++ offset = -6 * UNITS_PER_WORD + crtl->args.pretend_args_size; ++#endif ++ ++ base_field = TYPE_FIELDS (TREE_TYPE (valist)); ++ offset_field = DECL_CHAIN (base_field); ++ ++ base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field), valist, ++ base_field, NULL_TREE); ++ offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field), valist, ++ offset_field, NULL_TREE); ++ ++ t = make_tree (ptr_type_node, virtual_incoming_args_rtx); ++ t = fold_build_pointer_plus_hwi (t, offset); ++ t = build2 (MODIFY_EXPR, TREE_TYPE (base_field), base_field, t); ++ TREE_SIDE_EFFECTS (t) = 1; ++ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); ++ ++ t = build_int_cst (NULL_TREE, NUM_ARGS * UNITS_PER_WORD); ++ t = build2 (MODIFY_EXPR, TREE_TYPE (offset_field), offset_field, t); ++ TREE_SIDE_EFFECTS (t) = 1; ++ expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); ++} ++ ++static tree ++sw_64_gimplify_va_arg_1 (tree type, tree base, tree offset, gimple_seq *pre_p) ++{ ++ tree type_size, ptr_type, addend, t, addr; ++ gimple_seq internal_post; ++ ++ /* If the type could not be passed in registers, skip the block ++ reserved for the registers. */ ++ if (must_pass_va_arg_in_stack (type)) ++ { ++ t = build_int_cst (TREE_TYPE (offset), 6 * 8); ++ gimplify_assign (offset, build2 (MAX_EXPR, TREE_TYPE (offset), offset, t), ++ pre_p); ++ } ++ ++ addend = offset; ++ ptr_type = build_pointer_type_for_mode (type, ptr_mode, true); ++ ++ if (TREE_CODE (type) == COMPLEX_TYPE) ++ { ++ tree real_part, imag_part, real_temp; ++ ++ real_part ++ = sw_64_gimplify_va_arg_1 (TREE_TYPE (type), base, offset, pre_p); ++ ++ /* Copy the value into a new temporary, lest the formal temporary ++ be reused out from under us. */ ++ real_temp = get_initialized_tmp_var (real_part, pre_p, NULL); ++ ++ imag_part ++ = sw_64_gimplify_va_arg_1 (TREE_TYPE (type), base, offset, pre_p); ++ ++ return build2 (COMPLEX_EXPR, type, real_temp, imag_part); ++ } ++ else if (TREE_CODE (type) == REAL_TYPE) ++ { ++ tree fpaddend, cond, fourtyeight; ++ ++ fourtyeight = build_int_cst (TREE_TYPE (addend), 6 * 8); ++ fpaddend ++ = fold_build2 (MINUS_EXPR, TREE_TYPE (addend), addend, fourtyeight); ++ cond = fold_build2 (LT_EXPR, boolean_type_node, addend, fourtyeight); ++ addend ++ = fold_build3 (COND_EXPR, TREE_TYPE (addend), cond, fpaddend, addend); ++ } ++ ++ /* Build the final address and force that value into a temporary. */ ++ addr = fold_build_pointer_plus (fold_convert (ptr_type, base), addend); ++ internal_post = NULL; ++ gimplify_expr (&addr, pre_p, &internal_post, is_gimple_val, fb_rvalue); ++ gimple_seq_add_seq (pre_p, internal_post); ++ ++ /* Update the offset field. 
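++ The consumed argument size is rounded up to a whole number of 8-byte
++ words before being added to the offset. 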
*/ ++ type_size = TYPE_SIZE_UNIT (TYPE_MAIN_VARIANT (type)); ++ if (type_size == NULL || TREE_OVERFLOW (type_size)) ++ t = size_zero_node; ++ else ++ { ++ t = size_binop (PLUS_EXPR, type_size, size_int (7)); ++ t = size_binop (TRUNC_DIV_EXPR, t, size_int (8)); ++ t = size_binop (MULT_EXPR, t, size_int (8)); ++ } ++ t = fold_convert (TREE_TYPE (offset), t); ++ gimplify_assign (offset, build2 (PLUS_EXPR, TREE_TYPE (offset), offset, t), ++ pre_p); ++ ++ return build_va_arg_indirect_ref (addr); ++} ++ ++static tree ++sw_64_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, ++ gimple_seq *post_p) ++{ ++ tree offset_field, base_field, offset, base, t, r; ++ bool indirect; ++ ++ base_field = TYPE_FIELDS (va_list_type_node); ++ offset_field = DECL_CHAIN (base_field); ++ base_field = build3 (COMPONENT_REF, TREE_TYPE (base_field), valist, ++ base_field, NULL_TREE); ++ offset_field = build3 (COMPONENT_REF, TREE_TYPE (offset_field), valist, ++ offset_field, NULL_TREE); ++ ++ /* Pull the fields of the structure out into temporaries. Since we never ++ modify the base field, we can use a formal temporary. Sign-extend the ++ offset field so that it's the proper width for pointer arithmetic. */ ++ base = get_formal_tmp_var (base_field, pre_p); ++ ++ t = fold_convert (build_nonstandard_integer_type (64, 0), offset_field); ++ offset = get_initialized_tmp_var (t, pre_p, NULL); ++ ++ indirect = pass_va_arg_by_reference (type); ++ ++ if (indirect) ++ { ++ if (TREE_CODE (type) == COMPLEX_TYPE ++ && targetm.calls.split_complex_arg (type)) ++ { ++ tree real_part, imag_part, real_temp; ++ ++ tree ptr_type ++ = build_pointer_type_for_mode (TREE_TYPE (type), ptr_mode, true); ++ ++ real_part = sw_64_gimplify_va_arg_1 (ptr_type, base, offset, pre_p); ++ real_part = build_va_arg_indirect_ref (real_part); ++ ++ /* Copy the value into a new temporary, lest the formal temporary ++ be reused out from under us. */ ++ real_temp = get_initialized_tmp_var (real_part, pre_p, NULL); ++ ++ imag_part = sw_64_gimplify_va_arg_1 (ptr_type, base, offset, pre_p); ++ imag_part = build_va_arg_indirect_ref (imag_part); ++ ++ r = build2 (COMPLEX_EXPR, type, real_temp, imag_part); ++ ++ /* Stuff the offset temporary back into its field. */ ++ gimplify_assign (unshare_expr (offset_field), ++ fold_convert (TREE_TYPE (offset_field), offset), ++ pre_p); ++ return r; ++ } ++ else ++ type = build_pointer_type_for_mode (type, ptr_mode, true); ++ } ++ ++ /* Find the value. Note that this will be a stable indirection, or ++ a composite of stable indirections in the case of complex. */ ++ r = sw_64_gimplify_va_arg_1 (type, base, offset, pre_p); ++ ++ /* Stuff the offset temporary back into its field. */ ++ gimplify_assign (unshare_expr (offset_field), ++ fold_convert (TREE_TYPE (offset_field), offset), pre_p); ++ ++ if (indirect) ++ r = build_va_arg_indirect_ref (r); ++ ++ return r; ++} ++ ++/* Builtins. 
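++ Each entry in the tables below gives the builtin's name, its
++ sw_64_builtin code, the target_flags mask that must be enabled for the
++ builtin to be registered, and whether it may be marked ECF_CONST. 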
*/ ++ ++enum sw_64_builtin ++{ ++ SW_64_BUILTIN_CMPBGE, ++ SW_64_BUILTIN_EXTBL, ++ SW_64_BUILTIN_EXTWL, ++ SW_64_BUILTIN_EXTLL, ++ SW_64_BUILTIN_EXTQL, ++ SW_64_BUILTIN_EXTWH, ++ SW_64_BUILTIN_EXTLH, ++ SW_64_BUILTIN_EXTQH, ++ SW_64_BUILTIN_INSBL, ++ SW_64_BUILTIN_INSWL, ++ SW_64_BUILTIN_INSLL, ++ SW_64_BUILTIN_INSQL, ++ SW_64_BUILTIN_INSWH, ++ SW_64_BUILTIN_INSLH, ++ SW_64_BUILTIN_INSQH, ++ SW_64_BUILTIN_MSKBL, ++ SW_64_BUILTIN_MSKWL, ++ SW_64_BUILTIN_MSKLL, ++ SW_64_BUILTIN_MSKQL, ++ SW_64_BUILTIN_MSKWH, ++ SW_64_BUILTIN_MSKLH, ++ SW_64_BUILTIN_MSKQH, ++ SW_64_BUILTIN_UMULH, ++ SW_64_BUILTIN_ZAP, ++ SW_64_BUILTIN_ZAPNOT, ++ SW_64_BUILTIN_AMASK, ++ SW_64_BUILTIN_IMPLVER, ++ SW_64_BUILTIN_RPCC, ++ ++ /* TARGET_MAX. */ ++ SW_64_BUILTIN_MINUB8, ++ SW_64_BUILTIN_MINSB8, ++ SW_64_BUILTIN_MINUW4, ++ SW_64_BUILTIN_MINSW4, ++ SW_64_BUILTIN_MAXUB8, ++ SW_64_BUILTIN_MAXSB8, ++ SW_64_BUILTIN_MAXUW4, ++ SW_64_BUILTIN_MAXSW4, ++ SW_64_BUILTIN_PERR, ++ SW_64_BUILTIN_PKLB, ++ SW_64_BUILTIN_PKWB, ++ SW_64_BUILTIN_UNPKBL, ++ SW_64_BUILTIN_UNPKBW, ++ ++ /* TARGET_CIX. */ ++ SW_64_BUILTIN_CTTZ, ++ SW_64_BUILTIN_CTLZ, ++ SW_64_BUILTIN_CTPOP, ++ SW_64_BUILTIN_SBT, ++ SW_64_BUILTIN_CBT, ++ ++ SW_64_BUILTIN_max ++}; ++ ++static enum insn_code const code_for_builtin[SW_64_BUILTIN_max] ++ = {CODE_FOR_builtin_cmpbge, CODE_FOR_extbl, CODE_FOR_extwl, CODE_FOR_extll, ++ CODE_FOR_extql, CODE_FOR_extwh, CODE_FOR_extlh, CODE_FOR_extqh, ++ CODE_FOR_builtin_insbl, CODE_FOR_builtin_inswl, CODE_FOR_builtin_insll, ++ CODE_FOR_insql, CODE_FOR_inswh, CODE_FOR_inslh, CODE_FOR_insqh, ++ CODE_FOR_mskbl, CODE_FOR_mskwl, CODE_FOR_mskll, CODE_FOR_mskql, ++ CODE_FOR_mskwh, CODE_FOR_msklh, CODE_FOR_mskqh, CODE_FOR_umuldi3_highpart, ++ CODE_FOR_builtin_zap, CODE_FOR_builtin_zapnot, CODE_FOR_builtin_amask, ++ CODE_FOR_builtin_implver, CODE_FOR_builtin_rpcc, ++ ++ ++ /* TARGET_MAX */ ++ CODE_FOR_builtin_minub8, CODE_FOR_builtin_minsb8, CODE_FOR_builtin_minuw4, ++ CODE_FOR_builtin_minsw4, CODE_FOR_builtin_maxub8, CODE_FOR_builtin_maxsb8, ++ CODE_FOR_builtin_maxuw4, CODE_FOR_builtin_maxsw4, CODE_FOR_builtin_perr, ++ CODE_FOR_builtin_pklb, CODE_FOR_builtin_pkwb, CODE_FOR_builtin_unpkbl, ++ CODE_FOR_builtin_unpkbw, ++ ++ /* TARGET_CIX */ ++ CODE_FOR_ctzdi2, CODE_FOR_clzdi2, CODE_FOR_popcountdi2, ++ ++ CODE_FOR_builtin_sbt, CODE_FOR_builtin_cbt}; ++ ++struct sw_64_builtin_def ++{ ++ const char *name; ++ enum sw_64_builtin code; ++ unsigned int target_mask; ++ bool is_const; ++}; ++ ++static struct sw_64_builtin_def const zero_arg_builtins[] ++ = {{"__builtin_sw_64_implver", SW_64_BUILTIN_IMPLVER, 0, true}, ++ {"__builtin_sw_64_rpcc", SW_64_BUILTIN_RPCC, 0, false}}; ++ ++static struct sw_64_builtin_def const one_arg_builtins[] ++ = {{"__builtin_sw_64_amask", SW_64_BUILTIN_AMASK, 0, true}, ++ {"__builtin_sw_64_pklb", SW_64_BUILTIN_PKLB, MASK_MAX, true}, ++ {"__builtin_sw_64_pkwb", SW_64_BUILTIN_PKWB, MASK_MAX, true}, ++ {"__builtin_sw_64_unpkbl", SW_64_BUILTIN_UNPKBL, MASK_MAX, true}, ++ {"__builtin_sw_64_unpkbw", SW_64_BUILTIN_UNPKBW, MASK_MAX, true}, ++ {"__builtin_sw_64_cttz", SW_64_BUILTIN_CTTZ, MASK_CIX, true}, ++ {"__builtin_sw_64_ctlz", SW_64_BUILTIN_CTLZ, MASK_CIX, true}, ++ {"__builtin_sw_64_ctpop", SW_64_BUILTIN_CTPOP, MASK_CIX, true}}; ++ ++static struct sw_64_builtin_def const two_arg_builtins[] ++ = {{"__builtin_sw_64_cmpbge", SW_64_BUILTIN_CMPBGE, 0, true}, ++ {"__builtin_sw_64_extbl", SW_64_BUILTIN_EXTBL, 0, true}, ++ {"__builtin_sw_64_extwl", SW_64_BUILTIN_EXTWL, 0, true}, ++ {"__builtin_sw_64_extll", SW_64_BUILTIN_EXTLL, 0, 
true}, ++ {"__builtin_sw_64_extql", SW_64_BUILTIN_EXTQL, 0, true}, ++ {"__builtin_sw_64_extwh", SW_64_BUILTIN_EXTWH, 0, true}, ++ {"__builtin_sw_64_extlh", SW_64_BUILTIN_EXTLH, 0, true}, ++ {"__builtin_sw_64_extqh", SW_64_BUILTIN_EXTQH, 0, true}, ++ {"__builtin_sw_64_insbl", SW_64_BUILTIN_INSBL, 0, true}, ++ {"__builtin_sw_64_inswl", SW_64_BUILTIN_INSWL, 0, true}, ++ {"__builtin_sw_64_insll", SW_64_BUILTIN_INSLL, 0, true}, ++ {"__builtin_sw_64_insql", SW_64_BUILTIN_INSQL, 0, true}, ++ {"__builtin_sw_64_inswh", SW_64_BUILTIN_INSWH, 0, true}, ++ {"__builtin_sw_64_inslh", SW_64_BUILTIN_INSLH, 0, true}, ++ {"__builtin_sw_64_insqh", SW_64_BUILTIN_INSQH, 0, true}, ++ {"__builtin_sw_64_mskbl", SW_64_BUILTIN_MSKBL, 0, true}, ++ {"__builtin_sw_64_mskwl", SW_64_BUILTIN_MSKWL, 0, true}, ++ {"__builtin_sw_64_mskll", SW_64_BUILTIN_MSKLL, 0, true}, ++ {"__builtin_sw_64_mskql", SW_64_BUILTIN_MSKQL, 0, true}, ++ {"__builtin_sw_64_mskwh", SW_64_BUILTIN_MSKWH, 0, true}, ++ {"__builtin_sw_64_msklh", SW_64_BUILTIN_MSKLH, 0, true}, ++ {"__builtin_sw_64_mskqh", SW_64_BUILTIN_MSKQH, 0, true}, ++ {"__builtin_sw_64_umulh", SW_64_BUILTIN_UMULH, 0, true}, ++ {"__builtin_sw_64_zap", SW_64_BUILTIN_ZAP, 0, true}, ++ {"__builtin_sw_64_zapnot", SW_64_BUILTIN_ZAPNOT, 0, true}, ++ {"__builtin_sw_64_minub8", SW_64_BUILTIN_MINUB8, MASK_MAX, true}, ++ {"__builtin_sw_64_minsb8", SW_64_BUILTIN_MINSB8, MASK_MAX, true}, ++ {"__builtin_sw_64_minuw4", SW_64_BUILTIN_MINUW4, MASK_MAX, true}, ++ {"__builtin_sw_64_minsw4", SW_64_BUILTIN_MINSW4, MASK_MAX, true}, ++ {"__builtin_sw_64_maxub8", SW_64_BUILTIN_MAXUB8, MASK_MAX, true}, ++ {"__builtin_sw_64_maxsb8", SW_64_BUILTIN_MAXSB8, MASK_MAX, true}, ++ {"__builtin_sw_64_maxuw4", SW_64_BUILTIN_MAXUW4, MASK_MAX, true}, ++ {"__builtin_sw_64_maxsw4", SW_64_BUILTIN_MAXSW4, MASK_MAX, true}, ++ {"__builtin_sw_64_perr", SW_64_BUILTIN_PERR, MASK_MAX, true}, ++ {"__builtin_sw_64_sbt", SW_64_BUILTIN_SBT, MASK_SW8A, true}, ++ {"__builtin_sw_64_cbt", SW_64_BUILTIN_CBT, MASK_SW8A, true}}; ++ ++static GTY (()) tree sw_64_dimode_u; ++static GTY (()) tree sw_64_v8qi_u; ++static GTY (()) tree sw_64_v8qi_s; ++static GTY (()) tree sw_64_v4hi_u; ++static GTY (()) tree sw_64_v4hi_s; ++ ++static GTY (()) tree sw_64_builtins[(int) SW_64_BUILTIN_max]; ++ ++/* Return the sw_64 builtin for CODE. */ ++ ++static tree ++sw_64_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED) ++{ ++ if (code >= SW_64_BUILTIN_max) ++ return error_mark_node; ++ return sw_64_builtins[code]; ++} ++ ++/* Helper function of sw_64_init_builtins. Add the built-in specified ++ by NAME, TYPE, CODE, and ECF. */ ++ ++static void ++sw_64_builtin_function (const char *name, tree ftype, enum sw_64_builtin code, ++ unsigned ecf) ++{ ++ tree decl = add_builtin_function (name, ftype, (int) code, BUILT_IN_MD, NULL, ++ NULL_TREE); ++ ++ if (ecf & ECF_CONST) ++ TREE_READONLY (decl) = 1; ++ if (ecf & ECF_NOTHROW) ++ TREE_NOTHROW (decl) = 1; ++ ++ sw_64_builtins[(int) code] = decl; ++} ++ ++/* Helper function of sw_64_init_builtins. Add the COUNT built-in ++ functions pointed to by P, with function type FTYPE. */ ++ ++static void ++sw_64_add_builtins (const struct sw_64_builtin_def *p, size_t count, tree ftype) ++{ ++ size_t i; ++ ++ for (i = 0; i < count; ++i, ++p) ++ if ((target_flags & p->target_mask) == p->target_mask) ++ sw_64_builtin_function (p->name, ftype, p->code, ++ (p->is_const ? 
ECF_CONST : 0) | ECF_NOTHROW); ++} ++ ++static void ++sw_64_init_builtins (void) ++{ ++ tree ftype; ++ ++ sw_64_dimode_u = lang_hooks.types.type_for_mode (DImode, 1); ++ sw_64_v8qi_u = build_vector_type (unsigned_intQI_type_node, 8); ++ sw_64_v8qi_s = build_vector_type (intQI_type_node, 8); ++ sw_64_v4hi_u = build_vector_type (unsigned_intHI_type_node, 4); ++ sw_64_v4hi_s = build_vector_type (intHI_type_node, 4); ++ ++ ftype = build_function_type_list (sw_64_dimode_u, NULL_TREE); ++ sw_64_add_builtins (zero_arg_builtins, ARRAY_SIZE (zero_arg_builtins), ftype); ++ ++ ftype = build_function_type_list (sw_64_dimode_u, sw_64_dimode_u, NULL_TREE); ++ sw_64_add_builtins (one_arg_builtins, ARRAY_SIZE (one_arg_builtins), ftype); ++ ++ ftype = build_function_type_list (sw_64_dimode_u, sw_64_dimode_u, ++ sw_64_dimode_u, NULL_TREE); ++ sw_64_add_builtins (two_arg_builtins, ARRAY_SIZE (two_arg_builtins), ftype); ++} ++ ++/* Expand an expression EXP that calls a built-in function, ++ with result going to TARGET if that's convenient ++ (and in mode MODE if that's convenient). ++ SUBTARGET may be used as the target for computing one of EXP's operands. ++ IGNORE is nonzero if the value is to be ignored. */ ++ ++static rtx ++sw_64_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, ++ machine_mode mode ATTRIBUTE_UNUSED, ++ int ignore ATTRIBUTE_UNUSED) ++{ ++#define MAX_ARGS 2 ++ ++ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); ++ unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); ++ tree arg; ++ call_expr_arg_iterator iter; ++ enum insn_code icode; ++ rtx op[MAX_ARGS], pat; ++ int arity; ++ bool nonvoid; ++ ++ if (fcode >= SW_64_BUILTIN_max) ++ internal_error ("bad builtin fcode"); ++ icode = code_for_builtin[fcode]; ++ if (icode == 0) ++ internal_error ("bad builtin fcode"); ++ ++ nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node; ++ ++ arity = 0; ++ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) ++ { ++ const struct insn_operand_data *insn_op; ++ ++ if (arg == error_mark_node) ++ return NULL_RTX; ++ if (arity > MAX_ARGS) ++ return NULL_RTX; ++ ++ insn_op = &insn_data[icode].operand[arity + nonvoid]; ++ ++ op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL); ++ ++ if (!(*insn_op->predicate) (op[arity], insn_op->mode)) ++ op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]); ++ arity++; ++ } ++ ++ if (nonvoid) ++ { ++ machine_mode tmode = insn_data[icode].operand[0].mode; ++ if (!target || GET_MODE (target) != tmode ++ || !(*insn_data[icode].operand[0].predicate) (target, tmode)) ++ target = gen_reg_rtx (tmode); ++ } ++ ++ switch (arity) ++ { ++ case 0: ++ pat = GEN_FCN (icode) (target); ++ break; ++ case 1: ++ if (nonvoid) ++ pat = GEN_FCN (icode) (target, op[0]); ++ else ++ pat = GEN_FCN (icode) (op[0]); ++ break; ++ case 2: ++ pat = GEN_FCN (icode) (target, op[0], op[1]); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ if (!pat) ++ return NULL_RTX; ++ emit_insn (pat); ++ ++ if (nonvoid) ++ return target; ++ else ++ return const0_rtx; ++} ++ ++/* Fold the builtin for the CMPBGE instruction. This is a vector comparison ++ with an 8-bit output vector. OPINT contains the integer operands; bit N ++ of OP_CONST is set if OPINT[N] is valid. 
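++ With both operands constant, the eight byte comparisons are evaluated
++ directly; if only the second operand is known and it is zero, every
++ unsigned byte compares greater than or equal, so the result folds to
++ 0xff. 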
*/ ++ ++static tree ++sw_64_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const) ++{ ++ if (op_const == 3) ++ { ++ int i, val; ++ for (i = 0, val = 0; i < 8; ++i) ++ { ++ unsigned HOST_WIDE_INT c0 = (opint[0] >> (i * 8)) & 0xff; ++ unsigned HOST_WIDE_INT c1 = (opint[1] >> (i * 8)) & 0xff; ++ if (c0 >= c1) ++ val |= 1 << i; ++ } ++ return build_int_cst (sw_64_dimode_u, val); ++ } ++ else if (op_const == 2 && opint[1] == 0) ++ return build_int_cst (sw_64_dimode_u, 0xff); ++ return NULL; ++} ++ ++/* Fold the builtin for the ZAPNOT instruction. This is essentially a ++ specialized form of an AND operation. Other byte manipulation instructions ++ are defined in terms of this instruction, so this is also used as a ++ subroutine for other builtins. ++ ++ OP contains the tree operands; OPINT contains the extracted integer values. ++ Bit N of OP_CONST it set if OPINT[N] is valid. OP may be null if only ++ OPINT may be considered. */ ++ ++static tree ++sw_64_fold_builtin_zapnot (tree *op, unsigned HOST_WIDE_INT opint[], ++ long op_const) ++{ ++ if (op_const & 2) ++ { ++ unsigned HOST_WIDE_INT mask = 0; ++ int i; ++ ++ for (i = 0; i < 8; ++i) ++ if ((opint[1] >> i) & 1) ++ mask |= (unsigned HOST_WIDE_INT) 0xff << (i * 8); ++ ++ if (op_const & 1) ++ return build_int_cst (sw_64_dimode_u, opint[0] & mask); ++ ++ if (op) ++ return fold_build2 (BIT_AND_EXPR, sw_64_dimode_u, op[0], ++ build_int_cst (sw_64_dimode_u, mask)); ++ } ++ else if ((op_const & 1) && opint[0] == 0) ++ return build_int_cst (sw_64_dimode_u, 0); ++ return NULL; ++} ++ ++/* Fold the builtins for the EXT family of instructions. */ ++ ++static tree ++sw_64_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[], ++ long op_const, unsigned HOST_WIDE_INT bytemask, ++ bool is_high) ++{ ++ long zap_const = 2; ++ tree *zap_op = NULL; ++ ++ if (op_const & 2) ++ { ++ unsigned HOST_WIDE_INT loc; ++ ++ loc = opint[1] & 7; ++ loc *= BITS_PER_UNIT; ++ ++ if (loc != 0) ++ { ++ if (op_const & 1) ++ { ++ unsigned HOST_WIDE_INT temp = opint[0]; ++ if (is_high) ++ temp <<= loc; ++ else ++ temp >>= loc; ++ opint[0] = temp; ++ zap_const = 3; ++ } ++ } ++ else ++ zap_op = op; ++ } ++ ++ opint[1] = bytemask; ++ return sw_64_fold_builtin_zapnot (zap_op, opint, zap_const); ++} ++ ++/* Fold the builtins for the INS family of instructions. 
*/ ++ ++static tree ++sw_64_fold_builtin_insxx (tree op[], unsigned HOST_WIDE_INT opint[], ++ long op_const, unsigned HOST_WIDE_INT bytemask, ++ bool is_high) ++{ ++ if ((op_const & 1) && opint[0] == 0) ++ return build_int_cst (sw_64_dimode_u, 0); ++ ++ if (op_const & 2) ++ { ++ unsigned HOST_WIDE_INT temp, loc, byteloc; ++ tree *zap_op = NULL; ++ ++ loc = opint[1] & 7; ++ bytemask <<= loc; ++ ++ temp = opint[0]; ++ if (is_high) ++ { ++ byteloc = (64 - (loc * 8)) & 0x3f; ++ if (byteloc == 0) ++ zap_op = op; ++ else ++ temp >>= byteloc; ++ bytemask >>= 8; ++ } ++ else ++ { ++ byteloc = loc * 8; ++ if (byteloc == 0) ++ zap_op = op; ++ else ++ temp <<= byteloc; ++ } ++ ++ opint[0] = temp; ++ opint[1] = bytemask; ++ return sw_64_fold_builtin_zapnot (zap_op, opint, op_const); ++ } ++ ++ return NULL; ++} ++ ++static tree ++sw_64_fold_builtin_mskxx (tree op[], unsigned HOST_WIDE_INT opint[], ++ long op_const, unsigned HOST_WIDE_INT bytemask, ++ bool is_high) ++{ ++ if (op_const & 2) ++ { ++ unsigned HOST_WIDE_INT loc; ++ ++ loc = opint[1] & 7; ++ bytemask <<= loc; ++ ++ if (is_high) ++ bytemask >>= 8; ++ ++ opint[1] = bytemask ^ 0xff; ++ } ++ ++ return sw_64_fold_builtin_zapnot (op, opint, op_const); ++} ++ ++static tree ++sw_64_fold_vector_minmax (enum tree_code code, tree op[], tree vtype) ++{ ++ tree op0 = fold_convert (vtype, op[0]); ++ tree op1 = fold_convert (vtype, op[1]); ++ tree val = fold_build2 (code, vtype, op0, op1); ++ return fold_build1 (VIEW_CONVERT_EXPR, sw_64_dimode_u, val); ++} ++ ++static tree ++sw_64_fold_builtin_perr (unsigned HOST_WIDE_INT opint[], long op_const) ++{ ++ unsigned HOST_WIDE_INT temp = 0; ++ int i; ++ ++ if (op_const != 3) ++ return NULL; ++ ++ for (i = 0; i < 8; ++i) ++ { ++ unsigned HOST_WIDE_INT a = (opint[0] >> (i * 8)) & 0xff; ++ unsigned HOST_WIDE_INT b = (opint[1] >> (i * 8)) & 0xff; ++ if (a >= b) ++ temp += a - b; ++ else ++ temp += b - a; ++ } ++ ++ return build_int_cst (sw_64_dimode_u, temp); ++} ++ ++static tree ++sw_64_fold_builtin_pklb (unsigned HOST_WIDE_INT opint[], long op_const) ++{ ++ unsigned HOST_WIDE_INT temp; ++ ++ if (op_const == 0) ++ return NULL; ++ ++ temp = opint[0] & 0xff; ++ temp |= (opint[0] >> 24) & 0xff00; ++ ++ return build_int_cst (sw_64_dimode_u, temp); ++} ++ ++static tree ++sw_64_fold_builtin_pkwb (unsigned HOST_WIDE_INT opint[], long op_const) ++{ ++ unsigned HOST_WIDE_INT temp; ++ ++ if (op_const == 0) ++ return NULL; ++ ++ temp = opint[0] & 0xff; ++ temp |= (opint[0] >> 8) & 0xff00; ++ temp |= (opint[0] >> 16) & 0xff0000; ++ temp |= (opint[0] >> 24) & 0xff000000; ++ ++ return build_int_cst (sw_64_dimode_u, temp); ++} ++ ++static tree ++sw_64_fold_builtin_unpkbl (unsigned HOST_WIDE_INT opint[], long op_const) ++{ ++ unsigned HOST_WIDE_INT temp; ++ ++ if (op_const == 0) ++ return NULL; ++ ++ temp = opint[0] & 0xff; ++ temp |= (opint[0] & 0xff00) << 24; ++ ++ return build_int_cst (sw_64_dimode_u, temp); ++} ++ ++static tree ++sw_64_fold_builtin_unpkbw (unsigned HOST_WIDE_INT opint[], long op_const) ++{ ++ unsigned HOST_WIDE_INT temp; ++ ++ if (op_const == 0) ++ return NULL; ++ ++ temp = opint[0] & 0xff; ++ temp |= (opint[0] & 0x0000ff00) << 8; ++ temp |= (opint[0] & 0x00ff0000) << 16; ++ temp |= (opint[0] & 0xff000000) << 24; ++ ++ return build_int_cst (sw_64_dimode_u, temp); ++} ++ ++static tree ++sw_64_fold_builtin_cttz (unsigned HOST_WIDE_INT opint[], long op_const) ++{ ++ unsigned HOST_WIDE_INT temp; ++ ++ if (op_const == 0) ++ return NULL; ++ ++ if (opint[0] == 0) ++ temp = 64; ++ else ++ temp = exact_log2 (opint[0] & 
-opint[0]); ++ ++ return build_int_cst (sw_64_dimode_u, temp); ++} ++ ++static tree ++sw_64_fold_builtin_ctlz (unsigned HOST_WIDE_INT opint[], long op_const) ++{ ++ unsigned HOST_WIDE_INT temp; ++ ++ if (op_const == 0) ++ return NULL; ++ ++ if (opint[0] == 0) ++ temp = 64; ++ else ++ temp = 64 - floor_log2 (opint[0]) - 1; ++ ++ return build_int_cst (sw_64_dimode_u, temp); ++} ++ ++static tree ++sw_64_fold_builtin_ctpop (unsigned HOST_WIDE_INT opint[], long op_const) ++{ ++ unsigned HOST_WIDE_INT temp, op; ++ ++ if (op_const == 0) ++ return NULL; ++ ++ op = opint[0]; ++ temp = 0; ++ while (op) ++ temp++, op &= op - 1; ++ ++ return build_int_cst (sw_64_dimode_u, temp); ++} ++ ++static tree ++sw_64_builtin_sbt (int n_args, tree *op, unsigned HOST_WIDE_INT opint[], ++ long op_const) ++{ ++ int i; ++ if (op_const == 0) ++ return NULL; ++ ++ if (TREE_CODE (op[0]) == INTEGER_CST) ++ { ++ error ("The first parameter cannot be a constant!"); ++ gcc_unreachable (); ++ } ++ ++ if ((opint[1] >> 63) & 0x1 & (warning_sbt_num == 1)) ++ warning (0, "The second parameter is negative [enabled by default]"); ++ ++ warning_sbt_num++; ++ return NULL; ++} ++ ++static tree ++sw_64_builtin_cbt (int n_args, tree *op, unsigned HOST_WIDE_INT opint[], ++ long op_const) ++{ ++ int i; ++ if (op_const == 0) ++ return NULL; ++ ++ if (TREE_CODE (op[0]) == INTEGER_CST) ++ { ++ error ("The first parameter cannot be a constant!"); ++ gcc_unreachable (); ++ } ++ ++ if ((opint[1] >> 63) & 0x1 & (warning_cbt_num == 1)) ++ warning (0, "The second parameter is negative [enabled by default]"); ++ ++ warning_cbt_num++; ++ return NULL; ++} ++ ++/* Fold one of our builtin functions. */ ++ ++static tree ++sw_64_fold_builtin (tree fndecl, int n_args, tree *op, ++ bool ignore ATTRIBUTE_UNUSED) ++{ ++ unsigned HOST_WIDE_INT opint[MAX_ARGS]; ++ long op_const = 0; ++ int i; ++ ++ if (n_args > MAX_ARGS) ++ return NULL; ++ ++ for (i = 0; i < n_args; i++) ++ { ++ tree arg = op[i]; ++ if (arg == error_mark_node) ++ return NULL; ++ ++ opint[i] = 0; ++ if (TREE_CODE (arg) == INTEGER_CST) ++ { ++ op_const |= 1L << i; ++ opint[i] = int_cst_value (arg); ++ } ++ } ++ ++ switch (DECL_MD_FUNCTION_CODE (fndecl)) ++ { ++ case SW_64_BUILTIN_CMPBGE: ++ return sw_64_fold_builtin_cmpbge (opint, op_const); ++ ++ case SW_64_BUILTIN_EXTBL: ++ return sw_64_fold_builtin_extxx (op, opint, op_const, 0x01, false); ++ case SW_64_BUILTIN_EXTWL: ++ return sw_64_fold_builtin_extxx (op, opint, op_const, 0x03, false); ++ case SW_64_BUILTIN_EXTLL: ++ return sw_64_fold_builtin_extxx (op, opint, op_const, 0x0f, false); ++ case SW_64_BUILTIN_EXTQL: ++ return sw_64_fold_builtin_extxx (op, opint, op_const, 0xff, false); ++ case SW_64_BUILTIN_EXTWH: ++ return sw_64_fold_builtin_extxx (op, opint, op_const, 0x03, true); ++ case SW_64_BUILTIN_EXTLH: ++ return sw_64_fold_builtin_extxx (op, opint, op_const, 0x0f, true); ++ case SW_64_BUILTIN_EXTQH: ++ return sw_64_fold_builtin_extxx (op, opint, op_const, 0xff, true); ++ ++ case SW_64_BUILTIN_INSBL: ++ return sw_64_fold_builtin_insxx (op, opint, op_const, 0x01, false); ++ case SW_64_BUILTIN_INSWL: ++ return sw_64_fold_builtin_insxx (op, opint, op_const, 0x03, false); ++ case SW_64_BUILTIN_INSLL: ++ return sw_64_fold_builtin_insxx (op, opint, op_const, 0x0f, false); ++ case SW_64_BUILTIN_INSQL: ++ return sw_64_fold_builtin_insxx (op, opint, op_const, 0xff, false); ++ case SW_64_BUILTIN_INSWH: ++ return sw_64_fold_builtin_insxx (op, opint, op_const, 0x03, true); ++ case SW_64_BUILTIN_INSLH: ++ return sw_64_fold_builtin_insxx (op, 
opint, op_const, 0x0f, true); ++ case SW_64_BUILTIN_INSQH: ++ return sw_64_fold_builtin_insxx (op, opint, op_const, 0xff, true); ++ ++ case SW_64_BUILTIN_MSKBL: ++ return sw_64_fold_builtin_mskxx (op, opint, op_const, 0x01, false); ++ case SW_64_BUILTIN_MSKWL: ++ return sw_64_fold_builtin_mskxx (op, opint, op_const, 0x03, false); ++ case SW_64_BUILTIN_MSKLL: ++ return sw_64_fold_builtin_mskxx (op, opint, op_const, 0x0f, false); ++ case SW_64_BUILTIN_MSKQL: ++ return sw_64_fold_builtin_mskxx (op, opint, op_const, 0xff, false); ++ case SW_64_BUILTIN_MSKWH: ++ return sw_64_fold_builtin_mskxx (op, opint, op_const, 0x03, true); ++ case SW_64_BUILTIN_MSKLH: ++ return sw_64_fold_builtin_mskxx (op, opint, op_const, 0x0f, true); ++ case SW_64_BUILTIN_MSKQH: ++ return sw_64_fold_builtin_mskxx (op, opint, op_const, 0xff, true); ++ ++ case SW_64_BUILTIN_ZAP: ++ opint[1] ^= 0xff; ++ /* FALLTHRU */ ++ case SW_64_BUILTIN_ZAPNOT: ++ return sw_64_fold_builtin_zapnot (op, opint, op_const); ++ ++ case SW_64_BUILTIN_MINUB8: ++ return sw_64_fold_vector_minmax (MIN_EXPR, op, sw_64_v8qi_u); ++ case SW_64_BUILTIN_MINSB8: ++ return sw_64_fold_vector_minmax (MIN_EXPR, op, sw_64_v8qi_s); ++ case SW_64_BUILTIN_MINUW4: ++ return sw_64_fold_vector_minmax (MIN_EXPR, op, sw_64_v4hi_u); ++ case SW_64_BUILTIN_MINSW4: ++ return sw_64_fold_vector_minmax (MIN_EXPR, op, sw_64_v4hi_s); ++ case SW_64_BUILTIN_MAXUB8: ++ return sw_64_fold_vector_minmax (MAX_EXPR, op, sw_64_v8qi_u); ++ case SW_64_BUILTIN_MAXSB8: ++ return sw_64_fold_vector_minmax (MAX_EXPR, op, sw_64_v8qi_s); ++ case SW_64_BUILTIN_MAXUW4: ++ return sw_64_fold_vector_minmax (MAX_EXPR, op, sw_64_v4hi_u); ++ case SW_64_BUILTIN_MAXSW4: ++ return sw_64_fold_vector_minmax (MAX_EXPR, op, sw_64_v4hi_s); ++ ++ case SW_64_BUILTIN_PERR: ++ return sw_64_fold_builtin_perr (opint, op_const); ++ case SW_64_BUILTIN_PKLB: ++ return sw_64_fold_builtin_pklb (opint, op_const); ++ case SW_64_BUILTIN_PKWB: ++ return sw_64_fold_builtin_pkwb (opint, op_const); ++ case SW_64_BUILTIN_UNPKBL: ++ return sw_64_fold_builtin_unpkbl (opint, op_const); ++ case SW_64_BUILTIN_UNPKBW: ++ return sw_64_fold_builtin_unpkbw (opint, op_const); ++ ++ case SW_64_BUILTIN_CTTZ: ++ return sw_64_fold_builtin_cttz (opint, op_const); ++ case SW_64_BUILTIN_CTLZ: ++ return sw_64_fold_builtin_ctlz (opint, op_const); ++ case SW_64_BUILTIN_CTPOP: ++ return sw_64_fold_builtin_ctpop (opint, op_const); ++ case SW_64_BUILTIN_SBT: ++ return sw_64_builtin_sbt (n_args, op, opint, op_const); ++ case SW_64_BUILTIN_CBT: ++ return sw_64_builtin_cbt (n_args, op, opint, op_const); ++ case SW_64_BUILTIN_AMASK: ++ case SW_64_BUILTIN_IMPLVER: ++ case SW_64_BUILTIN_RPCC: ++ /* None of these are foldable at compile-time. 
*/ ++ default: ++ return NULL; ++ } ++} ++ ++bool ++sw_64_gimple_fold_builtin (gimple_stmt_iterator *gsi) ++{ ++ bool changed = false; ++ gimple *stmt = gsi_stmt (*gsi); ++ tree call = gimple_call_fn (stmt); ++ gimple *new_stmt = NULL; ++ ++ if (call) ++ { ++ tree fndecl = gimple_call_fndecl (stmt); ++ ++ if (fndecl) ++ { ++ tree arg0, arg1; ++ ++ switch (DECL_MD_FUNCTION_CODE (fndecl)) ++ { ++ case SW_64_BUILTIN_UMULH: ++ arg0 = gimple_call_arg (stmt, 0); ++ arg1 = gimple_call_arg (stmt, 1); ++ ++ new_stmt = gimple_build_assign (gimple_call_lhs (stmt), ++ MULT_HIGHPART_EXPR, arg0, arg1); ++ break; ++ default: ++ break; ++ } ++ } ++ } ++ ++ if (new_stmt) ++ { ++ gsi_replace (gsi, new_stmt, true); ++ changed = true; ++ } ++ ++ return changed; ++} ++ ++/* This page contains routines that are used to determine what the function ++ prologue and epilogue code will do and write them out. */ ++ ++/* Compute the size of the save area in the stack. */ ++ ++/* These variables are used for communication between the following functions. ++ They indicate various things about the current function being compiled ++ that are used to tell what kind of prologue, epilogue and procedure ++ descriptor to generate. */ ++ ++/* Nonzero if we need a stack procedure. */ ++enum sw_64_procedure_types ++{ ++ PT_NULL = 0, ++ PT_REGISTER = 1, ++ PT_STACK = 2 ++}; ++static enum sw_64_procedure_types sw_64_procedure_type; ++ ++/* Compute register masks for saved registers, register save area size, ++ and total frame size. */ ++static void ++sw_64_compute_frame_layout (void) ++{ ++ unsigned HOST_WIDE_INT sa_mask = 0; ++ HOST_WIDE_INT frame_size; ++ int sa_size; ++ ++ /* When outputting a thunk, we don't have valid register life info, ++ but assemble_start_function wants to output .frame and .mask ++ directives. */ ++ if (!cfun->is_thunk) ++ { ++ /* One for every register we have to save. */ ++ for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ if (!call_used_or_fixed_reg_p (i) && df_regs_ever_live_p (i) ++ && i != REG_RA) ++ sa_mask |= HOST_WIDE_INT_1U << i; ++ ++ /* We need to restore these for the handler. */ ++ if (crtl->calls_eh_return) ++ { ++ for (unsigned i = 0;; ++i) ++ { ++ unsigned regno = EH_RETURN_DATA_REGNO (i); ++ if (regno == INVALID_REGNUM) ++ break; ++ sa_mask |= HOST_WIDE_INT_1U << regno; ++ } ++ } ++ /* If any register spilled, then spill the return address also. */ ++ /* ??? This is required by the Digital stack unwind specification ++ and isn't needed if we're doing Dwarf2 unwinding. */ ++ if (sa_mask || sw_64_ra_ever_killed ()) ++ sa_mask |= HOST_WIDE_INT_1U << REG_RA; ++ } ++ sa_size = popcount_hwi (sa_mask); ++ frame_size = get_frame_size (); ++ ++ /* Our size must be even (multiple of 16 bytes). */ ++ if (sa_size & 1) ++ sa_size++; ++ sa_size *= 8; ++ ++ frame_size = (SW_64_ROUND (crtl->outgoing_args_size) + sa_size ++ + SW_64_ROUND (frame_size + crtl->args.pretend_args_size)); ++ ++ cfun->machine->sa_mask = sa_mask; ++ cfun->machine->sa_size = sa_size; ++ cfun->machine->frame_size = frame_size; ++} ++ ++#undef TARGET_COMPUTE_FRAME_LAYOUT ++#define TARGET_COMPUTE_FRAME_LAYOUT sw_64_layout_frame ++ ++/* Return 1 if this function can directly return via $26. 
*/ ++ ++bool ++direct_return (void) ++{ ++ return (reload_completed && cfun->machine->frame_size == 0); ++} ++ ++bool ++sw_64_find_lo_sum_using_gp (rtx insn) ++{ ++ subrtx_iterator::array_type array; ++ FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) ++ { ++ const_rtx x = *iter; ++ if (GET_CODE (x) == LO_SUM && XEXP (x, 0) == pic_offset_table_rtx) ++ return true; ++ } ++ return false; ++} ++ ++static int ++sw_64_does_function_need_gp (void) ++{ ++ rtx_insn *insn; ++ ++ /* We need the gp to load the address of __mcount. */ ++ if (TARGET_PROFILING_NEEDS_GP && crtl->profile) ++ return 1; ++ ++ /* The code emitted by sw_64_output_mi_thunk_sysv uses the gp. */ ++ if (cfun->is_thunk) ++ return 1; ++ ++ /* The nonlocal receiver pattern assumes that the gp is valid for ++ the nested function. Reasonable because it's almost always set ++ correctly already. For the cases where that's wrong, make sure ++ the nested function loads its gp on entry. */ ++ if (crtl->has_nonlocal_goto) ++ return 1; ++ ++ /* If we need a GP (we have a LDSYM insn or a CALL_INSN), load it first. ++ Even if we are a static function, we still need to do this in case ++ our address is taken and passed to something like qsort. */ ++ ++ push_topmost_sequence (); ++ insn = get_insns (); ++ pop_topmost_sequence (); ++ ++ for (; insn; insn = NEXT_INSN (insn)) ++ if (NONDEBUG_INSN_P (insn) && GET_CODE (PATTERN (insn)) != USE ++ && GET_CODE (PATTERN (insn)) != CLOBBER && get_attr_usegp (insn)) ++ return 1; ++ ++ return 0; ++} ++ ++/* Helper function to set RTX_FRAME_RELATED_P on instructions, including ++ sequences. */ ++ ++static rtx_insn * ++set_frame_related_p (void) ++{ ++ rtx_insn *seq = get_insns (); ++ rtx_insn *insn; ++ ++ end_sequence (); ++ ++ if (!seq) ++ return NULL; ++ ++ if (INSN_P (seq)) ++ { ++ insn = seq; ++ while (insn != NULL_RTX) ++ { ++ RTX_FRAME_RELATED_P (insn) = 1; ++ insn = NEXT_INSN (insn); ++ } ++ seq = emit_insn (seq); ++ } ++ else ++ { ++ seq = emit_insn (seq); ++ RTX_FRAME_RELATED_P (seq) = 1; ++ } ++ return seq; ++} ++ ++#define FRP(exp) (start_sequence (), exp, set_frame_related_p ()) ++ ++/* Generates a store with the proper unwind info attached. VALUE is ++ stored at BASE_REG+BASE_OFS. If FRAME_BIAS is nonzero, then BASE_REG ++ contains SP+FRAME_BIAS, and that is the unwind info that should be ++ generated. If FRAME_REG != VALUE, then VALUE is being stored on ++ behalf of FRAME_REG, and FRAME_REG should be present in the unwind. 
*/ ++ ++static void ++emit_frame_store_1 (rtx value, rtx base_reg, HOST_WIDE_INT frame_bias, ++ HOST_WIDE_INT base_ofs, rtx frame_reg) ++{ ++ rtx addr, mem; ++ rtx_insn *insn; ++ ++ addr = plus_constant (Pmode, base_reg, base_ofs); ++ mem = gen_frame_mem (Pmode, addr); ++ ++ insn = emit_move_insn (mem, value); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ ++ if (frame_bias || value != frame_reg) ++ { ++ if (frame_bias) ++ { ++ addr ++ = plus_constant (Pmode, stack_pointer_rtx, frame_bias + base_ofs); ++ mem = gen_rtx_MEM (Pmode, addr); ++ } ++ ++ add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_SET (mem, frame_reg)); ++ } ++} ++ ++static void ++emit_frame_store (unsigned int regno, rtx base_reg, HOST_WIDE_INT frame_bias, ++ HOST_WIDE_INT base_ofs) ++{ ++ rtx reg = gen_rtx_REG (DImode, regno); ++ emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg); ++} ++ ++static void ++emit_frame_store_32 (unsigned int regno, rtx base_reg, HOST_WIDE_INT frame_bias, ++ HOST_WIDE_INT base_ofs) ++{ ++ rtx reg = gen_rtx_REG (Pmode, regno); ++ emit_frame_store_1 (reg, base_reg, frame_bias, base_ofs, reg); ++} ++ ++/* Write function prologue. */ ++static void ++sw64_add_cfa_expression (rtx_insn *insn, unsigned int reg, rtx base, ++ poly_int64 offset) ++{ ++ rtx mem = gen_frame_mem (DImode, plus_constant (Pmode, base, offset)); ++ add_reg_note (insn, REG_CFA_EXPRESSION, ++ gen_rtx_SET (mem, regno_reg_rtx[reg])); ++} ++ ++void ++sw_64_expand_prologue (void) ++{ ++ /* Registers to save. */ ++ unsigned HOST_WIDE_INT sa_mask = cfun->machine->frame.sa_mask; ++ /* Stack space needed for pushing registers clobbered by us. */ ++ HOST_WIDE_INT sa_size = cfun->machine->frame.saved_regs_size; ++ /* Complete stack size needed. */ ++ HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size; ++ /* Probed stack size; it additionally includes the size of ++ the "reserve region" if any. */ ++ HOST_WIDE_INT probed_size, sa_bias; ++ /* Offset from base reg to register save area. */ ++ HOST_WIDE_INT reg_offset; ++ rtx sa_reg; ++ bool fp_flag = false; ++ ++ if (flag_stack_usage_info) ++ current_function_static_stack_size = frame_size; ++ ++#ifdef SW_64_ENABLE_FULL_ASAN ++ reg_offset = aligned_upper_bound (crtl->outgoing_args_size, ++ STACK_BOUNDARY / BITS_PER_UNIT); ++#else ++ reg_offset = SW_64_ROUND (crtl->outgoing_args_size); ++#endif ++ ++ /* Emit an insn to reload GP, if needed. */ ++ sw_64_function_needs_gp = sw_64_does_function_need_gp (); ++ if (sw_64_function_needs_gp) ++ { ++ if (TARGET_SW_M32) ++ emit_insn (gen_prologue_ldgp_32 ()); ++ else ++ emit_insn (gen_prologue_ldgp ()); ++ } ++ ++ if (strcmp ("main", lang_hooks.decl_printable_name (cfun->decl, 1)) == 0 ++ && (TARGET_SW_32ALIGN || TARGET_SW_SIMD)) ++ { ++ rtx const16 = gen_rtx_REG (DImode, 7); ++ sw_64_emit_set_const (const16, DImode, 16, 3, false); ++ emit_insn (gen_anddi3 (const16, const16, stack_pointer_rtx)); ++ emit_insn (gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, const16)); ++ ++ emit_insn ( ++ gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-32))); ++ rtx mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (0)); ++ rtx tmp7 = gen_rtx_MEM (Pmode, mem_address); ++ emit_move_insn (tmp7, gen_rtx_REG (DImode, 7)); ++ } ++ /* TARGET_PROFILING_NEEDS_GP actually implies that we need to insert ++ the call to mcount ourselves, rather than having the linker do it ++ magically in response to -pg. Since _mcount has special linkage, ++ don't represent the call as a call. 
*/ ++ if (TARGET_PROFILING_NEEDS_GP && crtl->profile) ++ emit_insn (gen_prologue_mcount ()); ++ ++ if (strcmp ("main", lang_hooks.decl_printable_name (cfun->decl, 1)) == 0 ++ && flag_sw_hardware_prefetch) ++ { ++ emit_insn ( ++ gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-256))); ++ rtx mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (8)); ++ rtx tmp16 = gen_rtx_MEM (Pmode, mem_address); ++ mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (16)); ++ rtx tmp17 = gen_rtx_MEM (Pmode, mem_address); ++ mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (24)); ++ rtx tmp18 = gen_rtx_MEM (Pmode, mem_address); ++ mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (32)); ++ rtx tmp19 = gen_rtx_MEM (Pmode, mem_address); ++ mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (40)); ++ rtx tmp26 = gen_rtx_MEM (Pmode, mem_address); ++ ++ emit_move_insn (tmp16, gen_rtx_REG (DImode, 16)); ++ emit_move_insn (tmp17, gen_rtx_REG (DImode, 17)); ++ emit_move_insn (tmp18, gen_rtx_REG (DImode, 18)); ++ emit_move_insn (tmp19, gen_rtx_REG (DImode, 19)); ++ emit_move_insn (tmp26, gen_rtx_REG (DImode, 26)); ++ ++ rtx tmp_clt = gen_rtx_REG (DImode, 7); ++ rtx tmp_cnt = gen_rtx_REG (DImode, 8); ++ rtx op = gen_rtx_REG (DImode, 17); ++ ++ unsigned long clt1, clt2, clt3; ++ unsigned long cnt1, cnt2, cnt3; ++ clt1 = flag_hardware_prefetch_clt % 2; ++ clt2 = (flag_hardware_prefetch_clt >> 1) % 2; ++ clt3 = (flag_hardware_prefetch_clt >> 2) % 2; ++ cnt1 = flag_hardware_prefetch_cnt_l1; ++ cnt2 = flag_hardware_prefetch_cnt_l2; ++ cnt3 = flag_hardware_prefetch_cnt_l3; ++ sw_64_emit_set_const (op, DImode, 0x10, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, clt1, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ sw_64_emit_set_const (op, DImode, 0x11, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, clt2, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ sw_64_emit_set_const (op, DImode, 0x12, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, clt3, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ sw_64_emit_set_const (op, DImode, 0x1, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, cnt1, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ sw_64_emit_set_const (op, DImode, 0x4, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, cnt2, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ sw_64_emit_set_const (op, DImode, 0x8, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, cnt3, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ emit_move_insn (gen_rtx_REG (DImode, 16), tmp16); ++ emit_move_insn (gen_rtx_REG (DImode, 17), tmp17); ++ emit_move_insn (gen_rtx_REG (DImode, 18), tmp18); ++ emit_move_insn (gen_rtx_REG (DImode, 19), tmp19); ++ emit_move_insn (gen_rtx_REG (DImode, 26), tmp26); ++ emit_insn ( ++ gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (256))); ++ } ++ if (strcmp ("exit", lang_hooks.decl_printable_name (cfun->decl, 1)) == 0) ++ { ++ emit_insn ( ++ gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-256))); ++ rtx mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (8)); ++ rtx tmp16 = gen_rtx_MEM (Pmode, mem_address); ++ mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (16)); ++ rtx tmp17 = gen_rtx_MEM (Pmode, mem_address); ++ mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (24)); ++ 
rtx tmp18 = gen_rtx_MEM (Pmode, mem_address); ++ mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (32)); ++ rtx tmp19 = gen_rtx_MEM (Pmode, mem_address); ++ mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (40)); ++ rtx tmp26 = gen_rtx_MEM (Pmode, mem_address); ++ ++ emit_move_insn (tmp16, gen_rtx_REG (DImode, 16)); ++ emit_move_insn (tmp17, gen_rtx_REG (DImode, 17)); ++ emit_move_insn (tmp18, gen_rtx_REG (DImode, 18)); ++ emit_move_insn (tmp19, gen_rtx_REG (DImode, 19)); ++ emit_move_insn (tmp26, gen_rtx_REG (DImode, 26)); ++ ++ rtx tmp_clt = gen_rtx_REG (DImode, 7); ++ rtx tmp_cnt = gen_rtx_REG (DImode, 8); ++ rtx op = gen_rtx_REG (DImode, 17); ++ ++ unsigned long clt1, clt2, clt3; ++ unsigned long cnt1, cnt2, cnt3; ++ clt1 = 1; ++ clt2 = 0; ++ clt3 = 1; ++ cnt1 = 0; ++ cnt2 = 0; ++ cnt3 = 5; ++ sw_64_emit_set_const (op, DImode, 0x10, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, clt1, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ sw_64_emit_set_const (op, DImode, 0x11, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, clt2, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ sw_64_emit_set_const (op, DImode, 0x12, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, clt3, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ sw_64_emit_set_const (op, DImode, 0x1, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, cnt1, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ sw_64_emit_set_const (op, DImode, 0x4, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, cnt2, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ sw_64_emit_set_const (op, DImode, 0x8, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, cnt3, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ emit_move_insn (gen_rtx_REG (DImode, 16), tmp16); ++ emit_move_insn (gen_rtx_REG (DImode, 17), tmp17); ++ emit_move_insn (gen_rtx_REG (DImode, 18), tmp18); ++ emit_move_insn (gen_rtx_REG (DImode, 19), tmp19); ++ emit_move_insn (gen_rtx_REG (DImode, 26), tmp26); ++ emit_insn ( ++ gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (256))); ++ } ++ ++ /* Adjust the stack by the frame size. If the frame size is > 4096 ++ bytes, we need to be sure we probe somewhere in the first and last ++ 4096 bytes (we can probably get away without the latter test) and ++ every 8192 bytes in between. If the frame size is > 32768, we ++ do this in a loop. Otherwise, we generate the explicit probe ++ instructions. ++ ++ Note that we are only allowed to adjust sp once in the prologue. */ ++ ++ probed_size = frame_size; ++ if (flag_stack_check || flag_stack_clash_protection) ++ probed_size += get_stack_check_protect (); ++ ++ if (probed_size <= 32768) ++ { ++ if (probed_size > 4096) ++ { ++ int probed; ++ ++ for (probed = 4096; probed < probed_size; probed += 8192) ++ emit_insn (gen_stack_probe_internal (GEN_INT (-probed))); ++ ++ /* We only have to do this probe if we aren't saving registers or ++ if we are probing beyond the frame because of -fstack-check. 
*/ ++ if ((sa_size == 0 && probed_size > probed - 4096) || flag_stack_check ++ || flag_stack_clash_protection) ++ emit_insn (gen_stack_probe_internal (GEN_INT (-probed_size))); ++ } ++ ++ if (frame_size != 0) ++ { ++ if (TARGET_SW_M32) ++ FRP (emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-frame_size)))); ++ else ++ FRP (emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-frame_size)))); ++ } ++ } ++ else ++ { ++ /* Here we generate code to set R22 to SP + 4096 and set R23 to the ++ number of 8192 byte blocks to probe. We then probe each block ++ in the loop and then set SP to the proper location. If the ++ amount remaining is > 4096, we have to do one more probe if we ++ are not saving any registers or if we are probing beyond the ++ frame because of -fstack-check. */ ++ ++ HOST_WIDE_INT blocks = (probed_size + 4096) / 8192; ++ HOST_WIDE_INT leftover = probed_size + 4096 - blocks * 8192; ++ rtx ptr = gen_rtx_REG (DImode, 22); ++ rtx count = gen_rtx_REG (DImode, 23); ++ rtx seq; ++ ++ emit_move_insn (count, GEN_INT (blocks)); ++ emit_insn (gen_adddi3 (ptr, stack_pointer_rtx, GEN_INT (4096))); ++ ++ /* Because of the difficulty in emitting a new basic block this ++ late in the compilation, generate the loop as a single insn. */ ++ emit_insn (gen_prologue_stack_probe_loop (count, ptr)); ++ ++ if ((leftover > 4096 && sa_size == 0) || flag_stack_check ++ || flag_stack_clash_protection) ++ { ++ rtx last = gen_rtx_MEM (Pmode, plus_constant (Pmode, ptr, -leftover)); ++ MEM_VOLATILE_P (last) = 1; ++ emit_move_insn (last, const0_rtx); ++ } ++ ++ if (flag_stack_check || flag_stack_clash_protection) ++ { ++ /* If -fstack-check is specified we have to load the entire ++ constant into a register and subtract from the sp in one go, ++ because the probed stack size is not equal to the frame size. */ ++ HOST_WIDE_INT lo, hi; ++ lo = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; ++ hi = frame_size - lo; ++ ++ emit_move_insn (ptr, GEN_INT (hi)); ++ emit_insn (gen_adddi3 (ptr, ptr, GEN_INT (lo))); ++ seq = emit_insn ( ++ gen_subdi3 (stack_pointer_rtx, stack_pointer_rtx, ptr)); ++ } ++ else ++ { ++ seq = emit_insn ( ++ gen_adddi3 (stack_pointer_rtx, ptr, GEN_INT (-leftover))); ++ } ++ ++ /* This alternative is special, because the DWARF code cannot ++ possibly intuit through the loop above. So we invent this ++ note it looks at instead. */ ++ RTX_FRAME_RELATED_P (seq) = 1; ++ add_reg_note (seq, REG_FRAME_RELATED_EXPR, ++ gen_rtx_SET (stack_pointer_rtx, ++ plus_constant (Pmode, stack_pointer_rtx, ++ -frame_size))); ++ } ++ ++ /* Cope with very large offsets to the register save area. */ ++ sa_bias = 0; ++ sa_reg = stack_pointer_rtx; ++ if (reg_offset + sa_size > 0x8000) ++ { ++ int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; ++ rtx sa_bias_rtx; ++ ++ if (low + sa_size <= 0x8000) ++ sa_bias = reg_offset - low, reg_offset = low; ++ else ++ sa_bias = reg_offset, reg_offset = 0; ++ ++ sa_reg = gen_rtx_REG (DImode, 24); ++ sa_bias_rtx = GEN_INT (sa_bias); ++ ++ if (add_operand (sa_bias_rtx, DImode)) ++ emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_bias_rtx)); ++ else ++ { ++ emit_move_insn (sa_reg, sa_bias_rtx); ++ emit_insn (gen_adddi3 (sa_reg, stack_pointer_rtx, sa_reg)); ++ } ++ } ++ ++ /* Save register RA next, followed by any other registers ++ that need to be saved. */ ++ for (unsigned i = REG_RA; sa_mask != 0; i = ctz_hwi (sa_mask)) ++ { ++ /* if we need a frame pointer, set it from the stack pointer. 
*/ ++ if (frame_pointer_needed && i != REG_RA && fp_flag == false) ++ { ++ if (TARGET_SW_M32) ++ { ++ emit_frame_store_32 (HARD_FRAME_POINTER_REGNUM, sa_reg, sa_bias, ++ reg_offset); ++ } ++ else ++ { ++ emit_frame_store (HARD_FRAME_POINTER_REGNUM, sa_reg, sa_bias, ++ reg_offset); ++ sa_mask &= ~(HOST_WIDE_INT_1U << HARD_FRAME_POINTER_REGNUM); ++ reg_offset += 8; ++ fp_flag = true; ++ } ++ } ++ else ++ { ++ if (TARGET_SW_M32) ++ { ++ emit_frame_store_32 (i, sa_reg, sa_bias, reg_offset); ++ } ++ else ++ { ++ emit_frame_store (i, sa_reg, sa_bias, reg_offset); ++ reg_offset += 8; ++ sa_mask &= ~(HOST_WIDE_INT_1U << i); ++ } ++ } ++ } ++ ++ /* If we need a frame pointer, set it from the stack pointer. */ ++ if (frame_pointer_needed) ++ { ++ if (TARGET_CAN_FAULT_IN_PROLOGUE) ++ { ++ unsigned reg2 = 15; // FP ++ unsigned reg1 = 26; // R26 ++ long adj_size = SW_64_ROUND (crtl->outgoing_args_size); ++ if (adj_size > 0x8000) ++ { ++ int low = ((adj_size & 0xffff) ^ 0x8000) - 0x8000; ++ HOST_WIDE_INT bias; ++ ++ if (low <= 0x8000) ++ bias = adj_size - low, adj_size = low; ++ else ++ bias = adj_size, adj_size = 0; ++ ++ rtx fp_move; ++ rtx sa_reg_exp ++ = plus_constant (Pmode, stack_pointer_rtx, bias); ++ emit_move_insn (hard_frame_pointer_rtx, sa_reg_exp); ++ if (adj_size != 0) ++ fp_move ++ = gen_adddi3 (hard_frame_pointer_rtx, ++ hard_frame_pointer_rtx, GEN_INT (adj_size)); ++ ++ if ((void *) fp_move == NULL) ++ printf ("unable gen add3"); ++ emit_insn (fp_move); ++ } ++ else ++ { ++ rtx fp_move ++ = gen_add3_insn (hard_frame_pointer_rtx, stack_pointer_rtx, ++ GEN_INT ( ++ SW_64_ROUND (crtl->outgoing_args_size))); ++ FRP (emit_insn (fp_move)); ++ } ++ rtx_insn *insn = get_last_insn (); ++ if (!find_reg_note (insn, REG_CFA_ADJUST_CFA, NULL_RTX)) ++ { ++ rtx src ++ = plus_constant (Pmode, stack_pointer_rtx, ++ SW_64_ROUND (crtl->outgoing_args_size)); ++ add_reg_note (insn, REG_CFA_ADJUST_CFA, ++ gen_rtx_SET (hard_frame_pointer_rtx, src)); ++ } ++ ++ emit_insn ( ++ gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); ++ } ++ else ++ /* This must always be the last instruction in the ++ prologue, thus we emit a special move + clobber. */ ++ FRP (emit_insn ( ++ gen_init_fp (hard_frame_pointer_rtx, stack_pointer_rtx, sa_reg))); ++ } ++ ++ /* The ABIs for VMS and OSF/1 say that while we can schedule insns into ++ the prologue, for exception handling reasons, we cannot do this for ++ any insn that might fault. We could prevent this for mems with a ++ (clobber:BLK (scratch)), but this doesn't work for fp insns. So we ++ have to prevent all such scheduling with a blockage. ++ ++ Linux, on the other hand, never bothered to implement OSF/1's ++ exception handling, and so doesn't care about such things. Anyone ++ planning to use dwarf2 frame-unwind info can also omit the blockage. */ ++ ++ if (!TARGET_CAN_FAULT_IN_PROLOGUE) ++ emit_insn (gen_blockage ()); ++} ++ ++/* Count the number of .file directives, so that .loc is up to date. */ ++int num_source_filenames = 0; ++ ++/* Output the textual info surrounding the prologue. */ ++ ++void ++sw_64_start_function (FILE *file, const char *fnname, ++ tree decl ATTRIBUTE_UNUSED) ++{ ++ unsigned long imask, fmask; ++ /* Complete stack size needed. */ ++ HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size; ++ /* The maximum debuggable frame size. */ ++ const HOST_WIDE_INT max_frame_size = HOST_WIDE_INT_1 << 31; ++ /* Offset from base reg to register save area. 
*/ ++ HOST_WIDE_INT reg_offset; ++ char *entry_label = (char *) alloca (strlen (fnname) + 6); ++ char *tramp_label = (char *) alloca (strlen (fnname) + 6); ++ int i; ++ ++ sw_64_fnname = fnname; ++ const char *main = "main"; ++ if (flag_fpcr_set == 4 && strcmp (fnname, main) == 0) ++ stfp3_flag = 1; ++ else ++ stfp3_flag = 0; ++ ++ reg_offset = SW_64_ROUND (crtl->outgoing_args_size); ++ ++ imask = cfun->machine->frame.sa_mask & 0xffffffffu; ++ fmask = cfun->machine->frame.sa_mask >> 32; ++ /* Issue function start and label. */ ++ if (!flag_inhibit_size_directive) ++ { ++ fputs ("\t.ent ", file); ++ assemble_name (file, fnname); ++ putc ('\n', file); ++ ++ /* If the function needs GP, we'll write the "..ng" label there. ++ Otherwise, do it here. */ ++ if (!sw_64_function_needs_gp && !cfun->is_thunk) ++ { ++ putc ('$', file); ++ assemble_name (file, fnname); ++ fputs ("..ng:\n", file); ++ } ++ } ++ /* Nested functions on VMS that are potentially called via trampoline ++ get a special transfer entry point that loads the called functions ++ procedure descriptor and static chain. */ ++ strcpy (entry_label, fnname); ++ ++ ASM_OUTPUT_LABEL (file, entry_label); ++ inside_function = TRUE; ++ ++ if (TARGET_IEEE_CONFORMANT && !flag_inhibit_size_directive) ++ { ++ /* Set flags in procedure descriptor to request IEEE-conformant ++ math-library routines. The value we set it to is PDSC_EXC_IEEE ++ (/usr/include/pdsc.h). */ ++ fputs ("\t.eflag 48\n", file); ++ } ++ ++ /* Set up offsets to sw_64 virtual arg/local debugging pointer. */ ++ sw_64_auto_offset = -frame_size + cfun->machine->frame.saved_varargs_size ++ + crtl->args.pretend_args_size; ++ sw_64_arg_offset = -frame_size + 48; ++ ++ /* Describe our frame. If the frame size is larger than an integer, ++ print it as zero to avoid an assembler error. We won't be ++ properly describing such a frame, but that's the best we can do. */ ++ if (!flag_inhibit_size_directive) ++ fprintf (file, "\t.frame $%d," HOST_WIDE_INT_PRINT_DEC ",$26,%d\n", ++ (frame_pointer_needed ? HARD_FRAME_POINTER_REGNUM ++ : STACK_POINTER_REGNUM), ++ frame_size >= max_frame_size ? 0 : frame_size, ++ crtl->args.pretend_args_size); ++ ++ /* Describe which registers were spilled. */ ++ if (!flag_inhibit_size_directive) ++ { ++ if (imask) ++ { ++ fprintf (file, "\t.mask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", imask, ++ frame_size >= max_frame_size ? 0 : reg_offset - frame_size); ++ ++ for (i = 0; i < 32; ++i) ++ if (imask & (1UL << i)) ++ reg_offset += 8; ++ } ++ ++ if (fmask) ++ fprintf (file, "\t.fmask 0x%lx," HOST_WIDE_INT_PRINT_DEC "\n", fmask, ++ frame_size >= max_frame_size ? 0 : reg_offset - frame_size); ++ } ++} ++ ++/* Emit the .prologue note at the scheduled end of the prologue. */ ++ ++static void ++sw_64_output_function_end_prologue (FILE *file) ++{ ++ if (!flag_inhibit_size_directive) ++ fprintf (file, "\t.prologue %d\n", ++ sw_64_function_needs_gp || cfun->is_thunk); ++} ++ ++/* Write function epilogue. */ ++ ++void ++sw_64_expand_epilogue (void) ++{ ++ /* Registers to save. */ ++ unsigned HOST_WIDE_INT sa_mask = cfun->machine->frame.sa_mask; ++ /* Stack space needed for pushing registers clobbered by us. */ ++ HOST_WIDE_INT sa_size = cfun->machine->frame.saved_regs_size; ++ /* Complete stack size needed. */ ++ HOST_WIDE_INT frame_size = cfun->machine->frame.frame_size; ++ /* Offset from base reg to register save area. 
*/ ++ HOST_WIDE_INT reg_offset; ++ int fp_is_frame_pointer, fp_offset; ++ rtx sa_reg, sa_reg_exp = NULL; ++ rtx sp_adj1, sp_adj2, mem, reg, insn; ++ rtx eh_ofs; ++ rtx cfa_restores = NULL_RTX; ++ bool fp_flag = false; ++ ++#ifdef SW_64_ENABLE_FULL_ASAN ++ reg_offset = aligned_upper_bound (crtl->outgoing_args_size, ++ STACK_BOUNDARY / BITS_PER_UNIT); ++#else ++ reg_offset = SW_64_ROUND (crtl->outgoing_args_size); ++#endif ++ ++ if (strcmp ("main", lang_hooks.decl_printable_name (cfun->decl, 1)) == 0 ++ && flag_sw_hardware_prefetch) ++ { ++ emit_insn ( ++ gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-256))); ++ rtx mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (8)); ++ rtx tmp16 = gen_rtx_MEM (Pmode, mem_address); ++ mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (16)); ++ rtx tmp17 = gen_rtx_MEM (Pmode, mem_address); ++ mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (24)); ++ rtx tmp18 = gen_rtx_MEM (Pmode, mem_address); ++ mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (32)); ++ rtx tmp19 = gen_rtx_MEM (Pmode, mem_address); ++ mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (40)); ++ rtx tmp26 = gen_rtx_MEM (Pmode, mem_address); ++ ++ emit_move_insn (tmp16, gen_rtx_REG (DImode, 16)); ++ emit_move_insn (tmp17, gen_rtx_REG (DImode, 17)); ++ emit_move_insn (tmp18, gen_rtx_REG (DImode, 18)); ++ emit_move_insn (tmp19, gen_rtx_REG (DImode, 19)); ++ emit_move_insn (tmp26, gen_rtx_REG (DImode, 26)); ++ ++ rtx tmp_clt = gen_rtx_REG (DImode, 7); ++ rtx tmp_cnt = gen_rtx_REG (DImode, 8); ++ rtx op = gen_rtx_REG (DImode, 17); ++ ++ unsigned long clt1, clt2, clt3; ++ unsigned long cnt1, cnt2, cnt3; ++ clt1 = 1; ++ clt2 = 0; ++ clt3 = 1; ++ cnt1 = 0; ++ cnt2 = 0; ++ cnt3 = 5; ++ sw_64_emit_set_const (op, DImode, 0x10, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, clt1, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ sw_64_emit_set_const (op, DImode, 0x11, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, clt2, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ sw_64_emit_set_const (op, DImode, 0x12, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, clt3, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ sw_64_emit_set_const (op, DImode, 0x1, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, cnt1, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ sw_64_emit_set_const (op, DImode, 0x4, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, cnt2, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ sw_64_emit_set_const (op, DImode, 0x8, 3, false); ++ sw_64_emit_set_const (tmp_clt, DImode, cnt3, 3, false); ++ emit_insn (gen_hardware_prefetch_use_syscall (tmp_clt, op)); ++ ++ emit_move_insn (gen_rtx_REG (DImode, 16), tmp16); ++ emit_move_insn (gen_rtx_REG (DImode, 17), tmp17); ++ emit_move_insn (gen_rtx_REG (DImode, 18), tmp18); ++ emit_move_insn (gen_rtx_REG (DImode, 19), tmp19); ++ emit_move_insn (gen_rtx_REG (DImode, 26), tmp26); ++ emit_insn ( ++ gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (256))); ++ } ++ ++ fp_is_frame_pointer = frame_pointer_needed; ++ fp_offset = 0; ++ sa_reg = stack_pointer_rtx; ++ ++ if (crtl->calls_eh_return) ++ eh_ofs = EH_RETURN_STACKADJ_RTX; ++ else ++ eh_ofs = NULL_RTX; ++ ++ if (sa_size) ++ { ++ /* If we have a frame pointer, restore SP from it. 
*/ ++ if (frame_pointer_needed) ++ { ++ long adj_size = SW_64_ROUND (crtl->outgoing_args_size); ++ if (adj_size > 0x8000) ++ { ++ int low = ((adj_size & 0xffff) ^ 0x8000) - 0x8000; ++ HOST_WIDE_INT bias; ++ ++ if (low <= 0x8000) ++ bias = adj_size - low, adj_size = low; ++ else ++ bias = adj_size, adj_size = 0; ++ ++ rtx sa_reg = stack_pointer_rtx; ++ rtx sa_reg_exp ++ = plus_constant (Pmode, hard_frame_pointer_rtx, -bias); ++ emit_move_insn (sa_reg, sa_reg_exp); ++ if (adj_size != 0) ++ emit_insn (gen_add3_insn (stack_pointer_rtx, stack_pointer_rtx, ++ GEN_INT (-adj_size))); ++ } ++ else ++ { ++ emit_insn ( ++ gen_stack_tie (stack_pointer_rtx, hard_frame_pointer_rtx)); ++ rtx insn ++ = gen_add3_insn (stack_pointer_rtx, hard_frame_pointer_rtx, ++ GEN_INT ( ++ -SW_64_ROUND (crtl->outgoing_args_size))); ++ emit_insn (insn); ++ } ++ } ++ // emit_move_insn (stack_pointer_rtx, hard_frame_pointer_rtx); ++ ++ /* Cope with very large offsets to the register save area. */ ++ if (reg_offset + sa_size > 0x8000) ++ { ++ int low = ((reg_offset & 0xffff) ^ 0x8000) - 0x8000; ++ HOST_WIDE_INT bias; ++ ++ if (low + sa_size <= 0x8000) ++ bias = reg_offset - low, reg_offset = low; ++ else ++ bias = reg_offset, reg_offset = 0; ++ ++ sa_reg = gen_rtx_REG (Pmode, 22); ++ sa_reg_exp = plus_constant (Pmode, stack_pointer_rtx, bias); ++ ++ emit_move_insn (sa_reg, sa_reg_exp); ++ } ++ ++ /* Restore registers in order, excepting a true frame pointer. */ ++ for (unsigned i = REG_RA; sa_mask != 0; i = ctz_hwi (sa_mask)) ++ { ++ if (fp_is_frame_pointer && i != REG_RA && fp_flag == false) ++ { ++ emit_insn (gen_blockage ()); ++ mem = gen_frame_mem (DImode, ++ plus_constant (Pmode, sa_reg, reg_offset)); ++ emit_move_insn (hard_frame_pointer_rtx, mem); ++ cfa_restores ++ = alloc_reg_note (REG_CFA_RESTORE, hard_frame_pointer_rtx, ++ cfa_restores); ++ sa_mask &= ~(1UL << HARD_FRAME_POINTER_REGNUM); ++ reg_offset += 8; ++ fp_offset = reg_offset; ++ fp_flag = true; ++ } ++ else ++ { ++ mem = gen_frame_mem (Pmode, ++ plus_constant (Pmode, sa_reg, reg_offset)); ++ reg = gen_rtx_REG (Pmode, i); ++ emit_move_insn (reg, mem); ++ cfa_restores ++ = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores); ++ reg_offset += 8; ++ sa_mask &= ~(HOST_WIDE_INT_1U << i); ++ } ++ } ++ } ++ ++ if (frame_size || eh_ofs) ++ { ++ sp_adj1 = stack_pointer_rtx; ++ ++ if (eh_ofs) ++ { ++ sp_adj1 = gen_rtx_REG (Pmode, 23); ++ emit_move_insn (sp_adj1, ++ gen_rtx_PLUS (Pmode, stack_pointer_rtx, eh_ofs)); ++ } ++ ++ /* If the stack size is large, begin computation into a temporary ++ register so as not to interfere with a potential fp restore, ++ which must be consecutive with an SP restore. */ ++ if (frame_size < 32768 && !cfun->calls_alloca) ++ sp_adj2 = GEN_INT (frame_size); ++ else if (frame_size < 0x40007fffL) ++ { ++ int low = ((frame_size & 0xffff) ^ 0x8000) - 0x8000; ++ ++ sp_adj2 = plus_constant (Pmode, sp_adj1, frame_size - low); ++ if (sa_reg_exp && rtx_equal_p (sa_reg_exp, sp_adj2)) ++ sp_adj1 = sa_reg; ++ else ++ { ++ sp_adj1 = gen_rtx_REG (Pmode, 23); ++ emit_move_insn (sp_adj1, sp_adj2); ++ } ++ sp_adj2 = GEN_INT (low); ++ } ++ else ++ { ++ rtx tmp = gen_rtx_REG (Pmode, 23); ++ sp_adj2 = sw_64_emit_set_const (tmp, Pmode, frame_size, 3, false); ++ if (!sp_adj2) ++ { ++ /* We can't drop new things to memory this late, afaik, ++ so build it up by pieces. */ ++ sp_adj2 = sw_64_emit_set_long_const (tmp, frame_size); ++ gcc_assert (sp_adj2); ++ } ++ } ++ ++ /* Restore the stack pointer. 
*/ ++ emit_insn (gen_blockage ()); ++ if (sp_adj2 == const0_rtx) ++ insn = emit_move_insn (stack_pointer_rtx, sp_adj1); ++ else ++ insn = emit_move_insn (stack_pointer_rtx, ++ gen_rtx_PLUS (Pmode, sp_adj1, sp_adj2)); ++ REG_NOTES (insn) = cfa_restores; ++ add_reg_note (insn, REG_CFA_DEF_CFA, stack_pointer_rtx); ++ RTX_FRAME_RELATED_P (insn) = 1; ++ } ++ else ++ { ++ gcc_assert (cfa_restores == NULL); ++ } ++ if (strcmp ("main", lang_hooks.decl_printable_name (cfun->decl, 1)) == 0 ++ && (TARGET_SW_32ALIGN || TARGET_SW_SIMD)) ++ { ++ rtx mem_address = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (0)); ++ rtx tmp7 = gen_rtx_MEM (Pmode, mem_address); ++ emit_move_insn (gen_rtx_REG (DImode, 7), tmp7); ++ rtx const16 = gen_rtx_REG (DImode, 7); ++ emit_insn ( ++ gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (32))); ++ emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx, const16)); ++ } ++} ++ ++/* Output the rest of the textual info surrounding the epilogue. */ ++ ++void ++sw_64_end_function (FILE *file, const char *fnname, tree decl ATTRIBUTE_UNUSED) ++{ ++ rtx_insn *insn; ++ ++ /* We output a nop after noreturn calls at the very end of the function to ++ ensure that the return address always remains in the caller's code range, ++ as not doing so might confuse unwinding engines. */ ++ insn = get_last_insn (); ++ if (!INSN_P (insn)) ++ insn = prev_active_insn (insn); ++ if (insn && CALL_P (insn)) ++ output_asm_insn (get_insn_template (CODE_FOR_nop, NULL), NULL); ++ ++ /* End the function. */ ++ if (!flag_inhibit_size_directive) ++ { ++ fputs ("\t.end ", file); ++ assemble_name (file, fnname); ++ putc ('\n', file); ++ } ++ inside_function = FALSE; ++} ++ ++/* Emit a tail call to FUNCTION after adjusting THIS by DELTA. ++ ++ In order to avoid the hordes of differences between generated code ++ with and without TARGET_EXPLICIT_RELOCS, and to avoid duplicating ++ lots of code loading up large constants, generate rtl and emit it ++ instead of going straight to text. ++ ++ Not sure why this idea hasn't been explored before... */ ++ ++static void ++sw_64_output_mi_thunk_osf (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED, ++ HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset, ++ tree function) ++{ ++ const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl)); ++ HOST_WIDE_INT hi, lo; ++ rtx this_rtx, funexp; ++ rtx_insn *insn; ++ ++ /* We always require a valid GP. */ ++ if (TARGET_SW_M32) ++ emit_insn (gen_prologue_ldgp_32 ()); ++ else ++ emit_insn (gen_prologue_ldgp ()); ++ emit_note (NOTE_INSN_PROLOGUE_END); ++ ++ /* Find the "this" pointer. If the function returns a structure, ++ the structure return pointer is in $16. */ ++ if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)) ++ this_rtx = gen_rtx_REG (Pmode, 17); ++ else ++ this_rtx = gen_rtx_REG (Pmode, 16); ++ ++ /* Add DELTA. When possible we use ldih+ldi. Otherwise load the ++ entire constant for the add. */ ++ lo = ((delta & 0xffff) ^ 0x8000) - 0x8000; ++ hi = (((delta - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; ++ if (hi + lo == delta) ++ { ++ if (hi) ++ emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (hi))); ++ if (lo) ++ emit_insn (gen_adddi3 (this_rtx, this_rtx, GEN_INT (lo))); ++ } ++ else ++ { ++ rtx tmp = sw_64_emit_set_long_const (gen_rtx_REG (Pmode, 0), delta); ++ emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); ++ } ++ ++ /* Add a delta stored in the vtable at VCALL_OFFSET. 
*/ ++ if (vcall_offset) ++ { ++ rtx tmp, tmp2; ++ ++ tmp = gen_rtx_REG (Pmode, 0); ++ emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx)); ++ ++ lo = ((vcall_offset & 0xffff) ^ 0x8000) - 0x8000; ++ hi = (((vcall_offset - lo) & 0xffffffff) ^ 0x80000000) - 0x80000000; ++ if (hi + lo == vcall_offset) ++ { ++ if (hi) ++ emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (hi))); ++ } ++ else ++ { ++ tmp2 ++ = sw_64_emit_set_long_const (gen_rtx_REG (Pmode, 1), vcall_offset); ++ emit_insn (gen_adddi3 (tmp, tmp, tmp2)); ++ lo = 0; ++ } ++ if (lo) ++ tmp2 = gen_rtx_PLUS (Pmode, tmp, GEN_INT (lo)); ++ else ++ tmp2 = tmp; ++ emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp2)); ++ ++ emit_insn (gen_adddi3 (this_rtx, this_rtx, tmp)); ++ } ++ ++ /* Generate a tail call to the target function. */ ++ if (!TREE_USED (function)) ++ { ++ assemble_external (function); ++ TREE_USED (function) = 1; ++ } ++ funexp = XEXP (DECL_RTL (function), 0); ++ funexp = gen_rtx_MEM (FUNCTION_MODE, funexp); ++ insn = emit_call_insn (gen_sibcall (funexp, const0_rtx)); ++ SIBLING_CALL_P (insn) = 1; ++ ++ /* Run just enough of rest_of_compilation to get the insns emitted. ++ There's not really enough bulk here to make other passes such as ++ instruction scheduling worth while. */ ++ insn = get_insns (); ++ shorten_branches (insn); ++ assemble_start_function (thunk_fndecl, fnname); ++ final_start_function (insn, file, 1); ++ final (insn, file, 1); ++ final_end_function (); ++ assemble_end_function (thunk_fndecl, fnname); ++} ++ ++ ++/* Debugging support. */ ++ ++#include "gstab.h" ++ ++/* Name of the file containing the current function. */ ++ ++static const char *current_function_file = ""; ++ ++/* Offsets to sw_64 virtual arg/local debugging pointers. */ ++ ++long sw_64_arg_offset; ++long sw_64_auto_offset; ++ ++/* Emit a new filename to a stream. */ ++ ++void ++sw_64_output_filename (FILE *stream, const char *name) ++{ ++ static int first_time = TRUE; ++ ++ if (first_time) ++ { ++ first_time = FALSE; ++ ++num_source_filenames; ++ current_function_file = name; ++ fprintf (stream, "\t.file\t "); ++ output_quoted_string (stream, name); ++ fprintf (stream, "\n"); ++ } ++ ++ else if (name != current_function_file ++ && strcmp (name, current_function_file) != 0) ++ { ++ ++num_source_filenames; ++ current_function_file = name; ++ fprintf (stream, "\t.file\t "); ++ ++ output_quoted_string (stream, name); ++ fprintf (stream, "\n"); ++ } ++} ++ ++/* Structure to show the current status of registers and memory. */ ++ ++struct shadow_summary ++{ ++ struct { ++ unsigned int i : 31; /* Mask of int regs. */ ++ unsigned int fp : 31; /* Mask of fp regs. */ ++ unsigned int mem : 1; /* mem == imem | fpmem. */ ++ } used, defd; ++}; ++ ++/* Summary the effects of expression X on the machine. Update SUM, a pointer ++ to the summary structure. SET is nonzero if the insn is setting the ++ object, otherwise zero. */ ++ ++static void ++summarize_insn (rtx x, struct shadow_summary *sum, int set) ++{ ++ const char *format_ptr; ++ int i, j; ++ ++ if (x == 0) ++ return; ++ ++ switch (GET_CODE (x)) ++ { ++ /* ??? Note that this case would be incorrect if the Sw_64 had a ++ ZERO_EXTRACT in SET_DEST. 
*/ ++ case SET: ++ summarize_insn (SET_SRC (x), sum, 0); ++ summarize_insn (SET_DEST (x), sum, 1); ++ break; ++ ++ case CLOBBER: ++ summarize_insn (XEXP (x, 0), sum, 1); ++ break; ++ ++ case USE: ++ summarize_insn (XEXP (x, 0), sum, 0); ++ break; ++ ++ case ASM_OPERANDS: ++ for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; i--) ++ summarize_insn (ASM_OPERANDS_INPUT (x, i), sum, 0); ++ break; ++ ++ case PARALLEL: ++ for (i = XVECLEN (x, 0) - 1; i >= 0; i--) ++ summarize_insn (XVECEXP (x, 0, i), sum, 0); ++ break; ++ ++ case SUBREG: ++ summarize_insn (SUBREG_REG (x), sum, 0); ++ break; ++ ++ case REG: ++ { ++ int regno = REGNO (x); ++ unsigned long mask = ((unsigned long) 1) << (regno % 32); ++ ++ if (regno == 31 || regno == 63) ++ break; ++ ++ if (set) ++ { ++ if (regno < 32) ++ sum->defd.i |= mask; ++ else ++ sum->defd.fp |= mask; ++ } ++ else ++ { ++ if (regno < 32) ++ sum->used.i |= mask; ++ else ++ sum->used.fp |= mask; ++ } ++ } ++ break; ++ ++ case MEM: ++ if (set) ++ sum->defd.mem = 1; ++ else ++ sum->used.mem = 1; ++ ++ /* Find the regs used in memory address computation: */ ++ summarize_insn (XEXP (x, 0), sum, 0); ++ break; ++ ++ case CONST_INT: ++ case CONST_WIDE_INT: ++ case CONST_DOUBLE: ++ case SYMBOL_REF: ++ case LABEL_REF: ++ case CONST: ++ case SCRATCH: ++ case ASM_INPUT: ++ break; ++ ++ /* Handle common unary and binary ops for efficiency. */ ++ case COMPARE: ++ case PLUS: ++ case MINUS: ++ case MULT: ++ case DIV: ++ case MOD: ++ case UDIV: ++ case UMOD: ++ case AND: ++ case IOR: ++ case XOR: ++ case ASHIFT: ++ case ROTATE: ++ case ASHIFTRT: ++ case LSHIFTRT: ++ case ROTATERT: ++ case SMIN: ++ case SMAX: ++ case UMIN: ++ case UMAX: ++ case NE: ++ case EQ: ++ case GE: ++ case GT: ++ case LE: ++ case LT: ++ case GEU: ++ case GTU: ++ case LEU: ++ case LTU: ++ summarize_insn (XEXP (x, 0), sum, 0); ++ summarize_insn (XEXP (x, 1), sum, 0); ++ break; ++ ++ case NEG: ++ case NOT: ++ case SIGN_EXTEND: ++ case ZERO_EXTEND: ++ case TRUNCATE: ++ case FLOAT_EXTEND: ++ case FLOAT_TRUNCATE: ++ case FLOAT: ++ case FIX: ++ case UNSIGNED_FLOAT: ++ case UNSIGNED_FIX: ++ case ABS: ++ case SQRT: ++ case FFS: ++ summarize_insn (XEXP (x, 0), sum, 0); ++ break; ++ ++ default: ++ format_ptr = GET_RTX_FORMAT (GET_CODE (x)); ++ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) ++ switch (format_ptr[i]) ++ { ++ case 'e': ++ summarize_insn (XEXP (x, i), sum, 0); ++ break; ++ ++ case 'E': ++ for (j = XVECLEN (x, i) - 1; j >= 0; j--) ++ summarize_insn (XVECEXP (x, i, j), sum, 0); ++ break; ++ ++ case 'i': ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ } ++} ++ ++/* Ensure a sufficient number of `memb' insns are in the code when ++ the user requests code with a trap precision of functions or ++ instructions. ++ ++ In naive mode, when the user requests a trap-precision of ++ "instruction", a memb is needed after every instruction that may ++ generate a trap. This ensures that the code is resumption safe but ++ it is also slow. ++ ++ When optimizations are turned on, we delay issuing a memb as long ++ as possible. In this context, a trap shadow is the sequence of ++ instructions that starts with a (potentially) trap generating ++ instruction and extends to the next memb. 
We can delay (and ++ therefore sometimes omit) a memb subject to the following ++ conditions: ++ ++ (a) On entry to the trap shadow, if any Sw_64 register or memory ++ location contains a value that is used as an operand value by some ++ instruction in the trap shadow (live on entry), then no instruction ++ in the trap shadow may modify the register or memory location. ++ ++ (b) Within the trap shadow, the computation of the base register ++ for a memory load or store instruction may not involve using the ++ result of an instruction that might generate an UNPREDICTABLE ++ result. ++ ++ (c) Within the trap shadow, no register may be used more than once ++ as a destination register. (This is to make life easier for the ++ trap-handler.) ++ ++ (d) The trap shadow may not include any branch instructions. */ ++ ++static void ++sw_64_handle_trap_shadows (void) ++{ ++ struct shadow_summary shadow; ++ int trap_pending, exception_nesting; ++ rtx_insn *i, *n; ++ ++ trap_pending = 0; ++ exception_nesting = 0; ++ shadow.used.i = 0; ++ shadow.used.fp = 0; ++ shadow.used.mem = 0; ++ shadow.defd = shadow.used; ++ ++ for (i = get_insns (); i; i = NEXT_INSN (i)) ++ { ++ if (NOTE_P (i)) ++ { ++ switch (NOTE_KIND (i)) ++ { ++ case NOTE_INSN_EH_REGION_BEG: ++ exception_nesting++; ++ if (trap_pending) ++ goto close_shadow; ++ break; ++ ++ case NOTE_INSN_EH_REGION_END: ++ exception_nesting--; ++ if (trap_pending) ++ goto close_shadow; ++ break; ++ ++ case NOTE_INSN_EPILOGUE_BEG: ++ if (trap_pending && sw_64_tp >= SW_64_TP_FUNC) ++ goto close_shadow; ++ break; ++ } ++ } ++ else if (trap_pending) ++ { ++ if (sw_64_tp == SW_64_TP_FUNC) ++ { ++ if (JUMP_P (i) && GET_CODE (PATTERN (i)) == RETURN) ++ goto close_shadow; ++ } ++ else if (sw_64_tp == SW_64_TP_INSN) ++ { ++ if (optimize > 0) ++ { ++ struct shadow_summary sum; ++ ++ sum.used.i = 0; ++ sum.used.fp = 0; ++ sum.used.mem = 0; ++ sum.defd = sum.used; ++ ++ switch (GET_CODE (i)) ++ { ++ case INSN: ++ /* Annoyingly, get_attr_trap will die on these. */ ++ if (GET_CODE (PATTERN (i)) == USE ++ || GET_CODE (PATTERN (i)) == CLOBBER) ++ break; ++ ++ summarize_insn (PATTERN (i), &sum, 0); ++ ++ if ((sum.defd.i & shadow.defd.i) ++ || (sum.defd.fp & shadow.defd.fp)) ++ { ++ /* (c) would be violated. */ ++ goto close_shadow; ++ } ++ ++ /* Combine shadow with summary of current insn: */ ++ shadow.used.i |= sum.used.i; ++ shadow.used.fp |= sum.used.fp; ++ shadow.used.mem |= sum.used.mem; ++ shadow.defd.i |= sum.defd.i; ++ shadow.defd.fp |= sum.defd.fp; ++ shadow.defd.mem |= sum.defd.mem; ++ ++ if ((sum.defd.i & shadow.used.i) ++ || (sum.defd.fp & shadow.used.fp) ++ || (sum.defd.mem & shadow.used.mem)) ++ { ++ /* (a) would be violated (also takes care of (b)) */ ++ gcc_assert (get_attr_trap (i) != TRAP_YES ++ || (!(sum.defd.i & sum.used.i) ++ && !(sum.defd.fp & sum.used.fp))); ++ ++ goto close_shadow; ++ } ++ break; ++ ++ case BARRIER: ++ /* __builtin_unreachable can expand to no code at all, ++ leaving (barrier) RTXes in the instruction stream. 
*/ ++ goto close_shadow_notrapb; ++ ++ case JUMP_INSN: ++ case CALL_INSN: ++ case CODE_LABEL: ++ goto close_shadow; ++ ++ case DEBUG_INSN: ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ } ++ else ++ { ++ close_shadow: ++ n = emit_insn_before (gen_trapb (), i); ++ PUT_MODE (n, TImode); ++ PUT_MODE (i, TImode); ++ close_shadow_notrapb: ++ trap_pending = 0; ++ shadow.used.i = 0; ++ shadow.used.fp = 0; ++ shadow.used.mem = 0; ++ shadow.defd = shadow.used; ++ } ++ } ++ } ++ ++ if ((exception_nesting > 0 || sw_64_tp >= SW_64_TP_FUNC) ++ && NONJUMP_INSN_P (i) && GET_CODE (PATTERN (i)) != USE ++ && GET_CODE (PATTERN (i)) != CLOBBER && get_attr_trap (i) == TRAP_YES) ++ { ++ if (optimize && !trap_pending) ++ summarize_insn (PATTERN (i), &shadow, 0); ++ trap_pending = 1; ++ } ++ } ++} ++ ++/* Sw_64 can only issue instruction groups simultaneously if they are ++ suitably aligned. This is very processor-specific. */ ++ ++/* The instruction group alignment main loop. */ ++ ++static void ++sw_64_align_insns_1 (unsigned int max_align, ++ rtx_insn *(*next_group) (rtx_insn *, int *, int *), ++ rtx (*next_nop) (int *)) ++{ ++ /* ALIGN is the known alignment for the insn group. */ ++ unsigned int align; ++ /* OFS is the offset of the current insn in the insn group. */ ++ int ofs; ++ int prev_in_use, in_use, len, ldgp; ++ rtx_insn *i, *next; ++ ++ /* Let shorten branches care for assigning alignments to code labels. */ ++ shorten_branches (get_insns ()); ++ ++ unsigned int option_alignment = align_functions.levels[0].get_value (); ++ if (option_alignment < 4) ++ align = 4; ++ else if ((unsigned int) option_alignment < max_align) ++ align = option_alignment; ++ else ++ align = max_align; ++ ++ ofs = prev_in_use = 0; ++ i = get_insns (); ++ if (NOTE_P (i)) ++ i = next_nonnote_insn (i); ++ ++ ldgp = sw_64_function_needs_gp ? 8 : 0; ++ ++ while (i) ++ { ++ next = (*next_group) (i, &in_use, &len); ++ ++ /* When we see a label, resync alignment etc. */ ++ if (LABEL_P (i)) ++ { ++ unsigned int new_align ++ = label_to_alignment (i).levels[0].get_value (); ++ if (new_align >= align) ++ { ++ align = new_align < max_align ? new_align : max_align; ++ ofs = 0; ++ } ++ ++ else if (ofs & (new_align - 1)) ++ ofs = (ofs | (new_align - 1)) + 1; ++ gcc_assert (!len); ++ } ++ ++ /* Handle complex instructions special. */ ++ else if (in_use == 0) ++ { ++ /* Asms will have length < 0. This is a signal that we have ++ lost alignment knowledge. Assume, however, that the asm ++ will not mis-align instructions. */ ++ if (len < 0) ++ { ++ ofs = 0; ++ align = 4; ++ len = 0; ++ } ++ } ++ ++ /* If the known alignment is smaller than the recognized insn group, ++ realign the output. */ ++ else if ((int) align < len) ++ { ++ unsigned int new_log_align = len > 8 ? 4 : 3; ++ rtx_insn *prev, *where; ++ ++ where = prev = prev_nonnote_insn (i); ++ if (!where || !LABEL_P (where)) ++ where = i; ++ ++ /* Can't realign between a call and its gp reload. */ ++ if (!(TARGET_EXPLICIT_RELOCS && prev && CALL_P (prev))) ++ { ++ emit_insn_before (gen_realign (GEN_INT (new_log_align)), where); ++ align = 1 << new_log_align; ++ ofs = 0; ++ } ++ } ++ ++ /* We may not insert padding inside the initial ldgp sequence. */ ++ else if (ldgp > 0) ++ ldgp -= len; ++ ++ /* If the group won't fit in the same INT16 as the previous, ++ we need to add padding to keep the group together. 
Rather ++ than simply leaving the insn filling to the assembler, we ++ can make use of the knowledge of what sorts of instructions ++ were issued in the previous group to make sure that all of ++ the added nops are really free. */ ++ else if (ofs + len > (int) align) ++ { ++ int nop_count = (align - ofs) / 4; ++ rtx_insn *where; ++ ++ /* Insert nops before labels, branches, and calls to truly merge ++ the execution of the nops with the previous instruction group. */ ++ where = prev_nonnote_insn (i); ++ if (where) ++ { ++ if (LABEL_P (where)) ++ { ++ rtx_insn *where2 = prev_nonnote_insn (where); ++ if (where2 && JUMP_P (where2)) ++ where = where2; ++ } ++ else if (NONJUMP_INSN_P (where)) ++ where = i; ++ } ++ else ++ where = i; ++ ++ do ++ emit_insn_before ((*next_nop) (&prev_in_use), where); ++ while (--nop_count); ++ ofs = 0; ++ } ++ ++ ofs = (ofs + len) & (align - 1); ++ prev_in_use = in_use; ++ i = next; ++ } ++} ++ ++static void ++sw_64_align_insns (void) ++{ ++ gcc_unreachable (); ++} ++ ++/* Insert an unop between sibcall or noreturn function call and GP load. */ ++ ++static void ++sw_64_pad_function_end (void) ++{ ++ rtx_insn *insn, *next; ++ ++ for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) ++ { ++ if (!CALL_P (insn) ++ || !(SIBLING_CALL_P (insn) ++ || find_reg_note (insn, REG_NORETURN, NULL_RTX))) ++ continue; ++ ++ next = next_active_insn (insn); ++ if (next) ++ { ++ rtx pat = PATTERN (next); ++ ++ if (GET_CODE (pat) == SET ++ && GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE ++ && XINT (SET_SRC (pat), 1) == UNSPECV_LDGP1) ++ emit_insn_after (gen_unop (), insn); ++ } ++ } ++} ++ ++/* Machine dependent reorg pass. */ ++ ++static void ++sw_64_reorg (void) ++{ ++ /* Workaround for a linker error that triggers when an exception ++ handler immediatelly follows a sibcall or a noreturn function. ++ ++In the sibcall case: ++ ++ The instruction stream from an object file: ++ ++ 1d8: 00 00 fb 6b jmp (t12) ++ 1dc: 00 00 ba 27 ldih gp,0(ra) ++ 1e0: 00 00 bd 23 ldi gp,0(gp) ++ 1e4: 00 00 7d a7 ldl t12,0(gp) ++ 1e8: 00 40 5b 6b call ra,(t12),1ec <__funcZ+0x1ec> ++ ++ was converted in the final link pass to: ++ ++ 12003aa88: 67 fa ff c3 br 120039428 <...> ++ 12003aa8c: 00 00 fe 2f unop ++ 12003aa90: 00 00 fe 2f unop ++ 12003aa94: 48 83 7d a7 ldl t12,-31928(gp) ++ 12003aa98: 00 40 5b 6b call ra,(t12),12003aa9c <__func+0x1ec> ++ ++And in the noreturn case: ++ ++ The instruction stream from an object file: ++ ++ 54: 00 40 5b 6b call ra,(t12),58 <__func+0x58> ++ 58: 00 00 ba 27 ldih gp,0(ra) ++ 5c: 00 00 bd 23 ldi gp,0(gp) ++ 60: 00 00 7d a7 ldl t12,0(gp) ++ 64: 00 40 5b 6b call ra,(t12),68 <__func+0x68> ++ ++ was converted in the final link pass to: ++ ++ fdb24: a0 03 40 d3 bsr ra,fe9a8 <_called_func+0x8> ++ fdb28: 00 00 fe 2f unop ++ fdb2c: 00 00 fe 2f unop ++ fdb30: 30 82 7d a7 ldl t12,-32208(gp) ++ fdb34: 00 40 5b 6b call ra,(t12),fdb38 <__func+0x68> ++ ++ GP load instructions were wrongly cleared by the linker relaxation ++ pass. This workaround prevents removal of GP loads by inserting ++ an unop instruction between a sibcall or noreturn function call and ++ exception handler prologue. 
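The padding loop above keeps each instruction group inside one aligned fetch block: ofs tracks the byte offset within the block, and when the next group of len bytes would spill past the boundary, (align - ofs) / 4 four-byte nops are emitted first. A small standalone sketch of that arithmetic with made-up sizes:

#include <stdio.h>

int
main (void)
{
  unsigned int align = 16;   /* fetch-block size in bytes */
  unsigned int ofs   = 12;   /* offset of the next group within the block */
  unsigned int len   = 8;    /* size of the next instruction group */

  if (ofs + len > align)     /* group would straddle the block boundary */
    {
      unsigned int nop_count = (align - ofs) / 4;   /* 4-byte nop insns */
      printf ("pad with %u nop(s)\n", nop_count);   /* 1 */
      ofs = 0;
    }
  ofs = (ofs + len) & (align - 1);
  printf ("next offset: %u\n", ofs);                /* 8 */
  return 0;
}
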
*/ ++ ++ if (current_function_has_exception_handlers ()) ++ sw_64_pad_function_end (); ++} ++ ++static void ++sw_64_file_start (void) ++{ ++ default_file_start (); ++ ++ fputs ("\t.set noreorder\n", asm_out_file); ++ fputs ("\t.set volatile\n", asm_out_file); ++ fputs ("\t.set noat\n", asm_out_file); ++ if (TARGET_EXPLICIT_RELOCS) ++ fputs ("\t.set nomacro\n", asm_out_file); ++ if (TARGET_SUPPORT_ARCH | TARGET_BWX | TARGET_MAX | TARGET_FIX | TARGET_CIX ++ | TARGET_SW6A | TARGET_SW6B | TARGET_SW8A) ++ { ++ const char *arch; ++ ++ if (sw_64_cpu == PROCESSOR_SW6 || PROCESSOR_SW8 || TARGET_FIX ++ || TARGET_CIX) ++ { ++ if (TARGET_SW6A) ++ arch = "sw6a"; ++ else if (TARGET_SW6B) ++ arch = "sw6b"; ++ else if (TARGET_SW8A) ++ arch = "sw8a"; ++ else ++ arch = "sw6b"; ++ } ++ else ++ arch = "sw6b"; ++ ++ fprintf (asm_out_file, "\t.arch %s\n", arch); ++ } ++} ++ ++/* Since we don't have a .dynbss section, we should not allow global ++ relocations in the .rodata section. */ ++ ++static int ++sw_64_elf_reloc_rw_mask (void) ++{ ++ return flag_pic ? 3 : 2; ++} ++ ++/* Return a section for X. The only special thing we do here is to ++ honor small data. */ ++ ++static section * ++sw_64_elf_select_rtx_section (machine_mode mode, rtx x, ++ unsigned HOST_WIDE_INT align) ++{ ++ if (TARGET_SMALL_DATA && GET_MODE_SIZE (mode) <= g_switch_value) ++ /* ??? Consider using mergeable sdata sections. */ ++ return sdata_section; ++ else ++ return default_elf_select_rtx_section (mode, x, align); ++} ++ ++static unsigned int ++sw_64_elf_section_type_flags (tree decl, const char *name, int reloc) ++{ ++ unsigned int flags = 0; ++ ++ if (strcmp (name, ".sdata") == 0 || strncmp (name, ".sdata.", 7) == 0 ++ || strncmp (name, ".gnu.linkonce.s.", 16) == 0 ++ || strcmp (name, ".sbss") == 0 || strncmp (name, ".sbss.", 6) == 0 ++ || strncmp (name, ".gnu.linkonce.sb.", 17) == 0) ++ flags = SECTION_SMALL; ++ ++ flags |= default_section_type_flags (decl, name, reloc); ++ return flags; ++} ++ ++/* Structure to collect function names for final output in link section. */ ++/* Note that items marked with GTY can't be ifdef'ed out. */ ++ ++enum reloc_kind ++{ ++ KIND_LINKAGE, ++ KIND_CODEADDR ++}; ++ ++struct GTY (()) sw_64_links ++{ ++ rtx func; ++ rtx linkage; ++ enum reloc_kind rkind; ++}; ++ ++rtx ++sw_64_use_linkage (rtx func ATTRIBUTE_UNUSED, bool lflag ATTRIBUTE_UNUSED, ++ bool rflag ATTRIBUTE_UNUSED) ++{ ++ return NULL_RTX; ++} ++ ++static void ++sw_64_init_libfuncs (void) ++{ ++#ifdef MEM_LIBFUNCS_INIT ++ MEM_LIBFUNCS_INIT; ++#endif ++} ++ ++/* On the Sw_64, we use this to disable the floating-point registers ++ when they don't exist. */ ++ ++static void ++sw_64_conditional_register_usage (void) ++{ ++ int i; ++ if (!TARGET_FPREGS) ++ for (i = 32; i < 63; i++) ++ fixed_regs[i] = call_used_regs[i] = 1; ++} ++ ++/* Canonicalize a comparison from one we don't have to one we do have. */ ++ ++static void ++sw_64_canonicalize_comparison (int *code, rtx *op0, rtx *op1, ++ bool op0_preserve_value) ++{ ++ if (!op0_preserve_value ++ && (*code == GE || *code == GT || *code == GEU || *code == GTU) ++ && (REG_P (*op1) || *op1 == const0_rtx)) ++ { ++ std::swap (*op0, *op1); ++ *code = (int) swap_condition ((enum rtx_code) * code); ++ } ++ ++ if ((*code == LT || *code == LTU) && CONST_INT_P (*op1) ++ && INTVAL (*op1) == 256) ++ { ++ *code = *code == LT ? LE : LEU; ++ *op1 = GEN_INT (255); ++ } ++} ++ ++/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. 
*/ ++ ++static void ++sw_64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update) ++{ ++ const unsigned HOST_WIDE_INT SWCR_STATUS_MASK = (0x3fUL << 17); ++ ++ tree fenv_var, get_fpscr, set_fpscr, mask, ld_fenv, masked_fenv; ++ tree new_fenv_var, reload_fenv, restore_fnenv; ++ tree update_call, atomic_feraiseexcept, hold_fnclex; ++ ++ /* Generate the equivalent of : ++ unsigned long fenv_var; ++ fenv_var = __ieee_get_fp_control (); ++ ++ unsigned long masked_fenv; ++ masked_fenv = fenv_var & mask; ++ ++ __ieee_set_fp_control (masked_fenv); */ ++ ++ fenv_var = create_tmp_var_raw (long_unsigned_type_node); ++ get_fpscr ++ = build_fn_decl ("__ieee_get_fp_control", ++ build_function_type_list (long_unsigned_type_node, NULL)); ++ set_fpscr = build_fn_decl ("__ieee_set_fp_control", ++ build_function_type_list (void_type_node, NULL)); ++ mask = build_int_cst (long_unsigned_type_node, ~SWCR_STATUS_MASK); ++ ld_fenv = build4 (TARGET_EXPR, long_unsigned_type_node, fenv_var, ++ build_call_expr (get_fpscr, 0), NULL_TREE, NULL_TREE); ++ masked_fenv = build2 (BIT_AND_EXPR, long_unsigned_type_node, fenv_var, mask); ++ hold_fnclex = build_call_expr (set_fpscr, 1, masked_fenv); ++ *hold = build2 (COMPOUND_EXPR, void_type_node, ++ build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv), ++ hold_fnclex); ++ ++ /* Store the value of masked_fenv to clear the exceptions: ++ __ieee_set_fp_control (masked_fenv); */ ++ ++ *clear = build_call_expr (set_fpscr, 1, masked_fenv); ++ ++ /* Generate the equivalent of : ++ unsigned long new_fenv_var; ++ new_fenv_var = __ieee_get_fp_control (); ++ ++ __ieee_set_fp_control (fenv_var); ++ ++ __atomic_feraiseexcept (new_fenv_var); */ ++ ++ new_fenv_var = create_tmp_var_raw (long_unsigned_type_node); ++ reload_fenv = build4 (TARGET_EXPR, long_unsigned_type_node, new_fenv_var, ++ build_call_expr (get_fpscr, 0), NULL_TREE, NULL_TREE); ++ restore_fnenv = build_call_expr (set_fpscr, 1, fenv_var); ++ atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT); ++ update_call ++ = build_call_expr (atomic_feraiseexcept, 1, ++ fold_convert (integer_type_node, new_fenv_var)); ++ *update = build2 (COMPOUND_EXPR, void_type_node, ++ build2 (COMPOUND_EXPR, void_type_node, reload_fenv, ++ restore_fnenv), ++ update_call); ++} ++ ++/* Implement TARGET_HARD_REGNO_MODE_OK. On Sw_64, the integer registers ++ can hold any mode. The floating-point registers can hold 64-bit ++ integers as well, but not smaller values. */ ++ ++static bool ++sw_64_hard_regno_mode_ok (unsigned int regno, machine_mode mode) ++{ ++ if (IN_RANGE (regno, 32, 62)) ++ return (mode == SFmode || mode == DFmode || mode == DImode || mode == SCmode ++ || mode == DCmode); ++ return true; ++} ++ ++/* Implement TARGET_MODES_TIEABLE_P. This asymmetric test is true when ++ MODE1 could be put in an FP register but MODE2 could not. */ ++ ++static bool ++sw_64_modes_tieable_p (machine_mode mode1, machine_mode mode2) ++{ ++ return (sw_64_hard_regno_mode_ok (32, mode1) ++ ? sw_64_hard_regno_mode_ok (32, mode2) ++ : true); ++} ++ ++/* Implement TARGET_CAN_CHANGE_MODE_CLASS. */ ++ ++/************************************************* ++ * ++ * float fast_inverse_sqrt (float x) ++ * { ++ * float xhalf = 0.5f * x; ++ * int i = *(int *)&x ; ++ * i = 0x5f3759df - (i >> 1); ++ * x = *(float *)&i; ++ * x = x *(1.5f - xhalf * x *x); ++ * x = x *(1.5f - xhalf * x *x); // SPEC2006 435 need this ++ * return x; ++ * } ++ * ++ ***************************************************/ ++ ++/* Load up a constant. 
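The block comment above is the classic fast inverse square root: reinterpret the float as an integer, subtract the halved bit pattern from the magic constant 0x5f3759df (built as 0x5f370000 + 0x59df in sw_64_emit_rsqrt below), reinterpret back, and refine with two Newton steps. A plain-C rendering of the algorithm the comment sketches, not of the RTL expansion itself:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static float
fast_inverse_sqrt (float x)
{
  float xhalf = 0.5f * x;
  int32_t i;
  memcpy (&i, &x, sizeof i);        /* i = *(int *)&x, without aliasing UB */
  i = 0x5f3759df - (i >> 1);
  memcpy (&x, &i, sizeof x);
  x = x * (1.5f - xhalf * x * x);   /* first Newton step */
  x = x * (1.5f - xhalf * x * x);   /* second step; the port notes SPEC2006 435 needs it */
  return x;
}

int
main (void)
{
  printf ("%f\n", fast_inverse_sqrt (4.0f));   /* approximately 0.5 */
  return 0;
}
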
all of the vector elements. */ ++static rtx ++sw_64_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst) ++{ ++ rtx reg; ++ ++ if (mode == SFmode || mode == DFmode) ++ { ++ rtx d = const_double_from_real_value (dconst, mode); ++ reg = force_reg (mode, d); ++ } ++ else ++ gcc_unreachable (); ++ ++ return reg; ++} ++ ++void ++sw_64_emit_rsqrt (rtx dst, rtx x, bool note_p) ++{ ++ machine_mode mode = GET_MODE (dst); ++ rtx one, xhalf, mhalf, i, magical, x0, x1, x2; ++ ++ enum insn_code code = optab_handler (smul_optab, mode); ++ insn_gen_fn gen_mul = GEN_FCN (code); ++ gcc_assert (code != CODE_FOR_nothing); ++ ++ enum insn_code code1 = optab_handler (sub_optab, SImode); ++ insn_gen_fn gen_sub = GEN_FCN (code1); ++ gcc_assert (code1 != CODE_FOR_nothing); ++ ++ enum insn_code code2 = optab_handler (fnma_optab, mode); ++ insn_gen_fn gen_fnma = GEN_FCN (code2); ++ gcc_assert (code2 != CODE_FOR_nothing); ++ ++ enum insn_code code3 = optab_handler (add_optab, mode); ++ insn_gen_fn gen_add = GEN_FCN (code3); ++ gcc_assert (code3 != CODE_FOR_nothing); ++ ++ one = sw_64_load_constant_and_splat (mode, dconst1); ++ mhalf = sw_64_load_constant_and_splat (mode, dconsthalf); ++ ++ /* xhalf = 0.5f * x. */ ++ xhalf = gen_reg_rtx (mode); ++ emit_insn (gen_mul (xhalf, mhalf, x)); ++ ++ if (x == CONST0_RTX (mode)) ++ gcc_unreachable (); ++ ++ /* int i = *(int *)&x. */ ++ rtx vreg = gen_rtx_REG (SFmode, 28); ++ ++ emit_insn ( ++ gen_rtx_SET (vreg, gen_rtx_UNSPEC (mode, gen_rtvec (1, x), UNSPEC_FIMOVS))); ++ ++ /* i = i >> 1. */ ++ i = gen_reg_rtx (DImode); ++ rtx subreg = gen_rtx_SUBREG (SImode, vreg, 0); ++ emit_insn (gen_extendsidi2 (i, subreg)); ++ emit_insn (gen_ashrdi3 (i, i, const1_rtx)); ++ ++ /* magical number: 0x5f3759df. */ ++ magical = gen_reg_rtx (SImode); ++ emit_insn (gen_rtx_SET (magical, GEN_INT (0x5f370000))); ++ emit_insn ( ++ gen_rtx_SET (magical, gen_rtx_PLUS (SImode, magical, GEN_INT (0x59df)))); ++ ++ /* x0 = 0x5f3759df - i. */ ++ subreg = gen_rtx_SUBREG (SImode, i, 0); ++ x0 = gen_reg_rtx (SImode); ++ emit_insn (gen_sub (x0, magical, subreg)); ++ ++ /* x = *(float *)&x0. */ ++ x = gen_rtx_REG (mode, 60); ++ x0 = gen_rtx_SUBREG (SFmode, x0, 0); ++ emit_insn (gen_rtx_SET (x, x0)); ++ ++ /* x= x *(1.5f - xhalf * x *x) */ ++ rtx number = gen_reg_rtx (mode); ++ emit_insn (gen_add (number, one, mhalf)); ++ ++ x1 = gen_reg_rtx (mode); ++ emit_insn (gen_mul (x1, x, x)); ++ emit_insn (gen_fnma (x1, x1, xhalf, number)); ++ emit_insn (gen_mul (x1, x1, x)); ++ ++ /* second iteration, SPEC2006 435 need this. */ ++ x2 = gen_reg_rtx (mode); ++ emit_insn (gen_mul (x2, x1, x1)); ++ emit_insn (gen_fnma (x2, x2, xhalf, number)); ++ emit_insn (gen_mul (dst, x2, x1)); ++} ++ ++rtx ++gen_move_reg (rtx x) ++{ ++ rtx temp = gen_reg_rtx (GET_MODE (x)); ++ emit_move_insn (temp, x); ++ return temp; ++} ++ ++/* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P, ++ * * add a reg_note saying that this was a division. Support both scalar ++ * and ++ * * vector divide. Assumes no trapping math and finite arguments. */ ++void ++sw_64_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p) ++{ ++ machine_mode mode = GET_MODE (dst); ++ rtx one, x0, e0, x1, x2, xprev, eprev, xnext, enext, u, v; ++ int i; ++ ++ int passes = flag_sw_recip_precision ? 
2 : 1; ++ if (mode == DFmode) ++ passes += 2; ++ ++ enum insn_code code = optab_handler (smul_optab, mode); ++ insn_gen_fn gen_mul = GEN_FCN (code); ++ gcc_assert (code != CODE_FOR_nothing); ++ ++ enum insn_code code1 = optab_handler (fma_optab, mode); ++ insn_gen_fn gen_fma = GEN_FCN (code1); ++ gcc_assert (code1 != CODE_FOR_nothing); ++ ++ enum insn_code code2 = optab_handler (fnma_optab, mode); ++ insn_gen_fn gen_fnma = GEN_FCN (code2); ++ gcc_assert (code2 != CODE_FOR_nothing); ++ ++ one = sw_64_load_constant_and_splat (mode, dconst1); ++ ++ /* x0 = 1./d estimate */ ++ ++ x0 = gen_reg_rtx (mode); ++ emit_insn ( ++ gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d), UNSPEC_FRECX))); ++ ++ /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */ ++ if (passes > 1) ++ { ++ /* e0 = 1. - d * x0 */ ++ e0 = gen_reg_rtx (mode); ++ emit_insn (gen_fnma (e0, d, x0, one)); ++ ++ /* x1 = x0 + e0 * x0 */ ++ x1 = gen_reg_rtx (mode); ++ emit_insn (gen_fma (x1, x0, e0, x0)); ++ ++ for (i = 0, xprev = x1, eprev = e0; i < passes - 2; ++ ++i, xprev = xnext, eprev = enext) ++ { ++ /* enext = eprev * eprev */ ++ enext = gen_reg_rtx (mode); ++ emit_insn (gen_mul (enext, eprev, eprev)); ++ ++ /* xnext = xprev + enext * xprev */ ++ xnext = gen_reg_rtx (mode); ++ emit_insn (gen_fma (xnext, xprev, enext, xprev)); ++ } ++ } ++ else ++ xprev = x0; ++ ++ /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */ ++ /* u = n * xprev */ ++ u = gen_reg_rtx (mode); ++ emit_insn (gen_mul (u, n, xprev)); ++ ++ /* v = n - (d * u) */ ++ v = gen_reg_rtx (mode); ++ emit_insn (gen_fnma (v, d, u, n)); ++ ++ /* dst = (v * xprev) + u */ ++ emit_insn (gen_fma (dst, v, xprev, u)); ++} ++ ++int ++enable_asan_check_stack () ++{ ++ return asan_sanitize_stack_p (); ++} ++ ++static bool ++sw_64_can_change_mode_class (machine_mode from, machine_mode to, ++ reg_class_t rclass) ++{ ++ return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to) ++ || !reg_classes_intersect_p (FLOAT_REGS, rclass)); ++} ++bool ++sw_64_slow_unaligned_access (machine_mode mode, unsigned int align) ++{ ++ return (flag_sw_unalign_byte != 1 || TARGET_SW8A == 0); ++} ++ ++static bool ++sw_64_macro_fusion_p () ++{ ++ return (flag_sw_branch_fusion == 1); ++} ++ ++static bool ++sw_64_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp) ++{ ++ rtx src, dest; ++ enum rtx_code ccode; ++ rtx compare_set = NULL_RTX, test_if, cond; ++ rtx alu_set = NULL_RTX, addr = NULL_RTX; ++ if (get_attr_type (condjmp) != TYPE_IBR) ++ return false; ++ if (get_attr_type (condgen) != TYPE_ICMP) ++ return false; ++ compare_set = single_set (condgen); ++ if (compare_set == NULL_RTX) ++ { ++ int i; ++ rtx pat = PATTERN (condgen); ++ for (i = 0; i < XVECLEN (pat, 0); i++) ++ if (GET_CODE (XVECEXP (pat, 0, i)) == SET) ++ { ++ rtx set_src = SET_SRC (XVECEXP (pat, 0, i)); ++ alu_set = XVECEXP (pat, 0, i); ++ } ++ } ++ if (compare_set == NULL_RTX) ++ return false; ++ src = SET_SRC (compare_set); ++ if (GET_CODE (src) == UNSPEC) ++ return false; ++ test_if = SET_SRC (pc_set (condjmp)); ++ cond = XEXP (test_if, 0); ++ ccode = GET_CODE (cond); ++ return true; ++} ++ ++/* Implement the TARGET_ASAN_SHADOW_OFFSET hook. 
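sw_64_emit_swdiv above seeds x0 with the hardware reciprocal-estimate instruction and then emits the refinement described in its comment. A plain-C sketch of the same arithmetic, with the initial estimate supplied by hand since there is no estimate instruction here:

#include <stdio.h>

/* Refine X, an approximation of 1/D, with two coupled Newton steps and
   then form N/D the way the expansion does in its final step. */
static double
nr_divide (double n, double d, double x)
{
  double e = 1.0 - d * x;      /* e0 = 1 - d*x0 */
  x = x + e * x;               /* x1 = x0 + e0*x0 */
  e = e * e;                   /* enext = eprev*eprev */
  x = x + e * x;               /* xnext = xprev + enext*xprev */

  double u = n * x;            /* u = n * xprev */
  double v = n - d * u;        /* v = n - d*u */
  return u + v * x;            /* dst = v*xprev + u */
}

int
main (void)
{
  /* Start from a deliberately rough estimate of 1/3. */
  printf ("%.15g\n", nr_divide (10.0, 3.0, 0.3));   /* roughly 10.0/3.0 */
  return 0;
}
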
*/ ++static unsigned HOST_WIDE_INT ++sw_64_asan_shadow_offset (void) ++{ ++ return (HOST_WIDE_INT_1 << 49); ++} ++ ++static void ++sw_64_sa_mask (unsigned long *imaskP, unsigned long *fmaskP) ++{ ++ unsigned long imask = 0; ++ unsigned long fmask = 0; ++ unsigned int i; ++ ++ /* When outputting a thunk, we don't have valid register life info, ++ but assemble_start_function wants to output .frame and .mask ++ directives. */ ++ if (cfun->is_thunk) ++ { ++ *imaskP = 0; ++ *fmaskP = 0; ++ return; ++ } ++ ++#ifdef SW_64_ENABLE_FULL_ASAN ++ if (frame_pointer_needed) ++ imask |= (1UL << HARD_FRAME_POINTER_REGNUM); ++#endif ++ ++ /* One for every register we have to save. */ ++ for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ if (!fixed_regs[i] && !call_used_regs[i] && df_regs_ever_live_p (i) ++ && i != REG_RA) ++ { ++ if (i < 32) ++ imask |= (1UL << i); ++ else ++ fmask |= (1UL << (i - 32)); ++ } ++ ++ /* We need to restore these for the handler. */ ++ if (crtl->calls_eh_return) ++ { ++ for (i = 0;; ++i) ++ { ++ unsigned regno = EH_RETURN_DATA_REGNO (i); ++ if (regno == INVALID_REGNUM) ++ break; ++ imask |= 1UL << regno; ++ } ++ } ++ ++ /* If any register spilled, then spill the return address also. */ ++ /* ??? This is required by the Digital stack unwind specification ++ and isn't needed if we're doing Dwarf2 unwinding. */ ++ if (imask || fmask || sw_64_ra_ever_killed ()) ++ imask |= (1UL << REG_RA); ++ ++ *imaskP = imask; ++ *fmaskP = fmask; ++} ++ ++int ++sw_64_sa_size (void) ++{ ++ unsigned long mask[2]; ++ int sa_size = 0; ++ int i, j; ++ ++ sw_64_sa_mask (&mask[0], &mask[1]); ++ ++ for (j = 0; j < 2; ++j) ++ for (i = 0; i < 32; ++i) ++ if ((mask[j] >> i) & 1) ++ sa_size++; ++ ++ /* Our size must be even (multiple of 16 bytes). */ ++ if (sa_size & 1) ++ sa_size++; ++ return sa_size * 8; ++} ++ ++#if 1 ++/* Sw64 stack frames generated by this compiler look like: ++ ++ +-------------------------------+ ++ | | ++ | incoming stack arguments | ++ | | ++ +-------------------------------+ ++ | | <-- incoming stack pointer (aligned) ++ | callee-allocated save area | ++ | for register varargs | ++ | | ++ +-------------------------------+ ++ | local variables | <-- frame_pointer_rtx ++ | | ++ +-------------------------------+ ++ | padding | ++ +-------------------------------+ ++ | callee-saved registers | frame.saved_regs_size ++ +-------------------------------+ ++ | FP' | ++ +-------------------------------+ ++ | RA' | ++ +-------------------------------+ <- hard_frame_pointer_rtx (aligned) ++ | padding | ++ +-------------------------------+ ++ | outgoing stack arguments | <-- arg_pointer ++ | | ++ +-------------------------------+ ++ | | <-- stack_pointer_rtx (aligned) ++ ++ The following registers are reserved during frame layout and should not be ++ used for any other purpose: ++ ++ TODO: add other register purpose ++ - r26(RA), r15(FP): Used by standard frame layout. ++ ++ These registers must be avoided in frame layout related code unless the ++ explicit intention is to interact with one of the features listed above. */ ++ ++static void ++sw_64_layout_frame (void) ++{ ++ poly_int64 offset = 0; ++ ++ cfun->machine->frame.emit_frame_pointer ++ = frame_pointer_needed || crtl->calls_eh_return; ++ ++ unsigned HOST_WIDE_INT sa_mask = 0; ++ int sa_size; ++ ++ /* When outputting a thunk, we don't have valid register life info, ++ but assemble_start_function wants to output .frame and .mask ++ directives. */ ++ if (!cfun->is_thunk) ++ { ++ /* One for every register we have to save. 
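sw_64_sa_mask and sw_64_sa_size above collect the call-saved registers that are live in the function (plus $26/RA once anything spills) into a bitmask and turn it into a register save-area size, padded to an even slot count so the area stays a multiple of 16 bytes; sw_64_layout_frame repeats the same computation with popcount_hwi. A standalone sketch of the size calculation, with an illustrative mask:

#include <stdio.h>

/* Bytes of save area needed for MASK: one 8-byte slot per set bit,
   rounded up to an even number of slots (a multiple of 16 bytes). */
static int
save_area_size (unsigned long long mask)
{
  int slots = __builtin_popcountll (mask);
  if (slots & 1)
    slots++;
  return slots * 8;
}

int
main (void)
{
  /* Say $9, $10 and $26 (RA) must be saved: three registers. */
  unsigned long long mask = (1ULL << 9) | (1ULL << 10) | (1ULL << 26);
  printf ("%d bytes\n", save_area_size (mask));   /* 32, not 24 */
  return 0;
}
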
*/ ++ for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++) ++ if (!call_used_or_fixed_reg_p (i) && df_regs_ever_live_p (i) ++ && i != REG_RA) ++ sa_mask |= HOST_WIDE_INT_1U << i; ++ ++ /* We need to restore these for the handler. */ ++ if (crtl->calls_eh_return) ++ { ++ for (unsigned i = 0;; ++i) ++ { ++ unsigned regno = EH_RETURN_DATA_REGNO (i); ++ if (regno == INVALID_REGNUM) ++ break; ++ sa_mask |= HOST_WIDE_INT_1U << regno; ++ } ++ } ++ /* If any register spilled, then spill the return address also. */ ++ /* ??? This is required by the Digital stack unwind specification ++ and isn't needed if we're doing Dwarf2 unwinding. */ ++ if (sa_mask || sw_64_ra_ever_killed ()) ++ sa_mask |= HOST_WIDE_INT_1U << REG_RA; ++ } ++ sa_size = popcount_hwi (sa_mask); ++ poly_int64 frame_size = get_frame_size (); ++ ++ /* Our size must be even (multiple of 16 bytes). */ ++ if (sa_size & 1) ++ sa_size++; ++ sa_size *= 8; ++ ++ poly_int64 varargs_and_saved_regs_size ++ = sa_size + cfun->machine->frame.saved_varargs_size ++ + crtl->args.pretend_args_size; ++ ++ poly_int64 varargs_size ++ = cfun->machine->frame.saved_varargs_size + crtl->args.pretend_args_size; ++ ++ HOST_WIDE_INT extra_alignment ++ = SW_64_ROUND (frame_size + cfun->machine->frame.saved_varargs_size) ++ - cfun->machine->frame.saved_varargs_size; ++ ++ poly_int64 outgoing_args = SW_64_ROUND (crtl->outgoing_args_size); ++ ++ cfun->machine->frame.local_offset ++ = cfun->machine->frame.saved_varargs_size + crtl->args.pretend_args_size; ++ ++ poly_int64 total_size ++ = aligned_upper_bound (varargs_and_saved_regs_size + frame_size, ++ STACK_BOUNDARY / BITS_PER_UNIT) ++ + outgoing_args; ++ ++ cfun->machine->frame.hard_frame_pointer_offset ++ = aligned_upper_bound (varargs_and_saved_regs_size + frame_size, ++ STACK_BOUNDARY / BITS_PER_UNIT); ++ ++ // TODO: does sw64 need this feild? ++ cfun->machine->frame.callee_offset ++ = cfun->machine->frame.hard_frame_pointer_offset; ++ ++ cfun->machine->frame.arg_pointer_offset = total_size - varargs_size; ++ ++ cfun->machine->frame.sa_mask = sa_mask; ++ cfun->machine->frame.saved_regs_size = sa_size; ++ cfun->machine->frame.frame_size = total_size; ++} ++#endif ++ ++/* Define the offset between two registers, one to be eliminated, ++ and the other its replacement, at the start of a routine. */ ++ ++HOST_WIDE_INT ++sw_64_initial_elimination_offset (unsigned int from, ++ unsigned int to ATTRIBUTE_UNUSED) ++{ ++ HOST_WIDE_INT ret; ++#ifdef SW_64_ENABLE_FULL_ASAN ++ if (to == HARD_FRAME_POINTER_REGNUM) ++ { ++ if (from == ARG_POINTER_REGNUM) ++ { ++ // TODO: in sw64 variable arguments processing, all regs ++ // and pretending arguments offset a passive, so we have ++ // to minus varargs size. May be fix it is a better way? 
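Putting sw_64_layout_frame together: the register save area, varargs/pretend bytes and local variables are summed and rounded up to the stack boundary, and the already-rounded outgoing-argument block sits below that to give the total frame size. A worked sketch with made-up sizes, assuming the 16-byte case (SW_64_ROUND and STACK_BOUNDARY switch to 32 bytes for the SIMD / 32-byte-alignment targets):

#include <stdio.h>

#define ROUND16(x) (((x) + 15ULL) & ~15ULL)   /* 16-byte rounding */

int
main (void)
{
  unsigned long long saved_regs = 32;            /* register save area      */
  unsigned long long varargs    = 0;             /* saved_varargs + pretend */
  unsigned long long locals     = 40;            /* get_frame_size ()       */
  unsigned long long outgoing   = ROUND16 (24);  /* outgoing_args_size      */

  unsigned long long hfp_offset = ROUND16 (saved_regs + varargs + locals);
  unsigned long long frame_size = hfp_offset + outgoing;

  printf ("hard_frame_pointer_offset = %llu\n", hfp_offset);  /* 80 */
  printf ("frame_size = %llu\n", frame_size);                 /* 112 */
  return 0;
}
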
++ return cfun->machine->frame.hard_frame_pointer_offset ++ - cfun->machine->frame.local_offset; ++ } ++ ++ if (from == FRAME_POINTER_REGNUM) ++ { ++ return cfun->machine->frame.hard_frame_pointer_offset ++ - cfun->machine->frame.local_offset; ++ } ++ } ++ ++ if (to == STACK_POINTER_REGNUM) ++ { ++ if (from == ARG_POINTER_REGNUM) ++ { ++ // TODO: same as HARD_FRAME_POINTER_REGNUM; ++ return cfun->machine->frame.arg_pointer_offset; ++ } ++ if (from == FRAME_POINTER_REGNUM) ++ { ++ return cfun->machine->frame.arg_pointer_offset; ++ } ++ } ++ ++ return cfun->machine->frame.frame_size; ++#else ++ ret = sw_64_sa_size (); ++ if (!frame_pointer_needed) ++ ret += SW_64_ROUND (crtl->outgoing_args_size); ++ ++ switch (from) ++ { ++ case FRAME_POINTER_REGNUM: ++ break; ++ ++ case ARG_POINTER_REGNUM: ++ ret += (SW_64_ROUND (get_frame_size () + crtl->args.pretend_args_size) ++ - crtl->args.pretend_args_size); ++ break; ++ ++ default: ++ gcc_unreachable (); ++ } ++ ++ return ret; ++#endif ++} ++ ++/* Compute the frame size. SIZE is the size of the "naked" frame ++ and SA_SIZE is the size of the register save area. */ ++ ++static HOST_WIDE_INT ++compute_frame_size (HOST_WIDE_INT size, HOST_WIDE_INT sa_size) ++{ ++#ifdef SW_64_ENABLE_FULL_ASAN ++ // sw_64_layout_frame (); ++ return cfun->machine->frame.frame_size; ++#else ++ return SW_64_ROUND (crtl->outgoing_args_size) + sa_size ++ + SW_64_ROUND (size + crtl->args.pretend_args_size); ++#endif ++} ++ ++/* Initialize the GCC target structure. */ ++#undef TARGET_IN_SMALL_DATA_P ++#define TARGET_IN_SMALL_DATA_P sw_64_in_small_data_p ++ ++#undef TARGET_ASM_ALIGNED_HI_OP ++#define TARGET_ASM_ALIGNED_HI_OP "\t.word\t" ++#undef TARGET_ASM_ALIGNED_DI_OP ++#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t" ++ ++/* Default unaligned ops are provided for ELF systems. To get unaligned ++ data for non-ELF systems, we have to turn off auto alignment. 
*/ ++#undef TARGET_ASM_RELOC_RW_MASK ++#define TARGET_ASM_RELOC_RW_MASK sw_64_elf_reloc_rw_mask ++#undef TARGET_ASM_SELECT_RTX_SECTION ++#define TARGET_ASM_SELECT_RTX_SECTION sw_64_elf_select_rtx_section ++#undef TARGET_SECTION_TYPE_FLAGS ++#define TARGET_SECTION_TYPE_FLAGS sw_64_elf_section_type_flags ++ ++#undef TARGET_ASM_FUNCTION_END_PROLOGUE ++#define TARGET_ASM_FUNCTION_END_PROLOGUE sw_64_output_function_end_prologue ++ ++#undef TARGET_INIT_LIBFUNCS ++#define TARGET_INIT_LIBFUNCS sw_64_init_libfuncs ++ ++#undef TARGET_LEGITIMIZE_ADDRESS ++#define TARGET_LEGITIMIZE_ADDRESS sw_64_legitimize_address ++#undef TARGET_MODE_DEPENDENT_ADDRESS_P ++#define TARGET_MODE_DEPENDENT_ADDRESS_P sw_64_mode_dependent_address_p ++ ++#undef TARGET_ASM_FILE_START ++#define TARGET_ASM_FILE_START sw_64_file_start ++ ++#undef TARGET_SCHED_ADJUST_COST ++#define TARGET_SCHED_ADJUST_COST sw_64_adjust_cost ++#undef TARGET_SCHED_ISSUE_RATE ++#define TARGET_SCHED_ISSUE_RATE sw_64_issue_rate ++#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD ++#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ ++ sw_64_multipass_dfa_lookahead ++ ++#undef TARGET_HAVE_TLS ++#define TARGET_HAVE_TLS HAVE_AS_TLS ++ ++#undef TARGET_BUILTIN_DECL ++#define TARGET_BUILTIN_DECL sw_64_builtin_decl ++#undef TARGET_INIT_BUILTINS ++#define TARGET_INIT_BUILTINS sw_64_init_builtins ++#undef TARGET_EXPAND_BUILTIN ++#define TARGET_EXPAND_BUILTIN sw_64_expand_builtin ++#undef TARGET_FOLD_BUILTIN ++#define TARGET_FOLD_BUILTIN sw_64_fold_builtin ++#undef TARGET_GIMPLE_FOLD_BUILTIN ++#define TARGET_GIMPLE_FOLD_BUILTIN sw_64_gimple_fold_builtin ++ ++#undef TARGET_FUNCTION_OK_FOR_SIBCALL ++#define TARGET_FUNCTION_OK_FOR_SIBCALL sw_64_function_ok_for_sibcall ++#undef TARGET_CANNOT_COPY_INSN_P ++#define TARGET_CANNOT_COPY_INSN_P sw_64_cannot_copy_insn_p ++#undef TARGET_LEGITIMATE_CONSTANT_P ++#define TARGET_LEGITIMATE_CONSTANT_P sw_64_legitimate_constant_p ++#undef TARGET_CANNOT_FORCE_CONST_MEM ++#define TARGET_CANNOT_FORCE_CONST_MEM sw_64_cannot_force_const_mem ++ ++#undef TARGET_ASM_OUTPUT_MI_THUNK ++#define TARGET_ASM_OUTPUT_MI_THUNK sw_64_output_mi_thunk_osf ++#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK ++#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \ ++ hook_bool_const_tree_hwi_hwi_const_tree_true ++#undef TARGET_STDARG_OPTIMIZE_HOOK ++#define TARGET_STDARG_OPTIMIZE_HOOK sw_64_stdarg_optimize_hook ++ ++#undef TARGET_PRINT_OPERAND ++#define TARGET_PRINT_OPERAND sw_64_print_operand ++#undef TARGET_PRINT_OPERAND_ADDRESS ++#define TARGET_PRINT_OPERAND_ADDRESS sw_64_print_operand_address ++#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P ++#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sw_64_print_operand_punct_valid_p ++ ++/* Use 16-bits anchor. 
*/ ++#undef TARGET_MIN_ANCHOR_OFFSET ++#define TARGET_MIN_ANCHOR_OFFSET -0x7fff - 1 ++#undef TARGET_MAX_ANCHOR_OFFSET ++#define TARGET_MAX_ANCHOR_OFFSET 0x7fff ++#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P ++#define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true ++ ++#undef TARGET_REGISTER_MOVE_COST ++#define TARGET_REGISTER_MOVE_COST sw_64_register_move_cost ++#undef TARGET_MEMORY_MOVE_COST ++#define TARGET_MEMORY_MOVE_COST sw_64_memory_move_cost ++#undef TARGET_RTX_COSTS ++#define TARGET_RTX_COSTS sw_64_rtx_costs ++#undef TARGET_ADDRESS_COST ++#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0 ++ ++#undef TARGET_MACHINE_DEPENDENT_REORG ++#define TARGET_MACHINE_DEPENDENT_REORG sw_64_reorg ++ ++#undef TARGET_PROMOTE_FUNCTION_MODE ++#define TARGET_PROMOTE_FUNCTION_MODE \ ++ default_promote_function_mode_always_promote ++#undef TARGET_PROMOTE_PROTOTYPES ++#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_false ++ ++#undef TARGET_FUNCTION_VALUE ++#define TARGET_FUNCTION_VALUE sw_64_function_value ++#undef TARGET_LIBCALL_VALUE ++#define TARGET_LIBCALL_VALUE sw_64_libcall_value ++#undef TARGET_FUNCTION_VALUE_REGNO_P ++#define TARGET_FUNCTION_VALUE_REGNO_P sw_64_function_value_regno_p ++#undef TARGET_RETURN_IN_MEMORY ++#define TARGET_RETURN_IN_MEMORY sw_64_return_in_memory ++#undef TARGET_PASS_BY_REFERENCE ++#define TARGET_PASS_BY_REFERENCE sw_64_pass_by_reference ++#undef TARGET_SETUP_INCOMING_VARARGS ++#define TARGET_SETUP_INCOMING_VARARGS sw_64_setup_incoming_varargs ++#undef TARGET_STRICT_ARGUMENT_NAMING ++#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true ++#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED ++#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true ++#undef TARGET_SPLIT_COMPLEX_ARG ++#define TARGET_SPLIT_COMPLEX_ARG sw_64_split_complex_arg ++#undef TARGET_GIMPLIFY_VA_ARG_EXPR ++#define TARGET_GIMPLIFY_VA_ARG_EXPR sw_64_gimplify_va_arg ++#undef TARGET_ARG_PARTIAL_BYTES ++#define TARGET_ARG_PARTIAL_BYTES sw_64_arg_partial_bytes ++#undef TARGET_FUNCTION_ARG ++#define TARGET_FUNCTION_ARG sw_64_function_arg ++#undef TARGET_FUNCTION_ARG_ADVANCE ++#define TARGET_FUNCTION_ARG_ADVANCE sw_64_function_arg_advance ++#undef TARGET_TRAMPOLINE_INIT ++#define TARGET_TRAMPOLINE_INIT sw_64_trampoline_init ++ ++#undef TARGET_INSTANTIATE_DECLS ++#define TARGET_INSTANTIATE_DECLS sw_64_instantiate_decls ++ ++#undef TARGET_SECONDARY_RELOAD ++#define TARGET_SECONDARY_RELOAD sw_64_secondary_reload ++#undef TARGET_SECONDARY_MEMORY_NEEDED ++#define TARGET_SECONDARY_MEMORY_NEEDED sw_64_secondary_memory_needed ++#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE ++#define TARGET_SECONDARY_MEMORY_NEEDED_MODE sw_64_secondary_memory_needed_mode ++ ++#undef TARGET_SCALAR_MODE_SUPPORTED_P ++#define TARGET_SCALAR_MODE_SUPPORTED_P sw_64_scalar_mode_supported_p ++#undef TARGET_VECTOR_MODE_SUPPORTED_P ++#define TARGET_VECTOR_MODE_SUPPORTED_P sw_64_vector_mode_supported_p ++ ++#undef TARGET_BUILD_BUILTIN_VA_LIST ++#define TARGET_BUILD_BUILTIN_VA_LIST sw_64_build_builtin_va_list ++ ++#undef TARGET_EXPAND_BUILTIN_VA_START ++#define TARGET_EXPAND_BUILTIN_VA_START sw_64_va_start ++ ++#undef TARGET_OPTION_OVERRIDE ++#define TARGET_OPTION_OVERRIDE sw_64_option_override ++ ++#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE ++#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE sw_64_override_options_after_change ++ ++#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING ++#undef TARGET_MANGLE_TYPE ++#define TARGET_MANGLE_TYPE sw_64_mangle_type ++#endif ++ ++#undef TARGET_LRA_P ++#define 
TARGET_LRA_P hook_bool_void_false ++ ++#undef TARGET_LEGITIMATE_ADDRESS_P ++#define TARGET_LEGITIMATE_ADDRESS_P sw_64_legitimate_address_p ++ ++#undef TARGET_CONDITIONAL_REGISTER_USAGE ++#define TARGET_CONDITIONAL_REGISTER_USAGE sw_64_conditional_register_usage ++ ++#undef TARGET_CANONICALIZE_COMPARISON ++#define TARGET_CANONICALIZE_COMPARISON sw_64_canonicalize_comparison ++ ++#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV ++#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sw_64_atomic_assign_expand_fenv ++ ++#undef TARGET_HARD_REGNO_MODE_OK ++#define TARGET_HARD_REGNO_MODE_OK sw_64_hard_regno_mode_ok ++#undef TARGET_SLOW_UNALIGNED_ACCESS ++#define TARGET_SLOW_UNALIGNED_ACCESS sw_64_slow_unaligned_access ++#undef TARGET_MODES_TIEABLE_P ++#define TARGET_MODES_TIEABLE_P sw_64_modes_tieable_p ++ ++#undef TARGET_CAN_CHANGE_MODE_CLASS ++#define TARGET_CAN_CHANGE_MODE_CLASS sw_64_can_change_mode_class ++ ++#undef TARGET_SCHED_MACRO_FUSION_P ++#define TARGET_SCHED_MACRO_FUSION_P sw_64_macro_fusion_p ++ ++#undef TARGET_SCHED_MACRO_FUSION_PAIR_P ++#define TARGET_SCHED_MACRO_FUSION_PAIR_P sw_64_macro_fusion_pair_p ++#undef TARGET_ASAN_SHADOW_OFFSET ++#define TARGET_ASAN_SHADOW_OFFSET sw_64_asan_shadow_offset ++ ++struct gcc_target targetm = TARGET_INITIALIZER; ++ ++#include "gt-sw-64.h" +diff --git a/gcc/config/sw_64/sw_64.h b/gcc/config/sw_64/sw_64.h +new file mode 100644 +index 000000000..8e3bb0241 +--- /dev/null ++++ b/gcc/config/sw_64/sw_64.h +@@ -0,0 +1,999 @@ ++/* Definitions of target machine for GNU compiler, for Sw_64. ++ Copyright (C) 1992-2020 Free Software Foundation, Inc. ++ Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++. */ ++ ++/* Target CPU builtins. */ ++#define TARGET_CPU_CPP_BUILTINS() \ ++ do \ ++ { \ ++ builtin_define ("__sw_64"); \ ++ builtin_define ("__sw_64__"); \ ++ builtin_assert ("cpu=sw_64"); \ ++ builtin_assert ("machine=sw_64"); \ ++ if (TARGET_CIX) \ ++ { \ ++ builtin_define ("__sw_64_cix__"); \ ++ builtin_assert ("cpu=cix"); \ ++ } \ ++ if (TARGET_FIX) \ ++ { \ ++ builtin_define ("__sw_64_fix__"); \ ++ builtin_assert ("cpu=fix"); \ ++ } \ ++ if (TARGET_BWX) \ ++ { \ ++ builtin_define ("__sw_64_bwx__"); \ ++ builtin_assert ("cpu=bwx"); \ ++ } \ ++ if (TARGET_MAX) \ ++ { \ ++ builtin_define ("__sw_64_max__"); \ ++ builtin_assert ("cpu=max"); \ ++ } \ ++ if (sw_64_cpu_string) \ ++ { \ ++ if (strcmp (sw_64_cpu_string, "sw6a") == 0) \ ++ { \ ++ builtin_define ("__sw_64_sw6a__"); \ ++ builtin_assert ("cpu=sw6a"); \ ++ } \ ++ else if (strcmp (sw_64_cpu_string, "sw6b") == 0) \ ++ { \ ++ builtin_define ("__sw_64_sw6b__"); \ ++ builtin_assert ("cpu=sw6b"); \ ++ } \ ++ else if (strcmp (sw_64_cpu_string, "sw8a") == 0) \ ++ { \ ++ builtin_define ("__sw_64_sw8a__"); \ ++ builtin_assert ("cpu=sw8a"); \ ++ } \ ++ } \ ++ else /* Presumably sw6b. 
*/ \ ++ { \ ++ builtin_define ("__sw_64_sw6b__"); \ ++ builtin_assert ("cpu=sw6b"); \ ++ } \ ++ if (TARGET_IEEE || TARGET_IEEE_WITH_INEXACT) \ ++ builtin_define ("_IEEE_FP"); \ ++ if (TARGET_IEEE_WITH_INEXACT) \ ++ builtin_define ("_IEEE_FP_INEXACT"); \ ++ if (TARGET_LONG_DOUBLE_128) \ ++ builtin_define ("__LONG_DOUBLE_128__"); \ ++ \ ++ /* Macros dependent on the C dialect. */ \ ++ SUBTARGET_LANGUAGE_CPP_BUILTINS (); \ ++ } \ ++ while (0) ++ ++#ifndef SUBTARGET_LANGUAGE_CPP_BUILTINS ++#define SUBTARGET_LANGUAGE_CPP_BUILTINS() \ ++ do \ ++ { \ ++ if (preprocessing_asm_p ()) \ ++ builtin_define_std ("LANGUAGE_ASSEMBLY"); \ ++ else if (c_dialect_cxx ()) \ ++ { \ ++ builtin_define ("__LANGUAGE_C_PLUS_PLUS"); \ ++ builtin_define ("__LANGUAGE_C_PLUS_PLUS__"); \ ++ } \ ++ else \ ++ builtin_define_std ("LANGUAGE_C"); \ ++ if (c_dialect_objc ()) \ ++ { \ ++ builtin_define ("__LANGUAGE_OBJECTIVE_C"); \ ++ builtin_define ("__LANGUAGE_OBJECTIVE_C__"); \ ++ } \ ++ } \ ++ while (0) ++#endif ++ ++/* Run-time compilation parameters selecting different hardware subsets. */ ++ ++/* Which processor to schedule for. The cpu attribute defines a list that ++ mirrors this list, so changes to sw_64.md must be made at the same time. */ ++ ++enum processor_type ++{ ++ PROCESSOR_SW6, /* SW6 */ ++ PROCESSOR_SW8, /* SW8 */ ++ PROCESSOR_MAX ++}; ++ ++extern enum processor_type sw_64_cpu; ++extern enum processor_type sw_64_tune; ++ ++enum sw_64_trap_precision ++{ ++ SW_64_TP_PROG, /* No precision (default). */ ++ SW_64_TP_FUNC, /* Trap contained within originating function. */ ++ SW_64_TP_INSN /* Instruction accuracy and code is resumption safe. */ ++}; ++ ++enum sw_64_fp_rounding_mode ++{ ++ SW_64_FPRM_NORM, /* Normal rounding mode. */ ++ SW_64_FPRM_MINF, /* Round towards minus-infinity. */ ++ SW_64_FPRM_CHOP, /* Chopped rounding mode (towards 0). */ ++ SW_64_FPRM_DYN /* Dynamic rounding mode. */ ++}; ++ ++enum sw_64_fp_trap_mode ++{ ++ SW_64_FPTM_N, /* Normal trap mode. */ ++ SW_64_FPTM_U, /* Underflow traps enabled. */ ++ SW_64_FPTM_SU, /* Software completion, w/underflow traps. */ ++ SW_64_FPTM_SUI /* Software completion, w/underflow & inexact traps. */ ++}; ++ ++extern enum sw_64_trap_precision sw_64_tp; ++extern enum sw_64_fp_rounding_mode sw_64_fprm; ++extern enum sw_64_fp_trap_mode sw_64_fptm; ++ ++/* Invert the easy way to make options work. */ ++#define TARGET_FP (!TARGET_SOFT_FP) ++ ++/* Macros to silence warnings about numbers being signed in traditional ++ * C and unsigned in ISO C when compiled on 32-bit hosts. */ ++ ++#define BITMASK_HIGH (((unsigned long) 1) << 31) /* 0x80000000. */ ++ ++/* These are for target os support and cannot be changed at runtime. 
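The builtins defined above let user code detect the port and the CPU variant at preprocessing time; exactly which of them are defined depends on -mcpu and the FP options, with __sw_64_sw6b__ as the fallback. A small example that relies only on the macro names spelled out in the block above:

#include <stdio.h>

int
main (void)
{
#if defined (__sw_64__)
# if defined (__sw_64_sw8a__)
  puts ("Sw64 port, sw8a core");
# elif defined (__sw_64_sw6a__)
  puts ("Sw64 port, sw6a core");
# elif defined (__sw_64_sw6b__)
  puts ("Sw64 port, sw6b core (the default)");
# else
  puts ("Sw64 port");
# endif
#else
  puts ("not the Sw64 port");
#endif
  return 0;
}
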
*/ ++#define TARGET_ABI_OPEN_VMS 0 ++#define TARGET_ABI_OSF 1 ++ ++#ifndef TARGET_CAN_FAULT_IN_PROLOGUE ++#define TARGET_CAN_FAULT_IN_PROLOGUE 0 ++#endif ++#ifndef TARGET_HAS_XFLOATING_LIBS ++#define TARGET_HAS_XFLOATING_LIBS TARGET_LONG_DOUBLE_128 ++#endif ++#ifndef TARGET_PROFILING_NEEDS_GP ++#define TARGET_PROFILING_NEEDS_GP 0 ++#endif ++#ifndef HAVE_AS_TLS ++#define HAVE_AS_TLS 0 ++#endif ++ ++#define TARGET_DEFAULT MASK_FPREGS ++ ++#ifndef TARGET_CPU_DEFAULT ++#define TARGET_CPU_DEFAULT 0 ++#endif ++ ++#ifndef TARGET_DEFAULT_EXPLICIT_RELOCS ++#ifdef HAVE_AS_EXPLICIT_RELOCS ++#define TARGET_DEFAULT_EXPLICIT_RELOCS MASK_EXPLICIT_RELOCS ++#define TARGET_SUPPORT_ARCH 1 ++#else ++#define TARGET_DEFAULT_EXPLICIT_RELOCS 0 ++#endif ++#endif ++ ++#ifndef TARGET_SUPPORT_ARCH ++#define TARGET_SUPPORT_ARCH 0 ++#endif ++ ++/* Support for a compile-time default CPU, et cetera. The rules are: ++ --with-cpu is ignored if -mcpu is specified. ++ --with-tune is ignored if -mtune is specified. */ ++#define OPTION_DEFAULT_SPECS \ ++ {"cpu", "%{!mcpu=*:-mcpu=%(VALUE)}"}, \ ++ { \ ++ "tune", "%{!mtune=*:-mtune=%(VALUE)}" \ ++ } ++ ++/* target machine storage layout */ ++ ++ ++/* Define the size of `int'. The default is the same as the word size. */ ++#define INT_TYPE_SIZE 32 ++ ++#define LONG_TYPE_SIZE (TARGET_SW_M32 ? 32 : 64) ++ ++/* Define the size of `long long'. The default is the twice the word size. */ ++#define LONG_LONG_TYPE_SIZE 64 ++ ++/* The two floating-point formats we support are S-floating, which is ++ 4 bytes, and T-floating, which is 8 bytes. `float' is S and `double' ++ and `long double' are T. */ ++ ++#define FLOAT_TYPE_SIZE 32 ++#define DOUBLE_TYPE_SIZE 64 ++#define LONG_DOUBLE_TYPE_SIZE (TARGET_LONG_DOUBLE_128 ? 128 : 64) ++ ++/* Work around target_flags dependency in ada/targtyps.c. */ ++#define WIDEST_HARDWARE_FP_SIZE 64 ++ ++#define WCHAR_TYPE "unsigned int" ++#define WCHAR_TYPE_SIZE 32 ++ ++/* Define this macro if it is advisable to hold scalars in registers ++ in a wider mode than that declared by the program. In such cases, ++ the value is constrained to be within the bounds of the declared ++ type, but kept valid in the wider mode. The signedness of the ++ extension may differ from that of the type. ++ ++ For Sw_64, we always store objects in a full register. 32-bit integers ++ are always sign-extended, but smaller objects retain their signedness. ++ ++ Note that small vector types can get mapped onto integer modes at the ++ whim of not appearing in sw_64-modes.def. We never promoted these ++ values before; don't do so now that we've trimmed the set of modes to ++ those actually implemented in the backend. */ ++ ++#define PROMOTE_MODE(MODE, UNSIGNEDP, TYPE) \ ++ if (!TARGET_SW_M32 \ ++ && (GET_MODE_CLASS (MODE) == MODE_INT \ ++ && (TYPE == NULL || TREE_CODE (TYPE) != VECTOR_TYPE) \ ++ && GET_MODE_SIZE (MODE) < UNITS_PER_WORD)) \ ++ { \ ++ if ((MODE) == SImode) \ ++ (UNSIGNEDP) = 0; \ ++ (MODE) = DImode; \ ++ } ++ ++/* Define this if most significant bit is lowest numbered ++ in instructions that operate on numbered bit-fields. ++ ++ There are no such instructions on the Sw_64, but the documentation ++ is little endian. */ ++#define BITS_BIG_ENDIAN 0 ++ ++/* Define this if most significant byte of a word is the lowest numbered. ++ This is false on the Sw_64. */ ++#define BYTES_BIG_ENDIAN 0 ++ ++/* Define this if most significant word of a multiword number is lowest ++ numbered. ++ ++ For Sw_64 we can decide arbitrarily since there are no machine instructions ++ for them. 
Might as well be consistent with bytes. */ ++#define WORDS_BIG_ENDIAN 0 ++ ++/* Width of a word, in units (bytes). */ ++#define UNITS_PER_WORD 8 ++ ++/* Width in bits of a pointer. ++ See also the macro `Pmode' defined below. */ ++#define POINTER_SIZE (TARGET_SW_M32 ? 32 : 64) ++ ++/* Allocation boundary (in *bits*) for storing arguments in argument list. */ ++#define PARM_BOUNDARY 64 ++ ++/* Boundary (in *bits*) on which stack pointer should be aligned. */ ++#define STACK_BOUNDARY ((TARGET_SW_SIMD || TARGET_SW_32ALIGN) ? 256 : 128) ++ ++/* Allocation boundary (in *bits*) for the code of a function. */ ++#define FUNCTION_BOUNDARY 32 ++ ++/* Alignment of field after `int : 0' in a structure. */ ++#define EMPTY_FIELD_BOUNDARY 64 ++ ++/* Every structure's size must be a multiple of this. */ ++#define STRUCTURE_SIZE_BOUNDARY 8 ++ ++/* A bit-field declared as `int' forces `int' alignment for the struct. */ ++#undef PCC_BITFILED_TYPE_MATTERS ++#define PCC_BITFIELD_TYPE_MATTERS 1 ++ ++/* No data type wants to be aligned rounder than this. */ ++#define BIGGEST_ALIGNMENT ((TARGET_SW_SIMD || TARGET_SW_32ALIGN) ? 256 : 128) ++/* For atomic access to objects, must have at least 32-bit alignment ++ unless the machine has byte operations. */ ++#define MINIMUM_ATOMIC_ALIGNMENT ((unsigned int) (TARGET_BWX ? 8 : 32)) ++ ++/* Align all constants and variables to at least a word boundary so ++ we can pick up pieces of them faster. */ ++/* ??? Only if block-move stuff knows about different source/destination ++ alignment. */ ++#if 0 ++#define DATA_ALIGNMENT(EXP, ALIGN) MAX ((ALIGN), BITS_PER_WORD) ++#endif ++ ++/* Set this nonzero if move instructions will actually fail to work ++ when given unaligned data. ++ ++ Since we get an error message when we do one, call them invalid. */ ++ ++#define STRICT_ALIGNMENT 1 ++ ++#define SW64_EXPAND_ALIGNMENT(COND, EXP, ALIGN) \ ++ (((COND) && ((ALIGN) < BITS_PER_WORD) \ ++ && (TREE_CODE (EXP) == ARRAY_TYPE || TREE_CODE (EXP) == UNION_TYPE \ ++ || TREE_CODE (EXP) == RECORD_TYPE)) \ ++ ? BITS_PER_WORD \ ++ : (ALIGN)) ++ ++/* Similarly, make sure that objects on the stack are sensibly aligned. */ ++#define LOCAL_ALIGNMENT(EXP, ALIGN) \ ++ SW64_EXPAND_ALIGNMENT (!flag_conserve_stack, EXP, ALIGN) ++ ++/* Standard register usage. */ ++ ++/* Number of actual hardware registers. ++ The hardware registers are assigned numbers for the compiler ++ from 0 to just below FIRST_PSEUDO_REGISTER. ++ All registers that the compiler knows about must be given numbers, ++ even those that are not normally considered general registers. ++ ++ We define all 32 integer registers, even though $31 is always zero, ++ and all 32 floating-point registers, even though $f31 is also ++ always zero. We do not bother defining the FP status register and ++ there are no other registers. ++ ++ Since $31 is always zero, we will use register number 31 as the ++ argument pointer. It will never appear in the generated code ++ because we will always be eliminating it in favor of the stack ++ pointer or hardware frame pointer. ++ ++ Likewise, we use $f31 for the frame pointer, which will always ++ be eliminated in favor of the hardware frame pointer or the ++ stack pointer. */ ++ ++#define FIRST_PSEUDO_REGISTER 64 ++ ++/* 1 for registers that have pervasive standard uses ++ and are not available for the register allocator. 
*/ ++ ++#define FIXED_REGISTERS \ ++ { \ ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ ++ 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, \ ++ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 \ ++ } ++ ++/* 1 for registers not available across function calls. ++ These must include the FIXED_REGISTERS and also any ++ registers that can be used without being saved. ++ The latter must include the registers where values are returned ++ and the register where structure-value addresses are passed. ++ Aside from that, you can include as many other registers as you like. */ ++#define CALL_USED_REGISTERS \ ++ { \ ++ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, \ ++ 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, \ ++ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 \ ++ } ++ ++/* List the order in which to allocate registers. Each register must be ++ listed once, even those in FIXED_REGISTERS. */ ++ ++#define REG_ALLOC_ORDER \ ++ { \ ++ 1, 2, 3, 4, 5, 6, 7, 8, /* nonsaved integer registers */ \ ++ 22, 23, 24, 25, 28, /* likewise */ \ ++ 0, /* likewise, but return value */ \ ++ 21, 20, 19, 18, 17, 16, /* likewise, but input args */ \ ++ 27, /* likewise, but SYSV procedure value */ \ ++ \ ++ 42, 43, 44, 45, 46, 47, /* nonsaved floating-point registers */ \ ++ 54, 55, 56, 57, 58, 59, /* likewise */ \ ++ 60, 61, 62, /* likewise */ \ ++ 32, 33, /* likewise, but return values */ \ ++ 53, 52, 51, 50, 49, 48, /* likewise, but input args */ \ ++ \ ++ 9, 10, 11, 12, 13, 14, /* saved integer registers */ \ ++ 26, /* return address */ \ ++ 15, /* hard frame pointer */ \ ++ \ ++ 34, 35, 36, 37, 38, 39, /* saved floating-point registers */ \ ++ 40, 41, /* likewise */ \ ++ \ ++ 29, 30, 31, 63 /* gp, sp, ap, sfp */ \ ++ } ++ ++/* Specify the registers used for certain standard purposes. ++ The values of these macros are register numbers. */ ++ ++/* Sw_64 pc isn't overloaded on a register that the compiler knows about. */ ++/* #define PC_REGNUM */ ++ ++/* Register to use for pushing function arguments. */ ++#define STACK_POINTER_REGNUM 30 ++ ++/* Base register for access to local variables of the function. */ ++#define HARD_FRAME_POINTER_REGNUM 15 ++ ++/* Base register for access to arguments of the function. */ ++#define ARG_POINTER_REGNUM 31 ++ ++/* Base register for access to local variables of function. */ ++#define FRAME_POINTER_REGNUM 63 ++ ++/* Register in which static-chain is passed to a function. ++ ++ For the Sw_64, this is based on an example; the calling sequence ++ doesn't seem to specify this. */ ++#define STATIC_CHAIN_REGNUM 1 ++ ++/* The register number of the register used to address a table of ++ static data addresses in memory. */ ++#define PIC_OFFSET_TABLE_REGNUM 29 ++ ++/* Define this macro if the register defined by `PIC_OFFSET_TABLE_REGNUM' ++ is clobbered by calls. */ ++/* ??? It is and it isn't. It's required to be valid for a given ++ function when the function returns. It isn't clobbered by ++ current_file functions. Moreover, we do not expose the ldgp ++ until after reload, so we're probably safe. */ ++/* #define PIC_OFFSET_TABLE_REG_CALL_CLOBBERED */ ++ ++/* Define the classes of registers for register constraints in the ++ machine description. Also define ranges of constants. ++ ++ One of the classes must always be named ALL_REGS and include all hard regs. ++ If there is more than one class, another class must be named NO_REGS ++ and contain no registers. 
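For quick reference, the fixed register roles assigned by the defines above (and by the return-address handling further down in this header); the enum and its names are illustrative only, they are not part of the port:

/* Special hard register numbers on Sw64, as set in sw_64.h. */
enum sw64_special_regs
{
  SW64_SP           = 30,  /* STACK_POINTER_REGNUM                       */
  SW64_HFP          = 15,  /* HARD_FRAME_POINTER_REGNUM                  */
  SW64_AP           = 31,  /* ARG_POINTER_REGNUM, the always-zero $31    */
  SW64_SFP          = 63,  /* FRAME_POINTER_REGNUM, the always-zero $f31 */
  SW64_GP           = 29,  /* PIC_OFFSET_TABLE_REGNUM                    */
  SW64_RA           = 26,  /* return address, see EPILOGUE_USES below    */
  SW64_STATIC_CHAIN = 1    /* STATIC_CHAIN_REGNUM                        */
};
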
++ ++ The name GENERAL_REGS must be the name of a class (or an alias for ++ another name such as ALL_REGS). This is the class of registers ++ that is allowed by "g" or "r" in a register constraint. ++ Also, registers outside this class are allocated only when ++ instructions express preferences for them. ++ ++ The classes must be numbered in nondecreasing order; that is, ++ a larger-numbered class must never be contained completely ++ in a smaller-numbered class. ++ ++ For any two classes, it is very desirable that there be another ++ class that represents their union. */ ++ ++enum reg_class ++{ ++ NO_REGS, ++ R0_REG, ++ R24_REG, ++ R25_REG, ++ R27_REG, ++ GENERAL_REGS, ++ FLOAT_REGS, ++ ALL_REGS, ++ LIM_REG_CLASSES ++}; ++ ++#define N_REG_CLASSES (int) LIM_REG_CLASSES ++ ++/* Give names of register classes as strings for dump file. */ ++ ++#define REG_CLASS_NAMES \ ++ { \ ++ "NO_REGS", "R0_REG", "R24_REG", "R25_REG", "R27_REG", "GENERAL_REGS", \ ++ "FLOAT_REGS", "ALL_REGS" \ ++ } ++ ++/* Define which registers fit in which classes. ++ This is an initializer for a vector of HARD_REG_SET ++ of length N_REG_CLASSES. */ ++ ++#define REG_CLASS_CONTENTS \ ++ { \ ++ {0x00000000, 0x00000000}, /* NO_REGS */ \ ++ {0x00000001, 0x00000000}, /* R0_REG */ \ ++ {0x01000000, 0x00000000}, /* R24_REG */ \ ++ {0x02000000, 0x00000000}, /* R25_REG */ \ ++ {0x08000000, 0x00000000}, /* R27_REG */ \ ++ {0xffffffff, 0x80000000}, /* GENERAL_REGS */ \ ++ {0x00000000, 0x7fffffff}, /* FLOAT_REGS */ \ ++ { \ ++ 0xffffffff, 0xffffffff \ ++ } \ ++ } ++ ++/* The same information, inverted: ++ Return the class number of the smallest class containing ++ reg number REGNO. This could be a conditional expression ++ or could index an array. */ ++ ++#define REGNO_REG_CLASS(REGNO) \ ++ ((REGNO) == 0 \ ++ ? R0_REG \ ++ : (REGNO) == 24 \ ++ ? R24_REG \ ++ : (REGNO) == 25 \ ++ ? R25_REG \ ++ : (REGNO) == 27 \ ++ ? R27_REG \ ++ : IN_RANGE ((REGNO), 32, 62) ? FLOAT_REGS : GENERAL_REGS) ++ ++/* The class value for index registers, and the one for base regs. */ ++#define INDEX_REG_CLASS NO_REGS ++#define BASE_REG_CLASS GENERAL_REGS ++ ++/* Given an rtx X being reloaded into a reg required to be ++ in class CLASS, return the class of reg to actually use. ++ In general this is just CLASS; but on some machines ++ in some cases it is preferable to use a more restrictive class. */ ++ ++#define PREFERRED_RELOAD_CLASS sw_64_preferred_reload_class ++ ++/* Provide the cost of a branch. Exact meaning under development. */ ++#define BRANCH_COST(speed_p, predictable_p) 5 ++ ++/* Stack layout; function entry, exit and calling. */ ++ ++/* Define this if pushing a word on the stack ++ makes the stack pointer a smaller address. */ ++#define STACK_GROWS_DOWNWARD 1 ++ ++/* Define this to nonzero if the nominal address of the stack frame ++ is at the high-address end of the local variables; ++ that is, each additional local variable allocated ++ goes at a more negative offset in the frame. */ ++//#define FRAME_GROWS_DOWNWARD SW_64_ENABLE_ASAN ++#define FRAME_GROWS_DOWNWARD 1 ++ ++/* If we generate an insn to push BYTES bytes, ++ this says how many the stack pointer really advances by. ++ On Sw_64, don't define this because there are no push insns. */ ++/* #define PUSH_ROUNDING(BYTES) */ ++ ++/* Define this to be nonzero if stack checking is built into the ABI. */ ++#define STACK_CHECK_BUILTIN 1 ++ ++/* Define this if the maximum size of all the outgoing args is to be ++ accumulated and pushed during the prologue. 
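REG_CLASS_CONTENTS above encodes each class as two 32-bit words, the first covering hard registers 0-31 and the second registers 32-63; FLOAT_REGS is {0x00000000, 0x7fffffff}, i.e. registers 32-62 but not the always-zero register 63. A tiny standalone membership check over that encoding:

#include <stdio.h>

static int
in_class (unsigned int lo, unsigned int hi, int regno)
{
  return regno < 32 ? (lo >> regno) & 1 : (hi >> (regno - 32)) & 1;
}

int
main (void)
{
  unsigned int float_lo = 0x00000000, float_hi = 0x7fffffff;  /* FLOAT_REGS */
  printf ("reg 32 ($f0): %d\n", in_class (float_lo, float_hi, 32));   /* 1 */
  printf ("reg 63 ($f31): %d\n", in_class (float_lo, float_hi, 63));  /* 0 */
  return 0;
}
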
The amount can be ++ found in the variable crtl->outgoing_args_size. */ ++#define ACCUMULATE_OUTGOING_ARGS 1 ++ ++/* Offset of first parameter from the argument pointer register value. */ ++ ++#define FIRST_PARM_OFFSET(FNDECL) 0 ++ ++/* Definitions for register eliminations. ++ ++ We have two registers that can be eliminated on the Sw_64. First, the ++ frame pointer register can often be eliminated in favor of the stack ++ pointer register. Secondly, the argument pointer register can always be ++ eliminated; it is replaced with either the stack or frame pointer. */ ++ ++/* This is an array of structures. Each structure initializes one pair ++ of eliminable registers. The "from" register number is given first, ++ followed by "to". Eliminations of the same "from" register are listed ++ in order of preference. */ ++ ++#define ELIMINABLE_REGS \ ++ { \ ++ {ARG_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ ++ {ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM}, \ ++ {FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM}, \ ++ { \ ++ FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM \ ++ } \ ++ } ++ ++/* Round up to a multiple of 16 bytes. */ ++#define SW_64_ROUND(X) \ ++ ((TARGET_SW_32ALIGN || TARGET_SW_SIMD) ? ROUND_UP ((X), 32) \ ++ : ROUND_UP ((X), 16)) ++ ++/* Define the offset between two registers, one to be eliminated, and the other ++ its replacement, at the start of a routine. */ ++#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \ ++ ((OFFSET) = sw_64_initial_elimination_offset (FROM, TO)) ++ ++/* Define this if stack space is still allocated for a parameter passed ++ in a register. */ ++/* #define REG_PARM_STACK_SPACE */ ++ ++/* 1 if N is a possible register number for function argument passing. ++ On Sw_64, these are $16-$21 and $f16-$f21. */ ++ ++#define FUNCTION_ARG_REGNO_P(N) \ ++ (IN_RANGE ((N), 16, 21) || ((N) >= 16 + 32 && (N) <= 21 + 32)) ++ ++/* Define a data type for recording info about an argument list ++ during the scan of that argument list. This data type should ++ hold all necessary information about the function itself ++ and about the args processed so far, enough to enable macros ++ such as FUNCTION_ARG to determine where the next arg should go. ++ ++ On Sw_64, this is a single integer, which is a number of words ++ of arguments scanned so far. ++ Thus 6 or more means all following args should go on the stack. */ ++ ++#define CUMULATIVE_ARGS int ++ ++/* Initialize a variable CUM of type CUMULATIVE_ARGS ++ for a call to a function whose data type is FNTYPE. ++ For a library call, FNTYPE is 0. */ ++ ++#define INIT_CUMULATIVE_ARGS(CUM, FNTYPE, LIBNAME, INDIRECT, N_NAMED_ARGS) \ ++ (CUM) = 0 ++ ++/* Define intermediate macro to compute ++ the size (in registers) of an argument. */ ++ ++#define SW_64_ARG_SIZE(MODE, TYPE) \ ++ ((MODE) == TFmode || (MODE) == TCmode \ ++ ? 1 \ ++ : CEIL (((MODE) == BLKmode ? int_size_in_bytes (TYPE) \ ++ : GET_MODE_SIZE (MODE)), \ ++ UNITS_PER_WORD)) ++ ++/* Make (or fake) .linkage entry for function call. ++ IS_LOCAL is 0 if name is used in call, 1 if name is used in definition. */ ++ ++/* This macro defines the start of an assembly comment. */ ++ ++#define ASM_COMMENT_START " #" ++ ++/* This macro produces the initial definition of a function. */ ++ ++#undef ASM_DECLARE_FUNCTION_NAME ++#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \ ++ sw_64_start_function (FILE, NAME, DECL); ++ ++/* This macro closes up a function definition for the assembler. 
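SW_64_ARG_SIZE above measures an argument in 8-byte words: TFmode and TCmode count as a single word, and everything else is its size in bytes divided by UNITS_PER_WORD, rounded up, so once the running count reaches six the remaining arguments go on the stack. A worked sketch of that rounding with a few byte sizes:

#include <stdio.h>

#define UNITS_PER_WORD 8
#define CEIL_DIV(a, b) (((a) + (b) - 1) / (b))

int
main (void)
{
  int sizes[] = { 4, 8, 20 };   /* e.g. int, long, a 20-byte struct */
  for (int i = 0; i < 3; i++)
    printf ("%2d bytes -> %d argument word(s)\n",
            sizes[i], CEIL_DIV (sizes[i], UNITS_PER_WORD));  /* 1, 1, 3 */
  return 0;
}
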
*/ ++ ++#undef ASM_DECLARE_FUNCTION_SIZE ++#define ASM_DECLARE_FUNCTION_SIZE(FILE, NAME, DECL) \ ++ sw_64_end_function (FILE, NAME, DECL) ++ ++/* Output any profiling code before the prologue. */ ++ ++#define PROFILE_BEFORE_PROLOGUE 1 ++ ++/* Never use profile counters. */ ++ ++#define NO_PROFILE_COUNTERS 1 ++ ++/* Output assembler code to FILE to increment profiler label # LABELNO ++ for profiling a function entry. Under SYSV, profiling is enabled ++ by simply passing -pg to the assembler and linker. */ ++ ++#define FUNCTION_PROFILER(FILE, LABELNO) ++ ++/* EXIT_IGNORE_STACK should be nonzero if, when returning from a function, ++ the stack pointer does not matter. The value is tested only in ++ functions that have frame pointers. ++ No definition is equivalent to always zero. */ ++ ++#define EXIT_IGNORE_STACK 1 ++ ++/* Define registers used by the epilogue and return instruction. */ ++ ++#define EPILOGUE_USES(REGNO) ((REGNO) == 26) ++ ++/* Length in units of the trampoline for entering a nested function. */ ++ ++#define TRAMPOLINE_SIZE 32 ++ ++/* The alignment of a trampoline, in bits. */ ++ ++#define TRAMPOLINE_ALIGNMENT 64 ++ ++/* A C expression whose value is RTL representing the value of the return ++ address for the frame COUNT steps up from the current frame. ++ FRAMEADDR is the frame pointer of the COUNT frame, or the frame pointer of ++ the COUNT-1 frame if RETURN_ADDR_IN_PREVIOUS_FRAME is defined. */ ++ ++#define RETURN_ADDR_RTX sw_64_return_addr ++ ++/* Provide a definition of DWARF_FRAME_REGNUM here so that fallback unwinders ++ can use DWARF_ALT_FRAME_RETURN_COLUMN defined below. This is just the same ++ as the default definition in dwarf2out.c. */ ++#undef DWARF_FRAME_REGNUM ++#define DWARF_FRAME_REGNUM(REG) DBX_REGISTER_NUMBER (REG) ++ ++/* Before the prologue, RA lives in $26. */ ++#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (Pmode, 26) ++#define DWARF_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (26) ++#define DWARF_ALT_FRAME_RETURN_COLUMN DWARF_FRAME_REGNUM (64) ++#define DWARF_ZERO_REG 31 ++ ++/* Describe how we implement __builtin_eh_return. */ ++#define EH_RETURN_DATA_REGNO(N) ((N) < 4 ? (N) + 16 : INVALID_REGNUM) ++#define EH_RETURN_STACKADJ_RTX gen_rtx_REG (Pmode, 28) ++#define EH_RETURN_HANDLER_RTX \ ++ gen_rtx_MEM (Pmode, plus_constant (Pmode, stack_pointer_rtx, \ ++ crtl->outgoing_args_size)) ++ ++/* Addressing modes, and classification of registers for them. */ ++ ++/* Macros to check register numbers against specific register classes. */ ++ ++/* These assume that REGNO is a hard or pseudo reg number. ++ They give nonzero only if REGNO is a hard reg of the suitable class ++ or a pseudo reg currently allocated to a suitable hard reg. ++ Since they use reg_renumber, they are safe only once reg_renumber ++ has been allocated, which happens in reginfo.c during register ++ allocation. */ ++ ++#define REGNO_OK_FOR_INDEX_P(REGNO) 0 ++#define REGNO_OK_FOR_BASE_P(REGNO) \ ++ ((REGNO) < 32 || (unsigned) reg_renumber[REGNO] < 32 || (REGNO) == 63 \ ++ || reg_renumber[REGNO] == 63) ++ ++/* Maximum number of registers that can appear in a valid memory address. */ ++#define MAX_REGS_PER_ADDRESS 1 ++ ++/* Recognize any constant value that is a valid address. For the Sw_64, ++ there are only constants none since we want to use LDI to load any ++ symbolic addresses into registers. 
*/ ++ ++#define CONSTANT_ADDRESS_P(X) \ ++ (CONST_INT_P (X) && ((UINTVAL (X) + 0x8000) < 0x10000)) ++ ++/* The macros REG_OK_FOR..._P assume that the arg is a REG rtx ++ and check its validity for a certain class. ++ We have two alternate definitions for each of them. ++ The usual definition accepts all pseudo regs; the other rejects ++ them unless they have been allocated suitable hard regs. ++ The symbol REG_OK_STRICT causes the latter definition to be used. ++ ++ Most source files want to accept pseudo regs in the hope that ++ they will get allocated to the class that the insn wants them to be in. ++ Source files for reload pass need to be strict. ++ After reload, it makes no difference, since pseudo regs have ++ been eliminated by then. */ ++ ++/* Nonzero if X is a hard reg that can be used as an index ++ or if it is a pseudo reg. */ ++#define REG_OK_FOR_INDEX_P(X) 0 ++ ++/* Nonzero if X is a hard reg that can be used as a base reg ++ or if it is a pseudo reg. */ ++#define NONSTRICT_REG_OK_FOR_BASE_P(X) \ ++ (REGNO (X) < 32 || REGNO (X) == 63 || REGNO (X) >= FIRST_PSEUDO_REGISTER) ++ ++/* ??? Nonzero if X is the frame pointer, or some virtual register ++ that may eliminate to the frame pointer. These will be allowed to ++ have offsets greater than 32K. This is done because register ++ elimination offsets will change the hi/lo split, and if we split ++ before reload, we will require additional instructions. */ ++#define NONSTRICT_REG_OK_FP_BASE_P(X) \ ++ (REGNO (X) == 31 || REGNO (X) == 63 \ ++ || (REGNO (X) >= FIRST_PSEUDO_REGISTER \ ++ && REGNO (X) < LAST_VIRTUAL_POINTER_REGISTER)) ++ ++/* Nonzero if X is a hard reg that can be used as a base reg. */ ++#define STRICT_REG_OK_FOR_BASE_P(X) REGNO_OK_FOR_BASE_P (REGNO (X)) ++ ++#ifdef REG_OK_STRICT ++#define REG_OK_FOR_BASE_P(X) STRICT_REG_OK_FOR_BASE_P (X) ++#else ++#define REG_OK_FOR_BASE_P(X) NONSTRICT_REG_OK_FOR_BASE_P (X) ++#endif ++ ++/* Try a machine-dependent way of reloading an illegitimate address ++ operand. If we find one, push the reload and jump to WIN. This ++ macro is used in only one place: `find_reloads_address' in reload.c. */ ++ ++#define LEGITIMIZE_RELOAD_ADDRESS(X, MODE, OPNUM, TYPE, IND_L, WIN) \ ++ do \ ++ { \ ++ rtx new_x \ ++ = sw_64_legitimize_reload_address (X, MODE, OPNUM, TYPE, IND_L); \ ++ if (new_x) \ ++ { \ ++ X = new_x; \ ++ goto WIN; \ ++ } \ ++ } \ ++ while (0) ++ ++/* Specify the machine mode that this machine uses ++ for the index in the tablejump instruction. */ ++#define CASE_VECTOR_MODE SImode ++ ++/* Define as C expression which evaluates to nonzero if the tablejump ++ instruction expects the table to contain offsets from the address of the ++ table. ++ ++ Do not define this if the table should contain absolute addresses. ++ On the Sw_64, the table is really GP-relative, not relative to the PC ++ of the table, but we pretend that it is PC-relative; this should be OK, ++ but we should try to find some better way sometime. */ ++#define CASE_VECTOR_PC_RELATIVE 1 ++ ++/* Define this as 1 if `char' should by default be signed; else as 0. */ ++#define DEFAULT_SIGNED_CHAR 1 ++ ++/* Max number of bytes we can move to or from memory ++ in one reasonably fast instruction. */ ++ ++#define MOVE_MAX 8 ++ ++/* If a memory-to-memory move would take MOVE_RATIO or more simple ++ move-instruction pairs, we will do a movmem or libcall instead. ++ ++ Without byte/word accesses, we want no more than four instructions; ++ with, several single byte accesses are better. */ ++ ++#define MOVE_RATIO(speed) (TARGET_BWX ? 
7 : 2) ++ ++/* Largest number of bytes of an object that can be placed in a register. ++ On the Sw_64 we have plenty of registers, so use TImode. */ ++#define MAX_FIXED_MODE_SIZE GET_MODE_BITSIZE (TImode) ++ ++/* Nonzero if access to memory by bytes is no faster than for words. ++ Also nonzero if doing byte operations (specifically shifts) in registers ++ is undesirable. ++ ++ On the Sw_64, we want to not use the byte operation and instead use ++ masking operations to access fields; these will save instructions. */ ++ ++#define SLOW_BYTE_ACCESS 1 ++ ++/* Define if operations between registers always perform the operation ++ on the full register even if a narrower mode is specified. */ ++#define WORD_REGISTER_OPERATIONS 1 ++ ++/* Define if loading in MODE, an integral mode narrower than BITS_PER_WORD ++ will either zero-extend or sign-extend. The value of this macro should ++ be the code that says which one of the two operations is implicitly ++ done, UNKNOWN if none. */ ++#define LOAD_EXTEND_OP(MODE) ((MODE) == SImode ? SIGN_EXTEND : ZERO_EXTEND) ++ ++/* Define if loading short immediate values into registers sign extends. */ ++#define SHORT_IMMEDIATES_SIGN_EXTEND 1 ++ ++/* The CIX ctlz and cttz instructions return 64 for zero. */ ++#define CLZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ ++ ((VALUE) = 64, TARGET_CIX ? 1 : 0) ++#define CTZ_DEFINED_VALUE_AT_ZERO(MODE, VALUE) \ ++ ((VALUE) = 64, TARGET_CIX ? 1 : 0) ++ ++/* Define the value returned by a floating-point comparison instruction. */ ++ ++#define FLOAT_STORE_FLAG_VALUE(MODE) \ ++ REAL_VALUE_ATOF ((TARGET_FLOAT_VAX ? "0.5" : "2.0"), (MODE)) ++ ++/* Specify the machine mode that pointers have. ++ After generation of rtl, the compiler makes no further distinction ++ between pointers and any other objects of this machine mode. */ ++#define Pmode (TARGET_SW_M32 ? SImode : DImode) ++ ++/* Mode of a function address in a call instruction (for indexing purposes). */ ++ ++#define FUNCTION_MODE Pmode ++ ++/* Define this if addresses of constant functions ++ shouldn't be put through pseudo regs where they can be cse'd. ++ Desirable on machines where ordinary constants are expensive ++ but a CALL with constant address is cheap. ++ ++ We define this on the Sw_64 so that gen_call and gen_call_value ++ get to see the SYMBOL_REF (for the hint field of the jsr). It will ++ then copy it into a register, thus actually letting the address be ++ cse'ed. */ ++ ++#define NO_FUNCTION_CSE 1 ++ ++/* Define this to be nonzero if shift instructions ignore all but the low-order ++ few bits. */ ++#define SHIFT_COUNT_TRUNCATED 1 ++ ++/* Control the assembler format that we output. */ ++ ++/* Output to assembler file text saying following lines ++ may contain character constants, extra white space, comments, etc. */ ++#define ASM_APP_ON (TARGET_EXPLICIT_RELOCS ? "\t.set\tmacro\n" : "") ++ ++/* Output to assembler file text saying following lines ++ no longer contain unusual constructs. */ ++#define ASM_APP_OFF (TARGET_EXPLICIT_RELOCS ? "\t.set\tnomacro\n" : "") ++ ++#define TEXT_SECTION_ASM_OP "\t.text" ++ ++/* Output before writable data. */ ++ ++#define DATA_SECTION_ASM_OP "\t.data" ++ ++/* How to refer to registers in assembler output. ++ This sequence is indexed by compiler's hard-register-number (see above). 
*/ ++ ++#define REGISTER_NAMES \ ++ { \ ++ "$0", "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$9", "$10", "$11", \ ++ "$12", "$13", "$14", "$15", "$16", "$17", "$18", "$19", "$20", "$21", \ ++ "$22", "$23", "$24", "$25", "$26", "$27", "$28", "$29", "$30", "AP", \ ++ "$f0", "$f1", "$f2", "$f3", "$f4", "$f5", "$f6", "$f7", "$f8", "$f9", \ ++ "$f10", "$f11", "$f12", "$f13", "$f14", "$f15", "$f16", "$f17", "$f18", \ ++ "$f19", "$f20", "$f21", "$f22", "$f23", "$f24", "$f25", "$f26", "$f27", \ ++ "$f28", "$f29", "$f30", "FP" \ ++ } ++ ++/* Strip name encoding when emitting labels. */ ++ ++#define ASM_OUTPUT_LABELREF(STREAM, NAME) \ ++ do \ ++ { \ ++ const char *name_ = NAME; \ ++ if (*name_ == '@' || *name_ == '%') \ ++ name_ += 2; \ ++ if (*name_ == '*') \ ++ name_++; \ ++ else \ ++ fputs (user_label_prefix, STREAM); \ ++ fputs (name_, STREAM); \ ++ } \ ++ while (0) ++ ++/* Globalizing directive for a label. */ ++#define GLOBAL_ASM_OP "\t.globl " ++ ++/* Use dollar signs rather than periods in special g++ assembler names. */ ++ ++#undef NO_DOLLAR_IN_LABEL ++ ++/* This is how to store into the string LABEL ++ the symbol_ref name of an internal numbered label where ++ PREFIX is the class of label and NUM is the number within the class. ++ This is suitable for output with `assemble_name'. */ ++ ++#undef ASM_GENERATE_INTERNAL_LABEL ++#define ASM_GENERATE_INTERNAL_LABEL(LABEL, PREFIX, NUM) \ ++ sprintf ((LABEL), "*$%s%ld", (PREFIX), (long) (NUM)) ++ ++/* This is how to output an element of a case-vector that is relative. */ ++ ++#define ASM_OUTPUT_ADDR_DIFF_ELT(FILE, BODY, VALUE, REL) \ ++ fprintf (FILE, "\t.gprel32 $L%d\n", (VALUE)) ++ ++/* If we use NM, pass -g to it so it only lists globals. */ ++#define NM_FLAGS "-pg" ++ ++/* Definitions for debugging. */ ++ ++/* Correct the offset of automatic variables and arguments. Note that ++ the Sw_64 debug format wants all automatic variables and arguments ++ to be in terms of two different offsets from the virtual frame pointer, ++ which is the stack pointer before any adjustment in the function. ++ The offset for the argument pointer is fixed for the native compiler, ++ it is either zero (for the no arguments case) or large enough to hold ++ all argument registers. ++ The offset for the auto pointer is the fourth argument to the .frame ++ directive (local_offset). ++ To stay compatible with the native tools we use the same offsets ++ from the virtual frame pointer and adjust the debugger arg/auto offsets ++ accordingly. These debugger offsets are set up in output_prolog. */ ++ ++extern long sw_64_arg_offset; ++extern long sw_64_auto_offset; ++#define DEBUGGER_AUTO_OFFSET(X) \ ++ ((GET_CODE (X) == PLUS ? INTVAL (XEXP (X, 1)) : 0) + sw_64_auto_offset) ++#define DEBUGGER_ARG_OFFSET(OFFSET, X) (OFFSET + sw_64_arg_offset) ++ ++#define ASM_OUTPUT_SOURCE_FILENAME(STREAM, NAME) \ ++ sw_64_output_filename (STREAM, NAME) ++ ++/* By default, turn on GDB extensions. */ ++#define DEFAULT_GDB_EXTENSIONS 1 ++ ++/* This version don't define SYSTEM_IMPLICIT_EXTERN_C Replace ++ * NO_IMPLICIT_EXTERN_C with SYSTEM_IMPLICIT_EXTERN_C. */ ++/* The system headers under Sw_64 systems are generally C++-aware. */ ++/*#define NO_IMPLICIT_EXTERN_C*/ ++ ++#define TARGET_SUPPORTS_WIDE_INT 1 ++#define SW64_TARGET_SUPPORT_FPCR 1 ++ ++#define HAVE_POST_INCREMENT (TARGET_SW8A ? 1 : 0) ++#define HAVE_POST_DECREMENT (TARGET_SW8A ? 1 : 0) ++#define HAVE_POST_MODIFY_DISP (TARGET_SW8A ? 
1 : 0) ++int ++enable_asan_check_stack (); ++#ifndef SW_64_ENABLE_ASAN ++#define SW_64_ENABLE_FULL_ASAN 1 ++#else ++#undef SW_64_ENABLE_FULL_ASAN ++#define SW_64_ENABLE_ASAN 0 ++#endif ++#define TARGET_CRC32 0 +diff --git a/gcc/config/sw_64/sw_64.md b/gcc/config/sw_64/sw_64.md +new file mode 100644 +index 000000000..8cf036b10 +--- /dev/null ++++ b/gcc/config/sw_64/sw_64.md +@@ -0,0 +1,7865 @@ ++;; Machine description for Sw_64 for GNU C compiler ++;; Copyright (C) 1992-2020 Free Software Foundation, Inc. ++;; Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu) ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++;; GNU General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++ ++;;- See file "rtl.def" for documentation on define_insn, match_*, et. al. ++ ++;; Uses of UNSPEC in this file: ++ ++(define_c_enum "unspec" [ ++ UNSPEC_XFLT_COMPARE ++ UNSPEC_ARG_HOME ++ UNSPEC_LDGP1 ++ UNSPEC_INSXH ++ UNSPEC_MSKXH ++ UNSPEC_CVTQL ++ UNSPEC_CVTLQ ++ UNSPEC_LDGP2 ++ UNSPEC_LITERAL ++ UNSPEC_LITUSE ++ UNSPEC_SIBCALL ++ UNSPEC_SYMBOL ++ UNSPEC_FRINTZ ++ UNSPEC_FRINTP ++ UNSPEC_FRINTG ++ UNSPEC_FRINTN ++ UNSPEC_FRINTI ++ UNSPEC_FRECX ++ ++ ++ ;; TLS Support ++ UNSPEC_TLSGD_CALL ++ UNSPEC_TLSLDM_CALL ++ UNSPEC_TLSGD ++ UNSPEC_TLSLDM ++ UNSPEC_DTPREL ++ UNSPEC_TPREL ++ UNSPEC_TP ++ UNSPEC_TLSRELGOT ++ UNSPEC_GOTDTPREL ++ ++ ;; Builtins ++ UNSPEC_CMPBGE ++ UNSPEC_ZAP ++ UNSPEC_AMASK ++ UNSPEC_IMPLVER ++ UNSPEC_PERR ++ UNSPEC_COPYSIGN ++ UNSPEC_PFSC ++ UNSPEC_PFTC ++ UNSPEC_SBT ++ UNSPEC_CBT ++ UNSPEC_FIMOVS ; SHENJQ20230404_RSQRT ++ ++ ;; Atomic operations ++ UNSPEC_MB ++ UNSPEC_ATOMIC ++ UNSPEC_CMPXCHG ++ UNSPEC_XCHG ++ UNSPECV_LDGP2 ++ UNSPECV_HARDWARE_PREFETCH_CNT ++ ++]) ++ ++;; UNSPEC_VOLATILE: ++ ++(define_c_enum "unspecv" [ ++ UNSPECV_IMB ++ UNSPECV_BLOCKAGE ++ UNSPECV_SPECULATION_BARRIER ++ UNSPECV_SETJMPR ; builtin_setjmp_receiver ++ UNSPECV_LONGJMP ; builtin_longjmp ++ UNSPECV_TRAPB ++ UNSPECV_PSPL ; prologue_stack_probe_loop ++ UNSPECV_REALIGN ++ UNSPECV_EHR ; exception_receiver ++ UNSPECV_MCOUNT ++ UNSPECV_FORCE_MOV ++ UNSPECV_LDGP1 ++ UNSPECV_PLDGP2 ; prologue ldgp ++ UNSPECV_SET_TP ++ UNSPECV_RPCC ++ UNSPECV_SETJMPR_ER ; builtin_setjmp_receiver fragment ++ UNSPECV_LL ; load-locked ++ UNSPECV_SC ; store-conditional ++ UNSPECV_CMPXCHG ++ ++ UNSPEC_TIE ;; TIE ++]) ++ ++;; CQImode must be handled the similarly to HImode ++;; when generating reloads. 
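++;; As an illustrative sketch of the iterator machinery used throughout this
++;; file (the pattern name below is hypothetical, for exposition only): a
++;; template written against an iterator, such as
++;;
++;;   (define_insn "reload_in_<reloadmode>"
++;;     [(set (match_operand:RELOAD12 0 "register_operand" "=r") ...)]
++;;     ...)
++;;
++;; is stamped out once for each mode listed in RELOAD12, with <reloadmode>
++;; replaced by the matching entry of the mode attribute defined just below,
++;; so the QI and HI variants get "qi" and "hi" while CQI reuses the HI
++;; handling ("hi"), as the comment above requires.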
++(define_mode_iterator RELOAD12 [QI HI CQI]) ++(define_mode_attr reloadmode [(QI "qi") (HI "hi") (CQI "hi")]) ++ ++;; Other mode iterators ++(define_mode_iterator IMODE [QI HI SI DI]) ++(define_mode_iterator I12MODE [QI HI]) ++(define_mode_iterator I124MODE [QI HI SI]) ++(define_mode_iterator I24MODE [HI SI]) ++(define_mode_iterator I248MODE [HI SI DI]) ++(define_mode_iterator I48MODE [SI DI]) ++ ++(define_mode_attr DWI [(SI "DI") (DI "TI")]) ++(define_mode_attr modesuffix [(QI "b") (HI "h") (SI "w") (DI "l") ++ (V8QI "b8") (V4HI "w4") ++ (SF "%,") (DF "%-")]) ++(define_mode_attr vecmodesuffix [(QI "b8") (HI "w4")]) ++ ++(define_code_iterator any_maxmin [smax smin umax umin]) ++ ++(define_code_attr maxmin [(smax "maxs") (smin "mins") ++ (umax "maxu") (umin "minu")]) ++ ++(define_mode_iterator SFDF [SF DF]) ++(define_mode_attr SD [(SF "s") (DF "d")]) ++(define_int_iterator FRINT [UNSPEC_FRINTZ UNSPEC_FRINTP UNSPEC_FRINTN ++ UNSPEC_FRINTG UNSPEC_FRINTI]) ++ ++;; Standard pattern names for floating-point rounding instructions. ++(define_int_attr frint_pattern [(UNSPEC_FRINTZ "btrunc") ++ (UNSPEC_FRINTP "ceil") ++ (UNSPEC_FRINTN "floor") ++ (UNSPEC_FRINTI "nearbyint") ++ (UNSPEC_FRINTG "round")]) ++ ++;; frint suffix for floating-point rounding instructions. ++(define_int_attr frint_suffix [(UNSPEC_FRINTZ "_z") ++ (UNSPEC_FRINTP "_p") ++ (UNSPEC_FRINTN "_n") ++ (UNSPEC_FRINTG "_g") ++ (UNSPEC_FRINTI "")]) ++;; endif ++ ++;; Where necessary, the suffixes _le and _be are used to distinguish between ++;; little-endian and big-endian patterns. ++;; ++;; Note that the Unicos/Mk assembler does not support the following ++;; opcodes: mov, fmov, nop, fnop, unop. ++ ++;; Processor type -- this attribute must exactly match the processor_type ++;; enumeration in sw_64.h. ++ ++(define_attr "tune" "sw6,sw8" ++ (const (symbol_ref "((enum attr_tune) sw_64_tune)"))) ++ ++;; Define an insn type attribute. This is used in function unit delay ++;; computations, among other purposes. For the most part, we use the names ++;; defined in the documentation, but add a few that we have to know about ++;; separately. ++ ++(define_attr "type" ++ "ild,fld,ldsym,ist,fst,ibr,callpal,fbr,call,iadd,ilog,shift,icmov,fcmov, ++ icmp,imul,fadd,fmul,fmadd,fcpys,fdiv,fsqrt,misc,mvi,ftoi,itof,mb,ld_l,st_c, ++ multi,vld,vst,ctpop,none,jsr,vcmp,frint,fp,fminmax,vsum,vinv,vsel,crc32,crc32c" ++ (const_string "iadd")) ++ ++;; Describe a user's asm statement. ++(define_asm_attributes ++ [(set_attr "type" "multi")]) ++ ++;; Define the operand size an insn operates on. Used primarily by mul ++;; and div operations that have size dependent timings. ++ ++(define_attr "opsize" "si,di,udi" ++ (const_string "di")) ++ ++;; The TRAP attribute marks instructions that may generate traps ++;; (which are imprecise and may need a trapb if software completion ++;; is desired). ++ ++(define_attr "trap" "no,yes" ++ (const_string "no")) ++ ++;; The ROUND_SUFFIX attribute marks which instructions require a ++;; rounding-mode suffix. The value NONE indicates no suffix, ++;; the value NORMAL indicates a suffix controlled by sw_64_fprm. 
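++;; Illustration (not normative): a pattern marked "normal" has its suffix
++;; picked from sw_64_fprm when the insn is output, while "c" presumably
++;; requests the chopped (truncate-toward-zero) form; compare the hand-written
++;; fcvtdl_z sequences later in this file, which spell a truncating suffix out
++;; directly in their templates instead of going through this attribute.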
++ ++(define_attr "round_suffix" "none,normal,c" ++ (const_string "none")) ++ ++;; The TRAP_SUFFIX attribute marks instructions requiring a trap-mode suffix: ++;; NONE no suffix ++;; SU accepts only /su (cmpt et al) ++;; SUI accepts only /sui (cvtqt and cvtqs) ++;; V_SV accepts /v and /sv (cvtql only) ++;; V_SV_SVI accepts /v, /sv and /svi (cvttq only) ++;; U_SU_SUI accepts /u, /su and /sui (most fp instructions) ++;; ++;; The actual suffix emitted is controlled by sw_64_fptm. ++ ++(define_attr "trap_suffix" "none,su,sui,v_sv,v_sv_svi,u_su_sui" ++ (const_string "none")) ++ ++;; The length of an instruction sequence in bytes. ++ ++(define_attr "length" "" ++ (const_int 4)) ++ ++;; The USEGP attribute marks instructions that have relocations that use ++;; the GP. ++ ++(define_attr "usegp" "no,yes" ++ (cond [(eq_attr "type" "ldsym,call") ++ (const_string "yes") ++ (eq_attr "type" "ild,fld,ist,fst") ++ (symbol_ref "((enum attr_usegp) sw_64_find_lo_sum_using_gp (insn))") ++ ] ++ (const_string "no"))) ++ ++;; The CANNOT_COPY attribute marks instructions with relocations that ++;; cannot easily be duplicated. This includes insns with gpdisp relocs ++;; since they have to stay in 1-1 correspondence with one another. This ++;; also includes call insns, since they must stay in correspondence with ++;; the immediately following gpdisp instructions. ++ ++(define_attr "cannot_copy" "false,true" ++ (const_string "false")) ++ ++;; Used to control the "enabled" attribute on a per-instruction basis. ++;; For convenience, conflate ABI issues re loading of addresses with ++;; an "isa". ++(define_attr "isa" "base,bwx,max,fix,cix,vms,ner,er,sw6a,sw6b,sw8a" ++ (const_string "base")) ++ ++(define_attr "enabled" "" ++ (cond [(eq_attr "isa" "bwx") (symbol_ref "TARGET_BWX") ++ (eq_attr "isa" "max") (symbol_ref "TARGET_MAX") ++ (eq_attr "isa" "fix") (symbol_ref "TARGET_FIX") ++ (eq_attr "isa" "cix") (symbol_ref "TARGET_CIX") ++ (eq_attr "isa" "vms") (symbol_ref "!TARGET_ABI_OSF") ++ (eq_attr "isa" "ner") (symbol_ref "!TARGET_EXPLICIT_RELOCS") ++ (eq_attr "isa" "er") (symbol_ref "TARGET_EXPLICIT_RELOCS") ++ (eq_attr "isa" "sw6a") (symbol_ref "TARGET_SW6A") ++ (eq_attr "isa" "sw6b") (symbol_ref "TARGET_SW6B") ++ (eq_attr "isa" "sw8a") (symbol_ref "TARGET_SW8A") ++ ] ++ (const_int 1))) ++ ++;; Include scheduling descriptions. ++ ++(include "sw6.md") ++(include "sw8.md") ++ ++ ++;; Operand and operator predicates and constraints ++ ++(include "predicates.md") ++(include "constraints.md") ++ ++ ++;; First define the arithmetic insns. Note that the 32-bit forms also ++;; sign-extend. ++ ++;; Handle 32-64 bit extension from memory to a floating point register ++;; specially, since this occurs frequently in int->double conversions. ++;; ++;; Note that while we must retain the =f case in the insn for reload's ++;; benefit, it should be eliminated after reload, so we should never emit ++;; code for that case. But we don't reject the possibility. 
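++;; Illustration (the C source below is assumed, for exposition only): for
++;;
++;;   double f (const int *p) { return (double) *p; }
++;;
++;; the third alternative of *extendsidi2_1 below lets the 32->64-bit sign
++;; extension happen entirely in the floating register file ("flds" followed
++;; by "fcvtwl"), so the later int->float conversion never has to move the
++;; value through an integer register.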
++ ++(define_expand "extendsidi2" ++ [(set (match_operand:DI 0 "register_operand") ++ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]) ++ ++(define_insn "*cvtlq" ++ [(set (match_operand:DI 0 "register_operand" "=f") ++ (unspec:DI [(match_operand:SF 1 "reg_or_0_operand" "fG")] ++ UNSPEC_CVTLQ))] ++ "" ++ "fcvtwl %1,%0" ++ [(set_attr "type" "fadd")]) ++ ++(define_insn "*extendsidi2_1" ++ [(set (match_operand:DI 0 "register_operand" "=r,r,!*f") ++ (sign_extend:DI ++ (match_operand:SI 1 "nonimmediate_operand" "r,m,m")))] ++ "" ++ "@ ++ addw $31,%1,%0 ++ ldw%U1 %0,%1 ++ flds %0,%1\;fcvtwl %0,%0" ++ [(set_attr "type" "iadd,ild,fld") ++ (set_attr "length" "*,*,8")]) ++ ++(define_split ++ [(set (match_operand:DI 0 "hard_fp_register_operand") ++ (sign_extend:DI (match_operand:SI 1 "memory_operand")))] ++ "reload_completed" ++ [(set (match_dup 2) (match_dup 1)) ++ (set (match_dup 0) (unspec:DI [(match_dup 2)] UNSPEC_CVTLQ))] ++{ ++ operands[1] = adjust_address (operands[1], SFmode, 0); ++ operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0])); ++}) ++ ++;; Optimize sign-extension of SImode loads. This shows up in the wake of ++;; reload when converting fp->int. ++ ++(define_peephole2 ++ [(set (match_operand:SI 0 "hard_int_register_operand") ++ (match_operand:SI 1 "memory_operand")) ++ (set (match_operand:DI 2 "hard_int_register_operand") ++ (sign_extend:DI (match_dup 0)))] ++ "true_regnum (operands[0]) == true_regnum (operands[2]) ++ || peep2_reg_dead_p (2, operands[0])" ++ [(set (match_dup 2) ++ (sign_extend:DI (match_dup 1)))]) ++ ++(define_peephole2 ++[ ++(set (match_operand:DF 0 "register_operand") ++ (match_operator:DF 1 "sw_64_fp_comparison_operator" ++ [(match_operand:DF 2 "register_operand") ++ (match_operand:DF 3 "const0_operand")])) ++(set (match_operand:DF 4 "register_operand") ++ (match_operator:DF 5 "sw_64_fp_comparison_operator" ++ [(match_operand:DF 6 "reg_or_0_operand") ++ (match_operand:DF 7 "reg_or_0_operand")])) ++(set (match_operand:SFDF 8 "register_operand") ++ (if_then_else:SFDF ++ (match_operand 9 "comparison_operator") ++ (match_operand:SFDF 10 "reg_or_8bit_operand") ++ (match_operand:SFDF 11 "reg_or_8bit_operand"))) ++] ++"(GET_CODE (operands[1])==LE || GET_CODE (operands[1])==LT) ++ && GET_CODE (operands[5])==EQ && GET_CODE (operands[9])==NE && flag_sw_fselect ++" ++ ++[ ++(set (match_operand:SFDF 8 "reg_or_0_operand") ++ (if_then_else:SFDF ++ (match_operator 1 "sw_64_fp_comparison_operator" ++ [(match_operand:SFDF 2 "reg_or_0_operand") ++ (match_operand:SFDF 3 "const0_operand")]) ++ (match_operand:SFDF 11 "reg_or_0_operand") ++ (match_operand:SFDF 10 "reg_or_0_operand"))) ++] ++) ++(define_peephole2 ++[ ++(set (match_operand:DF 0 "register_operand") ++ (match_operator:DF 1 "sw_64_fp_comparison_operator" ++ [(match_operand:DF 2 "const0_operand") ++ (match_operand:DF 3 "reg_or_0_operand")])) ++(set (match_operand:DF 4 "register_operand") ++ (match_operator:DF 5 "sw_64_fp_comparison_operator" ++ [(match_operand:DF 6 "reg_or_0_operand") ++ (match_operand:DF 7 "reg_or_0_operand")])) ++(set (match_operand:SFDF 8 "register_operand") ++ (if_then_else:SFDF ++ (match_operand 9 "comparison_operator") ++ (match_operand:SFDF 10 "reg_or_8bit_operand") ++ (match_operand:SFDF 11 "reg_or_8bit_operand"))) ++] ++"(GET_CODE (operands[1])==LE || GET_CODE (operands[1])==LT) ++ && GET_CODE (operands[5])==EQ && GET_CODE (operands[9])==NE && flag_sw_fselect ++" ++ ++[ ++(set (match_operand:SFDF 8 "reg_or_0_operand") ++ (if_then_else:SFDF ++ (match_operator 1 
"sw_64_fp_comparison_operator" ++ [(match_operand:SFDF 3 "reg_or_0_operand") ++ (match_operand:SFDF 2 "const0_operand")]) ++ (match_operand:SFDF 10 "reg_or_0_operand") ++ (match_operand:SFDF 11 "reg_or_0_operand"))) ++] ++) ++ ++(define_peephole2 ++[ ++(set (match_operand:DF 0 "register_operand") ++ (match_operator:DF 1 "sw_64_fp_comparison_operator" ++ [(match_operand:DF 2 "register_operand") ++ (match_operand:DF 3 "const0_operand")])) ++(set (match_operand:DF 4 "register_operand") ++ (match_operator:DF 5 "sw_64_fp_comparison_operator" ++ [(match_operand:DF 6 "register_operand") ++ (match_operand:DF 7 "const0_operand")])) ++(set (match_operand:SFDF 8 "register_operand") ++ (if_then_else:SFDF ++ (match_operand 9 "comparison_operator") ++ (match_operand:SFDF 10 "reg_or_8bit_operand") ++ (match_operand:SFDF 11 "reg_or_8bit_operand"))) ++] ++"GET_CODE (operands[1])==EQ && GET_CODE (operands[5])==EQ && ++ (GET_CODE (operands[9])==NE || GET_CODE (operands[9])==EQ)&& ++ (operands[0] == operands[6]) && flag_sw_fselect" ++[ ++(set (match_operand:SFDF 8 "reg_or_0_operand") ++ (if_then_else:SFDF ++ (match_operator 9 "sw_64_fp_comparison_operator" ++ [(match_operand:SFDF 2 "reg_or_0_operand") ++ (match_operand:SFDF 3 "const0_operand")]) ++ (match_operand:SFDF 10 "reg_or_0_operand") ++ (match_operand:SFDF 11 "reg_or_0_operand"))) ++] ++) ++ ++ ++ ++(define_insn "addsi3" ++ [(set (match_operand:SI 0 "register_operand" "=r,r,r,r") ++ (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ,rJ,rJ") ++ (match_operand:SI 2 "add_operand" "rI,O,K,L")))] ++ "" ++ "@ ++ addw %r1,%2,%0 ++ subw %r1,%n2,%0 ++ ldi %0,%2(%r1) ++ ldih %0,%h2(%r1)") ++ ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (plus:SI (match_operand:SI 1 "register_operand") ++ (match_operand:SI 2 "const_int_operand")))] ++ "! add_operand (operands[2], SImode)" ++ [(set (match_dup 0) (plus:SI (match_dup 1) (match_dup 3))) ++ (set (match_dup 0) (plus:SI (match_dup 0) (match_dup 4)))] ++{ ++ HOST_WIDE_INT val = INTVAL (operands[2]); ++ HOST_WIDE_INT low = (val & 0xffff) - 2 * (val & 0x8000); ++ HOST_WIDE_INT rest = val - low; ++ ++ operands[3] = GEN_INT (rest); ++ operands[4] = GEN_INT (low); ++}) ++ ++(define_insn "*addsi_se" ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (sign_extend:DI ++ (plus:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") ++ (match_operand:SI 2 "sext_add_operand" "rI,O"))))] ++ "" ++ "@ ++ addw %r1,%2,%0 ++ subw %r1,%n2,%0") ++ ++(define_insn "*addsi_se2" ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (sign_extend:DI ++ (subreg:SI (plus:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") ++ (match_operand:DI 2 "sext_add_operand" "rI,O")) ++ 0)))] ++ "" ++ "@ ++ addw %r1,%2,%0 ++ subw %r1,%n2,%0") ++ ++;; (plus:SI (ashift:SI (match_dup 3)-> (plus:SI (mult:SI (match_dup 3) ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (sign_extend:DI ++ (plus:SI (match_operand:SI 1 "reg_not_elim_operand") ++ (match_operand:SI 2 "const_int_operand")))) ++ (clobber (match_operand:SI 3 "reg_not_elim_operand"))] ++ "! 
sext_add_operand (operands[2], SImode) && INTVAL (operands[2]) > 0 ++ && INTVAL (operands[2]) % 4 == 0" ++ [(set (match_dup 3) (match_dup 4)) ++ (set (match_dup 0) (sign_extend:DI (plus:SI (mult:SI (match_dup 3) ++ (match_dup 5)) ++ (match_dup 1))))] ++{ ++ HOST_WIDE_INT val = INTVAL (operands[2]) / 4; ++ int mult = 4; ++ ++ if (val % 2 == 0) ++ val /= 2, mult = 8; ++ ++ operands[4] = GEN_INT (val); ++ operands[5] = GEN_INT (mult); ++}) ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (sign_extend:DI ++ (plus:SI (match_operator:SI 1 "comparison_operator" ++ [(match_operand 2) ++ (match_operand 3)]) ++ (match_operand:SI 4 "add_operand")))) ++ (clobber (match_operand:DI 5 "register_operand"))] ++ "" ++ [(set (match_dup 5) (match_dup 6)) ++ (set (match_dup 0) (sign_extend:DI (plus:SI (match_dup 7) (match_dup 4))))] ++{ ++ operands[6] = gen_rtx_fmt_ee (GET_CODE (operands[1]), DImode, ++ operands[2], operands[3]); ++ operands[7] = gen_lowpart (SImode, operands[5]); ++}) ++ ++(define_expand "adddi3" ++ [(set (match_operand:DI 0 "register_operand") ++ (plus:DI (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "add_operand")))]) ++ ++(define_insn "*adddi_er_lo16_dtp" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (lo_sum:DI (match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "dtp16_symbolic_operand")))] ++ "HAVE_AS_TLS" ++ "ldi %0,%2(%1)\t\t!dtprel") ++ ++(define_insn "*adddi_er_hi32_dtp" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI (match_operand:DI 1 "register_operand" "r") ++ (high:DI (match_operand:DI 2 "dtp32_symbolic_operand"))))] ++ "HAVE_AS_TLS" ++ "ldih %0,%2(%1)\t\t!dtprelhi") ++ ++(define_insn "*adddi_er_lo32_dtp" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (lo_sum:DI (match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "dtp32_symbolic_operand")))] ++ "HAVE_AS_TLS" ++ "ldi %0,%2(%1)\t\t!dtprello") ++ ++(define_insn "*adddi_er_lo16_tp" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (lo_sum:DI (match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "tp16_symbolic_operand")))] ++ "HAVE_AS_TLS" ++ "ldi %0,%2(%1)\t\t!tprel") ++ ++(define_insn "*adddi_er_hi32_tp" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI (match_operand:DI 1 "register_operand" "r") ++ (high:DI (match_operand:DI 2 "tp32_symbolic_operand"))))] ++ "HAVE_AS_TLS" ++ "ldih %0,%2(%1)\t\t!tprelhi") ++ ++(define_insn "*adddi_er_lo32_tp" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (lo_sum:DI (match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "tp32_symbolic_operand")))] ++ "HAVE_AS_TLS" ++ "ldi %0,%2(%1)\t\t!tprello") ++ ++(define_insn "*adddi_er_high_l" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI (match_operand:DI 1 "register_operand" "r") ++ (high:DI (match_operand:DI 2 "local_symbolic_operand"))))] ++ "TARGET_EXPLICIT_RELOCS && reload_completed" ++ "ldih %0,%2(%1)\t\t!gprelhigh" ++ [(set_attr "usegp" "yes")]) ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (high:DI (match_operand:DI 1 "local_symbolic_operand")))] ++ "TARGET_EXPLICIT_RELOCS && reload_completed" ++ [(set (match_dup 0) ++ (plus:DI (match_dup 2) (high:DI (match_dup 1))))] ++ "operands[2] = pic_offset_table_rtx;") ++ ++;; We used to expend quite a lot of effort choosing addl/subl/ldi. ++;; With complications like ++;; ++;; The NT stack unwind code can't handle a subl to adjust the stack ++;; (that's a bug, but not one we can do anything about). 
As of NT4.0 SP3, ++;; the exception handling code will loop if a subl is used and an ++;; exception occurs. ++;; ++;; The 19980616 change to emit prologues as RTL also confused some ++;; versions of GDB, which also interprets prologues. This has been ++;; fixed as of GDB 4.18, but it does not harm to unconditionally ++;; use ldi here. ++;; ++;; and the fact that the three insns schedule exactly the same, it's ++;; just not worth the effort. ++ ++(define_insn "*adddi_internal" ++ [(set (match_operand:DI 0 "register_operand" "=r,r,r") ++ (plus:DI (match_operand:DI 1 "register_operand" "%r,r,r") ++ (match_operand:DI 2 "add_operand" "r,K,L")))] ++ "" ++ "@ ++ addl %1,%2,%0 ++ ldi %0,%2(%1) ++ ldih %0,%h2(%1)") ++ ++;; ??? Allow large constants when basing off the frame pointer or some ++;; virtual register that may eliminate to the frame pointer. This is ++;; done because register elimination offsets will change the hi/lo split, ++;; and if we split before reload, we will require additional instructions. ++ ++(define_insn "*adddi_fp_hack" ++ [(set (match_operand:DI 0 "register_operand" "=r,r,r") ++ (plus:DI (match_operand:DI 1 "reg_no_subreg_operand" "r,r,r") ++ (match_operand:DI 2 "const_int_operand" "K,L,n")))] ++ "NONSTRICT_REG_OK_FP_BASE_P (operands[1]) ++ && INTVAL (operands[2]) >= 0 ++ /* This is the largest constant an ldi+ldih pair can add, minus ++ an upper bound on the displacement between SP and AP during ++ register elimination. See INITIAL_ELIMINATION_OFFSET. */ ++ && INTVAL (operands[2]) ++ < (0x7fff8000 ++ - FIRST_PSEUDO_REGISTER * UNITS_PER_WORD ++ - SW_64_ROUND (crtl->outgoing_args_size) ++ - (SW_64_ROUND (get_frame_size () ++ + max_reg_num () * UNITS_PER_WORD ++ + crtl->args.pretend_args_size) ++ - crtl->args.pretend_args_size))" ++ "@ ++ ldi %0,%2(%1) ++ ldih %0,%h2(%1) ++ #") ++ ++;; Don't do this if we are adjusting SP since we don't want to do it ++;; in two steps. Don't split FP sources for the reason listed above. ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (plus:DI (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "const_int_operand")))] ++ "! 
add_operand (operands[2], DImode) ++ && operands[0] != stack_pointer_rtx ++ && operands[1] != frame_pointer_rtx ++ && operands[1] != arg_pointer_rtx" ++ [(set (match_dup 0) (plus:DI (match_dup 1) (match_dup 3))) ++ (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 4)))] ++{ ++ HOST_WIDE_INT val = INTVAL (operands[2]); ++ HOST_WIDE_INT low = (val & 0xffff) - 2 * (val & 0x8000); ++ HOST_WIDE_INT rest = val - low; ++ rtx rest_rtx = GEN_INT (rest); ++ ++ operands[4] = GEN_INT (low); ++ if (satisfies_constraint_L (rest_rtx)) ++ operands[3] = rest_rtx; ++ else if (can_create_pseudo_p ()) ++ { ++ operands[3] = gen_reg_rtx (DImode); ++ emit_move_insn (operands[3], operands[2]); ++ emit_insn (gen_adddi3 (operands[0], operands[1], operands[3])); ++ DONE; ++ } ++ else ++ FAIL; ++}) ++ ++; *sadd->*saddl/*saddq ++(define_insn "*saddl" ++ [(set (match_operand:SI 0 "register_operand" "=r,r") ++ (plus:SI ++ (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r,r") ++ (match_operand:SI 2 "const48_operand" "I,I")) ++ (match_operand:SI 3 "sext_add_operand" "rI,O")))] ++ "" ++ "@ ++ s%2addw %1,%3,%0 ++ s%2subw %1,%n3,%0") ++ ++(define_insn "*saddq" ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (plus:DI ++ (mult:DI (match_operand:DI 1 "reg_not_elim_operand" "r,r") ++ (match_operand:DI 2 "const48_operand" "I,I")) ++ (match_operand:DI 3 "sext_add_operand" "rI,O")))] ++ "" ++ "@ ++ s%2addl %1,%3,%0 ++ s%2subl %1,%n3,%0") ++ ++(define_insn "*saddl_se" ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (sign_extend:DI ++ (plus:SI ++ (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r,r") ++ (match_operand:SI 2 "const48_operand" "I,I")) ++ (match_operand:SI 3 "sext_add_operand" "rI,O"))))] ++ "" ++ "@ ++ s%2addw %1,%3,%0 ++ s%2subw %1,%n3,%0") ++ ++(define_insn "*sxaddw" ++ [(set (match_operand:SI 0 "register_operand" "=r,r") ++ (plus:SI ++ (subreg:SI ++ (ashift:DI ++ (subreg:DI (match_operand:SI 1 "reg_not_elim_operand" "r,r") 0) ++ (match_operand:DI 2 "const_int_operand" "I,I")) ++ 0) ++ (match_operand:SI 3 "sext_add_operand" "rI,O")))] ++ "flag_sw_sxaddl==1 && (INTVAL (operands[2])==3 || INTVAL (operands[2])==2)" ++ { ++ switch (which_alternative) ++ { ++ case 0: ++ if (INTVAL (operands[2]) == 3) ++ return "s8addw %1,%3,%0"; ++ if (INTVAL (operands[2]) == 2) ++ return "s4addw %1,%3,%0"; ++ case 1: ++ if (INTVAL (operands[2]) == 3) ++ return "s8subw %1,%n3,%0"; ++ if (INTVAL (operands[2]) == 2) ++ return "s4subw %1,%n3,%0"; ++ default: ++ gcc_unreachable (); ++ } ++ }) ++ ++(define_insn "*sxsubw" ++ [(set (match_operand:SI 0 "register_operand" "=r,r") ++ (minus:SI ++ (subreg:SI ++ (ashift:DI ++ (subreg:DI (match_operand:SI 1 "reg_not_elim_operand" "r,r") 0) ++ (match_operand:DI 2 "const_int_operand" "I,I")) ++ 0) ++ (match_operand:SI 3 "sext_add_operand" "rI,O")))] ++ "flag_sw_sxaddl==1 && (INTVAL (operands[2])==3 || INTVAL (operands[2])==2)" ++ { ++ switch (which_alternative) ++ { ++ case 0: ++ if (INTVAL (operands[2]) == 3) ++ return "s8subw %1,%3,%0"; ++ if (INTVAL (operands[2]) == 2) ++ return "s4subw %1,%3,%0"; ++ case 1: ++ if (INTVAL (operands[2]) == 3) ++ return "s8addw %1,%n3,%0"; ++ if (INTVAL (operands[2]) == 2) ++ return "s4addw %1,%n3,%0"; ++ default: ++ gcc_unreachable (); ++ } ++ }) ++ ++(define_insn "*sxaddl" ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (plus:DI ++ (ashift:DI (match_operand:DI 1 "reg_not_elim_operand" "r,r") ++ (match_operand:DI 2 "const_int_operand" "I,I")) ++ (match_operand:DI 3 "sext_add_operand" "rI,O")))] ++ "flag_sw_sxaddl==1 && 
(INTVAL (operands[2])==3 || INTVAL (operands[2])==2)" ++ { ++ switch (which_alternative) ++ { ++ case 0: ++ if (INTVAL (operands[2]) == 3) ++ return "s8addl %1,%3,%0"; ++ if (INTVAL (operands[2]) == 2) ++ return "s4addl %1,%3,%0"; ++ case 1: ++ if (INTVAL (operands[2]) == 3) ++ return "s8subl %1,%n3,%0"; ++ if (INTVAL (operands[2]) == 2) ++ return "s4subl %1,%n3,%0"; ++ default: ++ gcc_unreachable (); ++ } ++ }) ++ ++(define_insn "*sxsubl" ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (minus:DI ++ (ashift:DI (match_operand:DI 1 "reg_not_elim_operand" "r,r") ++ (match_operand:DI 2 "const_int_operand" "I,I")) ++ (match_operand:DI 3 "sext_add_operand" "rI,O")))] ++ "flag_sw_sxaddl==1 && (INTVAL (operands[2])==3 || INTVAL (operands[2])==2)" ++ { ++ switch (which_alternative) ++ { ++ case 0: ++ if (INTVAL (operands[2]) == 3) ++ return "s8subl %1,%3,%0"; ++ if (INTVAL (operands[2]) == 2) ++ return "s4subl %1,%3,%0"; ++ case 1: ++ if (INTVAL (operands[2]) == 3) ++ return "s8addl %1,%n3,%0"; ++ if (INTVAL (operands[2]) == 2) ++ return "s4addl %1,%n3,%0"; ++ default: ++ gcc_unreachable (); ++ } ++ }) ++ ++ ++;; plus:SI (ashift:SI -> plus:SI (mult:SI ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (sign_extend:DI ++ (plus:SI (mult:SI (match_operator:SI 1 "comparison_operator" ++ [(match_operand 2) ++ (match_operand 3)]) ++ (match_operand:SI 4 "const48_operand")) ++ (match_operand:SI 5 "sext_add_operand")))) ++ (clobber (match_operand:DI 6 "reg_not_elim_operand"))] ++ "" ++ [(set (match_dup 6) (match_dup 7)) ++ (set (match_dup 0) ++ (sign_extend:DI (plus:SI (mult:SI (match_dup 8) (match_dup 4)) ++ (match_dup 5))))] ++{ ++ operands[7] = gen_rtx_fmt_ee (GET_CODE (operands[1]), DImode, ++ operands[2], operands[3]); ++ operands[8] = gen_lowpart (SImode, operands[6]); ++}) ++ ++ ++(define_insn "neg2" ++ [(set (match_operand:I48MODE 0 "register_operand" "=r") ++ (neg:I48MODE (match_operand:I48MODE 1 "reg_or_8bit_operand" "rI")))] ++ "" ++ "sub $31,%1,%0") ++ ++(define_insn "*negsi_se" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI (neg:SI ++ (match_operand:SI 1 "reg_or_8bit_operand" "rI"))))] ++ "" ++ "subw $31,%1,%0") ++ ++(define_insn "sub3" ++ [(set (match_operand:I48MODE 0 "register_operand" "=r") ++ (minus:I48MODE (match_operand:I48MODE 1 "reg_or_0_operand" "rJ") ++ (match_operand:I48MODE 2 "reg_or_8bit_operand" "rI")))] ++ "" ++ "sub %r1,%2,%0") ++ ++(define_insn "*subsi_se" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (minus:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") ++ (match_operand:SI 2 "reg_or_8bit_operand" "rI"))))] ++ "" ++ "subw %r1,%2,%0") ++ ++(define_insn "*subsi_se2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (subreg:SI (minus:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") ++ (match_operand:DI 2 "reg_or_8bit_operand" "rI")) ++ 0)))] ++ "" ++ "subw %r1,%2,%0") ++ ++(define_insn "*ssubl" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (minus:SI ++ (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r") ++ (match_operand:SI 2 "const48_operand" "I")) ++ (match_operand:SI 3 "reg_or_8bit_operand" "rI")))] ++ "" ++ "s%2subw %1,%3,%0") ++ ++(define_insn "*ssubq" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (minus:DI ++ (mult:DI (match_operand:DI 1 "reg_not_elim_operand" "r") ++ (match_operand:DI 2 "const48_operand" "I")) ++ (match_operand:DI 3 "reg_or_8bit_operand" "rI")))] ++ "" ++ "s%2subl %1,%3,%0") ++ ++;;"s%P2subw %1,%3,%0" ++(define_insn 
"*ssubl_se" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (minus:SI ++ (mult:SI (match_operand:SI 1 "reg_not_elim_operand" "r") ++ (match_operand:SI 2 "const48_operand" "I")) ++ (match_operand:SI 3 "reg_or_8bit_operand" "rI"))))] ++ "" ++ "s%2subw %1,%3,%0") ++ ++ ++(define_insn "mul3" ++ [(set (match_operand:I48MODE 0 "register_operand" "=r") ++ (mult:I48MODE (match_operand:I48MODE 1 "reg_or_0_operand" "%rJ") ++ (match_operand:I48MODE 2 "reg_or_8bit_operand" "rI")))] ++ "" ++ "mul %r1,%2,%0" ++ [(set_attr "type" "imul") ++ (set_attr "opsize" "")]) ++ ++(define_insn "*mulsi_se" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (mult:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ") ++ (match_operand:SI 2 "reg_or_8bit_operand" "rI"))))] ++ "" ++ "mulw %r1,%2,%0" ++ [(set_attr "type" "imul") ++ (set_attr "opsize" "si")]) ++ ++(define_expand "umuldi3_highpart" ++ [(set (match_operand:DI 0 "register_operand") ++ (truncate:DI ++ (lshiftrt:TI ++ (mult:TI (zero_extend:TI ++ (match_operand:DI 1 "register_operand")) ++ (match_operand:DI 2 "reg_or_8bit_operand")) ++ (const_int 64))))] ++ "" ++{ ++ if (REG_P (operands[2])) ++ operands[2] = gen_rtx_ZERO_EXTEND (TImode, operands[2]); ++}) ++ ++(define_insn "*umuldi3_highpart_reg" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (truncate:DI ++ (lshiftrt:TI ++ (mult:TI (zero_extend:TI ++ (match_operand:DI 1 "register_operand" "r")) ++ (zero_extend:TI ++ (match_operand:DI 2 "register_operand" "r"))) ++ (const_int 64))))] ++ "" ++ "umulh %1,%2,%0" ++ [(set_attr "type" "imul") ++ (set_attr "opsize" "udi")]) ++ ++(define_insn "*umuldi3_highpart_const" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (truncate:DI ++ (lshiftrt:TI ++ (mult:TI (zero_extend:TI (match_operand:DI 1 "register_operand" "r")) ++ (match_operand:TI 2 "cint8_operand" "I")) ++ (const_int 64))))] ++ "" ++ "umulh %1,%2,%0" ++ [(set_attr "type" "imul") ++ (set_attr "opsize" "udi")]) ++ ++(define_expand "umulditi3" ++ [(set (match_operand:TI 0 "register_operand") ++ (mult:TI ++ (zero_extend:TI (match_operand:DI 1 "reg_no_subreg_operand")) ++ (zero_extend:TI (match_operand:DI 2 "reg_no_subreg_operand"))))] ++ "" ++{ ++ rtx l = gen_reg_rtx (DImode), h = gen_reg_rtx (DImode); ++ emit_insn (gen_muldi3 (l, operands[1], operands[2])); ++ emit_insn (gen_umuldi3_highpart (h, operands[1], operands[2])); ++ emit_move_insn (gen_lowpart (DImode, operands[0]), l); ++ emit_move_insn (gen_highpart (DImode, operands[0]), h); ++ DONE; ++}) ++ ++;; The divide and remainder operations take their inputs from r24 and ++;; r25, put their output in r27, and clobber r23 and r28 on all systems. ++;; ++;; ??? Force sign-extension here because some versions of SYSV and ++;; Interix/NT don't do the right thing if the inputs are not properly ++;; sign-extended. But Linux, for instance, does not have this ++;; problem. Is it worth the complication here to eliminate the sign ++;; extension? 
++ ++(define_code_iterator any_divmod [div mod udiv umod]) ++ ++(define_expand "si3" ++ [(set (match_dup 3) ++ (sign_extend:DI (match_operand:SI 1 "nonimmediate_operand"))) ++ (set (match_dup 4) ++ (sign_extend:DI (match_operand:SI 2 "nonimmediate_operand"))) ++ (parallel [(set (match_dup 5) ++ (sign_extend:DI ++ (any_divmod:SI (match_dup 3) (match_dup 4)))) ++ (clobber (reg:DI 23)) ++ (clobber (reg:DI 28))]) ++ (set (match_operand:SI 0 "nonimmediate_operand") ++ (subreg:SI (match_dup 5) 0))] ++ "" ++{ ++ operands[3] = gen_reg_rtx (DImode); ++ operands[4] = gen_reg_rtx (DImode); ++ operands[5] = gen_reg_rtx (DImode); ++}) ++ ++(define_expand "di3" ++ [(parallel [(set (match_operand:DI 0 "register_operand") ++ (any_divmod:DI ++ (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "register_operand"))) ++ (clobber (reg:DI 23)) ++ (clobber (reg:DI 28))])] ++ "") ++ ++(define_insn "int_div_use_float_si" ++ [(set (match_operand:DI 0 "register_operand" "=c") ++ (sign_extend:DI (match_operator:SI 3 "divmod_operator" ++ [(match_operand:DI 1 "register_operand" "a") ++ (match_operand:DI 2 "register_operand" "b")]))) ++ (clobber (reg:DF 55)) ++ (clobber (reg:DF 56)) ++ (clobber (reg:DF 60))] ++ "flag_sw_int_div_opt == 1 ++ &&(GET_CODE (operands[3])==DIV)" ++ "ifmovd %1,$f23 ++ fcvtld $f23,$f28 ++ fcpys $f28,$f28,$f23 ++ ifmovd %2,$f24 ++ fcvtld $f24,$f28 ++ fdivd $f23,$f28,$f24 ++ fcvtdl_z $f24,$f23 ++ fimovd $f23,%0" ++ [(set_attr "type" "fdiv")]) ++ ++(define_insn "int_divu_use_float_si" ++ [(set (match_operand:DI 0 "register_operand" "=c") ++ (sign_extend:DI (match_operator:SI 3 "divmod_operator" ++ [(match_operand:DI 1 "register_operand" "a") ++ (match_operand:DI 2 "register_operand" "b")]))) ++ (clobber (reg:DF 55)) ++ (clobber (reg:DF 56)) ++ (clobber (reg:DF 60))] ++ "flag_sw_int_div_opt == 1 ++ &&(GET_CODE (operands[3])==UDIV)" ++ "zap %1,240,%1 ++ zap %2,240,%2 ++ ifmovd %1,$f23 ++ fcvtld $f23,$f28 ++ fcpys $f28,$f28,$f23 ++ ifmovd %2,$f24 ++ fcvtld $f24,$f28 ++ fdivd $f23,$f28,$f24 ++ fcvtdl_z $f24,$f23 ++ fimovd $f23,%0" ++ [(set_attr "type" "fdiv")]) ++ ++(define_insn "int_rem_use_float_si" ++ [(set (match_operand:DI 0 "register_operand" "=c") ++ (sign_extend:DI (match_operator:SI 3 "divmod_operator" ++ [(match_operand:DI 1 "register_operand" "a") ++ (match_operand:DI 2 "register_operand" "b")]))) ++ (clobber (reg:DF 54)) ++ (clobber (reg:DF 55)) ++ (clobber (reg:DF 56)) ++ (clobber (reg:DF 60))] ++ "flag_sw_int_div_opt == 1 ++ &&(GET_CODE (operands[3])==MOD)" ++ "ifmovd %1,$f24 ++ fcvtld $f24,$f28 ++ fcpys $f28,$f28,$f24 ++ ifmovd %2,$f23 ++ fcvtld $f23,$f28 ++ fdivd $f24,$f28,$f22 ++ fcvtdl_z $f22,$f23 ++ fcvtld $f23,$f22 ++ fnmad $f22,$f28,$f24,$f23 ++ fcvtdl_z $f23,$f22 ++ fimovd $f22,%0" ++ [(set_attr "type" "fdiv")]) ++ ++(define_insn "int_remu_use_float_si" ++ [(set (match_operand:DI 0 "register_operand" "=c") ++ (sign_extend:DI (match_operator:SI 3 "divmod_operator" ++ [(match_operand:DI 1 "register_operand" "a") ++ (match_operand:DI 2 "register_operand" "b")]))) ++ (clobber (reg:DF 54)) ++ (clobber (reg:DF 55)) ++ (clobber (reg:DF 56)) ++ (clobber (reg:DF 60))] ++ "flag_sw_int_div_opt == 1 ++ &&(GET_CODE (operands[3])==UMOD)" ++ "zap %1,240,%1 ++ zap %2,240,%2 ++ ifmovd %1,$f22 ++ fcvtld $f22,$f24 ++ ifmovd %2,$f22 ++ fcvtld $f22,$f28 ++ fdivd $f24,$f28,$f23 ++ fcvtdl_z $f23,$f22 ++ fcvtld $f22,$f23 ++ fnmad $f23,$f28,$f24,$f22 ++ fcvtdl_z $f22,$f23 ++ fimovd $f23,%0" ++ [(set_attr "type" "fdiv")]) ++ ++ ++(define_insn_and_split "*divmodsi_internal_er" ++ [(set 
(match_operand:DI 0 "register_operand" "=c") ++ (sign_extend:DI (match_operator:SI 3 "divmod_operator" ++ [(match_operand:DI 1 "register_operand" "a") ++ (match_operand:DI 2 "register_operand" "b")]))) ++ (clobber (reg:DI 23)) ++ (clobber (reg:DI 28))] ++ "TARGET_EXPLICIT_RELOCS && !(TARGET_SW8A && flag_sw_int_divmod)" ++ "#" ++ "&& reload_completed" ++ [(parallel [(set (match_dup 0) ++ (sign_extend:DI (match_dup 3))) ++ (use (match_dup 0)) ++ (use (match_dup 4)) ++ (clobber (reg:DI 23)) ++ (clobber (reg:DI 28))])] ++{ ++ if (flag_sw_int_div_opt) ++ { ++ const char *str; ++ operands[4] = GEN_INT (sw_64_next_sequence_number++); ++ switch (GET_CODE (operands[3])) ++ { ++ case DIV: ++ emit_insn (gen_int_div_use_float_si (operands[0], operands[1], operands[2], operands[3])); ++ break; ++ case UDIV: ++ emit_insn (gen_int_divu_use_float_si (operands[0], operands[1], operands[2], operands[3])); ++ break; ++ case MOD: ++ emit_insn (gen_int_rem_use_float_si (operands[0], operands[1], operands[2], operands[3])); ++ break; ++ case UMOD: ++ emit_insn (gen_int_remu_use_float_si (operands[0], operands[1], operands[2], operands[3])); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ else ++ { ++ const char *str; ++ switch (GET_CODE (operands[3])) ++ { ++ case DIV: ++ str = "__divw"; ++ break; ++ case UDIV: ++ str = "__divwu"; ++ break; ++ case MOD: ++ str = "__remw"; ++ break; ++ case UMOD: ++ str = "__remwu"; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ operands[4] = GEN_INT (sw_64_next_sequence_number++); ++ emit_insn (gen_movdi_er_high_g (operands[0], pic_offset_table_rtx, ++ gen_rtx_SYMBOL_REF (DImode, str), ++ operands[4])); ++ } ++} ++ [(set_attr "type" "call") ++ (set_attr "length" "8")]) ++ ++(define_insn "*divmodsi_internal_er_1" ++ [(set (match_operand:DI 0 "register_operand" "=c") ++ (sign_extend:DI (match_operator:SI 3 "divmod_operator" ++ [(match_operand:DI 1 "register_operand" "a") ++ (match_operand:DI 2 "register_operand" "b")]))) ++ (use (match_operand:DI 4 "register_operand" "c")) ++ (use (match_operand 5 "const_int_operand")) ++ (clobber (reg:DI 23)) ++ (clobber (reg:DI 28))] ++ "TARGET_EXPLICIT_RELOCS && !(TARGET_SW8A && flag_sw_int_divmod)" ++ { ++ if (flag_sw_int_div_opt) ++ { ++ switch (GET_CODE (operands[3])) ++ { ++ case DIV: ++ case UDIV: ++ case MOD: ++ case UMOD: ++ return ""; ++ } ++ } ++ else ++ { ++ return "call $23,($27),__%E3%j5"; ++ } ++ } ++ [(set_attr "type" "call") ++ (set_attr "length" "4")]) ++ ++(define_insn "*divmodsi_internal" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI (match_operator:SI 3 "divmod_operator" ++ [(match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "register_operand" "r")]))) ++ (clobber (reg:DI 23)) ++ (clobber (reg:DI 28))] ++ "TARGET_SW8A && flag_sw_int_divmod" ++ { ++ switch (GET_CODE (operands[3])) ++ { ++ case DIV: return "divw %1,%2,%0"; ++ case UDIV: return "udivw %1,%2,%0"; ++ case MOD: return "remw %1,%2,%0"; ++ case UMOD: return "uremw %1,%2,%0"; ++ } ++ } ++ [(set_attr "length" "4")]) ++ ++(define_insn "int_div_use_float_di" ++[(set (match_operand:DI 0 "register_operand" "=c") ++ (match_operator:DI 3 "divmod_operator" ++ [(match_operand:DI 1 "register_operand" "a") ++ (match_operand:DI 2 "register_operand" "b")])) ++ (use (match_operand:DI 4 "register_operand" "r")) ++ (use (match_operand:DI 5 "symbolic_operand")) ++ (use (match_operand 6 "const_int_operand")) ++ (use (label_ref:DI (match_operand 7))) ++ (use (label_ref:DI (match_operand 8))) ++ (clobber (reg:DF 55)) ++ (clobber 
(reg:DI 27)) ++ (clobber (reg:DI 28)) ++ (clobber (reg:DF 59)) ++ (clobber (reg:DF 60))] ++ "TARGET_EXPLICIT_RELOCS && flag_sw_int_div_opt == 1 &&(GET_CODE (operands[3])==DIV)" ++ "srl %1,52,$28 ++ srl %2,52,$27 ++ bis $28,$27,$28 ++ bne $28,%l7 ++ ifmovd %1,$f23 ++ fcvtld $f23,$f27 ++ ifmovd %2,$f28 ++ fcvtld $f28,$f23 ++ fdivd $f27,$f23,$f28 ++ fcvtdl_z $f28,$f23 ++ fimovd $f23,%0 ++ br %l8 ++%l7: ++ ldl %0,%5(%4)\t\t!literal!%6 ++ call $23,($27),__%E3%j6 ++%l8:" ++ [(set_attr "cannot_copy" "true") ++ (set_attr "type" "fdiv")]) ++ ++(define_insn "int_divu_use_float_di" ++[(set (match_operand:DI 0 "register_operand" "=c") ++ (match_operator:DI 3 "divmod_operator" ++ [(match_operand:DI 1 "register_operand" "a") ++ (match_operand:DI 2 "register_operand" "b")])) ++ (use (match_operand:DI 4 "register_operand" "r")) ++ (use (match_operand:DI 5 "symbolic_operand")) ++ (use (match_operand 6 "const_int_operand")) ++ (use (label_ref:DI (match_operand 7))) ++ (use (label_ref:DI (match_operand 8))) ++ (clobber (reg:DF 55)) ++ (clobber (reg:DI 27)) ++ (clobber (reg:DI 28)) ++ (clobber (reg:DF 59)) ++ (clobber (reg:DF 60))] ++ "TARGET_EXPLICIT_RELOCS && flag_sw_int_div_opt == 1 ++ &&(GET_CODE (operands[3])==UDIV)" ++ "srl %1,52,$28 ++ srl %2,52,$27 ++ bis $28,$27,$28 ++ bne $28,%l7 ++ ifmovd %1,$f23 ++ fcvtld $f23,$f27 ++ ifmovd %2,$f28 ++ fcvtld $f28,$f23 ++ fdivd $f27,$f23,$f28 ++ fcvtdl_z $f28,$f23 ++ fimovd $f23,%0 ++ br %l8 ++%l7: ++ ldl %0,%5(%4)\t\t!literal!%6 ++ call $23,($27),__%E3%j6 ++%l8:" ++ [(set_attr "cannot_copy" "true") ++ (set_attr "type" "fdiv")]) ++ ++(define_insn "int_rem_use_float_di" ++[(set (match_operand:DI 0 "register_operand" "=c") ++ (match_operator:DI 3 "divmod_operator" ++ [(match_operand:DI 1 "register_operand" "a") ++ (match_operand:DI 2 "register_operand" "b")])) ++ (use (match_operand:DI 4 "register_operand" "r")) ++ (use (match_operand:DI 5 "symbolic_operand")) ++ (use (match_operand 6 "const_int_operand")) ++ (use (label_ref:DI (match_operand 7))) ++ (use (label_ref:DI (match_operand 8))) ++ (clobber (reg:DF 54)) ++ (clobber (reg:DF 55)) ++ (clobber (reg:DI 27)) ++ (clobber (reg:DI 28)) ++ (clobber (reg:DF 56)) ++ (clobber (reg:DF 60))] ++ "TARGET_EXPLICIT_RELOCS && flag_sw_int_div_opt == 1 ++ &&(GET_CODE (operands[3])==MOD)" ++ "srl %1,52,$28 ++ srl %2,52,$27 ++ bis $28,$27,$28 ++ bne $28,%l7 ++ ifmovd %1,$f22 ++ fcvtld $f22,$f24 ++ ifmovd %2,$f22 ++ fcvtld $f22,$f28 ++ fdivd $f24,$f28,$f22 ++ fcvtdl_z $f22,$f23 ++ fcvtld $f23,$f22 ++ fnmad $f22,$f28,$f24,$f23 ++ fcvtdl_z $f23,$f22 ++ fimovd $f22,%0 ++ br %l8 ++%l7: ++ ldl %0,%5(%4)\t\t!literal!%6 ++ call $23,($27),__%E3%j6 ++%l8:" ++ [(set_attr "cannot_copy" "true") ++ (set_attr "type" "fdiv")]) ++ ++(define_insn "int_remu_use_float_di" ++[(set (match_operand:DI 0 "register_operand" "=c") ++ (match_operator:DI 3 "divmod_operator" ++ [(match_operand:DI 1 "register_operand" "a") ++ (match_operand:DI 2 "register_operand" "b")])) ++ (use (match_operand:DI 4 "register_operand" "r")) ++ (use (match_operand:DI 5 "symbolic_operand")) ++ (use (match_operand 6 "const_int_operand")) ++ (use (label_ref:DI (match_operand 7))) ++ (use (label_ref:DI (match_operand 8))) ++ (clobber (reg:DF 54)) ++ (clobber (reg:DF 55)) ++ (clobber (reg:DI 27)) ++ (clobber (reg:DI 28)) ++ (clobber (reg:DF 56)) ++ (clobber (reg:DF 60))] ++ "TARGET_EXPLICIT_RELOCS && flag_sw_int_div_opt == 1 ++ &&(GET_CODE (operands[3])==UMOD)" ++ " srl %1,52,$28 ++ srl %2,52,$27 ++ bis $28,$27,$28 ++ bne $28,%l7 ++ ifmovd %1,$f22 ++ fcvtld $f22,$f24 ++ ifmovd 
%2,$f22 ++ fcvtld $f22,$f28 ++ fdivd $f24,$f28,$f23 ++ fcvtdl_z $f23,$f22 ++ fcvtld $f22,$f23 ++ fnmad $f23,$f28,$f24,$f22 ++ fcvtdl_z $f22,$f23 ++ fimovd $f23,%0 ++ br %l8 ++%l7: ++ ldl %0,%5(%4)\t\t!literal!%6 ++ call $23,($27),__%E3%j6 ++%l8:" ++ [(set_attr "cannot_copy" "true") ++ (set_attr "type" "fdiv")]) ++ ++(define_insn_and_split "*divmoddi_internal_er" ++ [(set (match_operand:DI 0 "register_operand" "=c") ++ (match_operator:DI 3 "divmod_operator" ++ [(match_operand:DI 1 "register_operand" "a") ++ (match_operand:DI 2 "register_operand" "b")])) ++ (clobber (reg:DI 23)) ++ (clobber (reg:DI 28))] ++ "TARGET_EXPLICIT_RELOCS && !(TARGET_SW8A && flag_sw_int_divmod)" ++ "#" ++ "&& reload_completed" ++ [(parallel [(set (match_dup 0) (match_dup 3)) ++ (use (match_dup 0)) ++ (use (match_dup 4)) ++ (clobber (reg:DI 23)) ++ (clobber (reg:DI 28))])] ++{ ++ if (flag_sw_int_div_opt) ++ { ++ const char *str; ++ operands[4] = GEN_INT (sw_64_next_sequence_number++); ++ operands[7] = gen_label_rtx (); ++ operands[8] = gen_label_rtx (); ++ switch (GET_CODE (operands[3])) ++ { ++ case DIV: ++ str = "__divl"; ++ emit_insn (gen_int_div_use_float_di (operands[0],operands[1],operands[2],operands[3],pic_offset_table_rtx ,gen_rtx_SYMBOL_REF (DImode, str),operands[4],operands[7],operands[8])); ++ break; ++ case UDIV: ++ str = "__divlu"; ++ emit_insn (gen_int_divu_use_float_di (operands[0],operands[1],operands[2],operands[3],pic_offset_table_rtx ,gen_rtx_SYMBOL_REF (DImode, str),operands[4],operands[7],operands[8])); ++ break; ++ case MOD: ++ str = "__reml"; ++ emit_insn (gen_int_rem_use_float_di (operands[0],operands[1],operands[2],operands[3],pic_offset_table_rtx ,gen_rtx_SYMBOL_REF (DImode, str),operands[4],operands[7],operands[8])); ++ break; ++ case UMOD: ++ str = "__remlu"; ++ emit_insn (gen_int_remu_use_float_di (operands[0],operands[1],operands[2],operands[3],pic_offset_table_rtx ,gen_rtx_SYMBOL_REF (DImode, str),operands[4],operands[7],operands[8])); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ } ++ else ++ { ++ const char *str; ++ switch (GET_CODE (operands[3])) ++ { ++ case DIV: ++ str = "__divl"; ++ break; ++ case UDIV: ++ str = "__divlu"; ++ break; ++ case MOD: ++ str = "__reml"; ++ break; ++ case UMOD: ++ str = "__remlu"; ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ operands[4] = GEN_INT (sw_64_next_sequence_number++); ++ emit_insn (gen_movdi_er_high_g (operands[0], pic_offset_table_rtx, ++ gen_rtx_SYMBOL_REF (DImode, str), ++ operands[4])); ++ } ++} ++ [(set_attr "type" "call") ++ (set_attr "length" "8")]) ++ ++(define_insn "*divmoddi_internal_er_1" ++ [(set (match_operand:DI 0 "register_operand" "=c") ++ (match_operator:DI 3 "divmod_operator" ++ [(match_operand:DI 1 "register_operand" "a") ++ (match_operand:DI 2 "register_operand" "b")])) ++ (use (match_operand:DI 4 "register_operand" "c")) ++ (use (match_operand 5 "const_int_operand")) ++ (clobber (reg:DI 23)) ++ (clobber (reg:DI 28))] ++ "TARGET_EXPLICIT_RELOCS && !(TARGET_SW8A && flag_sw_int_divmod)" ++ { ++ if (flag_sw_int_div_opt) ++ { ++ switch (GET_CODE (operands[3])) ++ { ++ case DIV: ++ case UDIV: ++ case MOD: ++ case UMOD: ++ return ""; ++ } ++ } ++ else ++ { ++ return "call $23,($27),__%E3%j5"; ++ } ++ } ++ [(set_attr "type" "call") ++ (set_attr "length" "4")]) ++ ++(define_insn "*divmoddi_internal" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (match_operator:DI 3 "divmod_operator" ++ [(match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "register_operand" "r")])) ++ (clobber (reg:DI 23)) ++ 
(clobber (reg:DI 28))] ++ "TARGET_SW8A && flag_sw_int_divmod" ++ { ++ switch (GET_CODE (operands[3])) ++ { ++ case DIV: return "divl %1,%2,%0"; ++ case UDIV: return "udivl %1,%2,%0"; ++ case MOD: return "reml %1,%2,%0"; ++ case UMOD: return "ureml %1,%2,%0"; ++ } ++ } ++ [(set_attr "length" "4")]) ++ ++;; Next are the basic logical operations. We only expose the DImode operations ++;; to the rtl expanders, but SImode versions exist for combine as well as for ++;; the atomic operation splitters. ++ ++(define_insn "*andsi_internal" ++ [(set (match_operand:SI 0 "register_operand" "=r,r,r") ++ (and:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ,rJ") ++ (match_operand:SI 2 "and_operand" "rI,N,M")))] ++ "" ++ "@ ++ and %r1,%2,%0 ++ bic %r1,%N2,%0 ++ zapnot %r1,%m2,%0" ++ [(set_attr "type" "ilog,ilog,shift")]) ++ ++(define_insn "anddi3" ++ [(set (match_operand:DI 0 "register_operand" "=r,r,r") ++ (and:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ,rJ") ++ (match_operand:DI 2 "and_operand" "rI,N,M")))] ++ "" ++ "@ ++ and %r1,%2,%0 ++ bic %r1,%N2,%0 ++ zapnot %r1,%m2,%0" ++ [(set_attr "type" "ilog,ilog,shift")]) ++ ++;; There are times when we can split an AND into two AND insns. This occurs ++;; when we can first clear any bytes and then clear anything else. For ++;; example "I & 0xffff07" is "(I & 0xffffff) & 0xffffffffffffff07". ++;; Only do this when running on 64-bit host since the computations are ++;; too messy otherwise. ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (and:DI (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "const_int_operand")))] ++ "! and_operand (operands[2], DImode)" ++ [(set (match_dup 0) (and:DI (match_dup 1) (match_dup 3))) ++ (set (match_dup 0) (and:DI (match_dup 0) (match_dup 4)))] ++{ ++ unsigned HOST_WIDE_INT mask1 = INTVAL (operands[2]); ++ unsigned HOST_WIDE_INT mask2 = mask1; ++ int i; ++ ++ /* For each byte that isn't all zeros, make it all ones. */ ++ for (i = 0; i < 64; i += 8) ++ if ((mask1 & ((HOST_WIDE_INT) 0xff << i)) != 0) ++ mask1 |= (HOST_WIDE_INT) 0xff << i; ++ ++ /* Now turn on any bits we've just turned off. 
*/ ++ mask2 |= ~ mask1; ++ ++ operands[3] = GEN_INT (mask1); ++ operands[4] = GEN_INT (mask2); ++}) ++ ++(define_insn "zero_extendqi2" ++ [(set (match_operand:I248MODE 0 "register_operand" "=r,r") ++ (zero_extend:I248MODE ++ (match_operand:QI 1 "reg_or_bwx_memory_operand" "r,m")))] ++ "" ++ "@ ++ and %1,0xff,%0 ++ ldbu%U1 %0,%1" ++ [(set_attr "type" "ilog,ild") ++ (set_attr "isa" "*,bwx")]) ++ ++(define_insn "zero_extendhi2" ++ [(set (match_operand:I48MODE 0 "register_operand" "=r,r") ++ (zero_extend:I48MODE ++ (match_operand:HI 1 "reg_or_bwx_memory_operand" "r,m")))] ++ "" ++ "@ ++ zapnot %1,3,%0 ++ ldhu%U1 %0,%1" ++ [(set_attr "type" "shift,ild") ++ (set_attr "isa" "*,bwx")]) ++ ++(define_insn "zero_extendsidi2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (zero_extend:DI (match_operand:SI 1 "register_operand" "r")))] ++ "" ++ "zapnot %1,15,%0" ++ [(set_attr "type" "shift")]) ++ ++(define_insn "andnot3" ++ [(set (match_operand:I48MODE 0 "register_operand" "=r") ++ (and:I48MODE ++ (not:I48MODE (match_operand:I48MODE 1 "reg_or_8bit_operand" "rI")) ++ (match_operand:I48MODE 2 "reg_or_0_operand" "rJ")))] ++ "" ++ "bic %r2,%1,%0" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn "*iorsi_internal" ++ [(set (match_operand:SI 0 "register_operand" "=r,r") ++ (ior:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") ++ (match_operand:SI 2 "or_operand" "rI,N")))] ++ "" ++ "@ ++ bis %r1,%2,%0 ++ ornot %r1,%N2,%0" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn "iordi3" ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (ior:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") ++ (match_operand:DI 2 "or_operand" "rI,N")))] ++ "" ++ "@ ++ bis %r1,%2,%0 ++ ornot %r1,%N2,%0" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn "*one_cmplsi_internal" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (not:SI (match_operand:SI 1 "reg_or_8bit_operand" "rI")))] ++ "" ++ "ornot $31,%1,%0" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn "one_cmpldi2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (not:DI (match_operand:DI 1 "reg_or_8bit_operand" "rI")))] ++ "" ++ "ornot $31,%1,%0" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn "*iornot3" ++ [(set (match_operand:I48MODE 0 "register_operand" "=r") ++ (ior:I48MODE ++ (not:I48MODE (match_operand:I48MODE 1 "reg_or_8bit_operand" "rI")) ++ (match_operand:I48MODE 2 "reg_or_0_operand" "rJ")))] ++ "" ++ "ornot %r2,%1,%0" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn "*xorsi_internal" ++ [(set (match_operand:SI 0 "register_operand" "=r,r") ++ (xor:SI (match_operand:SI 1 "reg_or_0_operand" "%rJ,rJ") ++ (match_operand:SI 2 "or_operand" "rI,N")))] ++ "" ++ "@ ++ xor %r1,%2,%0 ++ eqv %r1,%N2,%0" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn "xordi3" ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (xor:DI (match_operand:DI 1 "reg_or_0_operand" "%rJ,rJ") ++ (match_operand:DI 2 "or_operand" "rI,N")))] ++ "" ++ "@ ++ xor %r1,%2,%0 ++ eqv %r1,%N2,%0" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn "*xornot3" ++ [(set (match_operand:I48MODE 0 "register_operand" "=r") ++ (not:I48MODE (xor:I48MODE ++ (match_operand:I48MODE 1 "register_operand" "%rJ") ++ (match_operand:I48MODE 2 "register_operand" "rI"))))] ++ "" ++ "eqv %r1,%2,%0" ++ [(set_attr "type" "ilog")]) ++ ++;; Handle FFS and related insns iff we support CIX. 
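++
++;; Editorial note (illustration, not part of the original port): the ffsdi2
++;; expansion below is the standard cttz-based idiom.  A rough C equivalent,
++;; using a hypothetical helper name, is
++;;
++;;   long my_ffs (unsigned long x)
++;;   {
++;;     /* position of lowest set bit, counting from 1; ffs(0) is 0 */
++;;     return x ? __builtin_ctzl (x) + 1 : 0;
++;;   }
++;;
++;; i.e. a cttz, an add of one, and a conditional select of zero for x == 0,
++;; which is the three-insn sequence the expander emits.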
++ ++(define_expand "ffsdi2" ++ [(set (match_dup 2) ++ (ctz:DI (match_operand:DI 1 "register_operand"))) ++ (set (match_dup 3) ++ (plus:DI (match_dup 2) (const_int 1))) ++ (set (match_operand:DI 0 "register_operand") ++ (if_then_else:DI (eq (match_dup 1) (const_int 0)) ++ (const_int 0) (match_dup 3)))] ++ "" ++{ ++ operands[2] = gen_reg_rtx (DImode); ++ operands[3] = gen_reg_rtx (DImode); ++}) ++ ++(define_insn "clzdi2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (clz:DI (match_operand:DI 1 "register_operand" "r")))] ++ "" ++ "ctlz %1,%0" ++ [(set_attr "type" "mvi")]) ++ ++(define_insn "ctzdi2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ctz:DI (match_operand:DI 1 "register_operand" "r")))] ++ "" ++ "cttz %1,%0" ++ [(set_attr "type" "mvi")]) ++ ++(define_insn "popcountdi2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (popcount:DI (match_operand:DI 1 "register_operand" "r")))] ++ "" ++ "ctpop %1,%0" ++ [(set_attr "type" "mvi")]) ++ ++(define_insn "popcountsi2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (popcount:SI (match_operand:SI 1 "register_operand" "r")))] ++ "" ++ "zapnot %1,15,%0\;ctpop %0,%0" ++ [(set_attr "type" "mvi")]) ++ ++(define_expand "bswapsi2" ++ [(set (match_operand:SI 0 "register_operand") ++ (bswap:SI (match_operand:SI 1 "register_operand")))] ++ "!optimize_size" ++{ ++ if (TARGET_SW8A == 0 || flag_sw_rev != 1) ++ { ++ rtx t0, t1; ++ ++ t0 = gen_reg_rtx (DImode); ++ t1 = gen_reg_rtx (DImode); ++ ++ emit_insn (gen_inslh (t0, gen_lowpart (DImode, operands[1]), GEN_INT (7))); ++ emit_insn (gen_inswl_const (t1, gen_lowpart (HImode, operands[1]), ++ GEN_INT (24))); ++ emit_insn (gen_iordi3 (t1, t0, t1)); ++ emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (16))); ++ emit_insn (gen_anddi3 (t1, t1, sw_64_expand_zap_mask (0x5))); ++ emit_insn (gen_anddi3 (t0, t0, sw_64_expand_zap_mask (0xa))); ++ emit_insn (gen_addsi3 (operands[0], gen_lowpart (SImode, t0), ++ gen_lowpart (SImode, t1))); ++ DONE; ++ } ++ else ++ { ++ emit_insn (gen_bswapsi2_internal (operands[0], operands[1])); ++ DONE; ++ } ++}) ++ ++(define_expand "bswapdi2" ++ [(set (match_operand:DI 0 "register_operand") ++ (bswap:DI (match_operand:DI 1 "register_operand")))] ++ "!optimize_size" ++{ ++ if (TARGET_SW8A == 0 || flag_sw_rev != 1) ++ { ++ rtx t0, t1; ++ ++ t0 = gen_reg_rtx (DImode); ++ t1 = gen_reg_rtx (DImode); ++ ++ /* This method of shifting and masking is not specific to Sw_64, but ++ is only profitable on Sw_64 because of our handy byte zap insn. 
*/ ++ ++ emit_insn (gen_lshrdi3 (t0, operands[1], GEN_INT (32))); ++ emit_insn (gen_ashldi3 (t1, operands[1], GEN_INT (32))); ++ emit_insn (gen_iordi3 (t1, t0, t1)); ++ ++ emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (16))); ++ emit_insn (gen_ashldi3 (t1, t1, GEN_INT (16))); ++ emit_insn (gen_anddi3 (t0, t0, sw_64_expand_zap_mask (0xcc))); ++ emit_insn (gen_anddi3 (t1, t1, sw_64_expand_zap_mask (0x33))); ++ emit_insn (gen_iordi3 (t1, t0, t1)); ++ ++ emit_insn (gen_lshrdi3 (t0, t1, GEN_INT (8))); ++ emit_insn (gen_ashldi3 (t1, t1, GEN_INT (8))); ++ emit_insn (gen_anddi3 (t0, t0, sw_64_expand_zap_mask (0xaa))); ++ emit_insn (gen_anddi3 (t1, t1, sw_64_expand_zap_mask (0x55))); ++ emit_insn (gen_iordi3 (operands[0], t0, t1)); ++ DONE; ++ } ++ else ++ { ++ emit_insn (gen_bswapdi2_internal (operands[0], operands[1])); ++ DONE; ++ } ++}) ++ ++(define_insn "bswaphi2" ++ [(set (match_operand:HI 0 "register_operand" "=r") ++ (bswap:HI (match_operand:HI 1 "register_operand" "r")))] ++ "TARGET_SW8A && flag_sw_rev == 1" ++ "revbh %1,%0" ++ [(set_attr "isa" "sw8a")]) ++ ++(define_insn "bswapsi2_internal" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (bswap:SI (match_operand:SI 1 "register_operand" "r")))] ++ "TARGET_SW8A && flag_sw_rev == 1" ++ "revbw %1,%0" ++ [(set_attr "isa" "sw8a")]) ++ ++(define_insn "bswapdi2_internal" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (bswap:DI (match_operand:DI 1 "register_operand" "r")))] ++ "TARGET_SW8A && flag_sw_rev == 1" ++ "revbl %1,%0" ++ [(set_attr "isa" "sw8a")]) ++ ++(define_insn "ldfdi2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (fix:DI ++ (unspec:DF [(match_operand:DF 1 "register_operand" "fG")] ++ FRINT)))] ++ "TARGET_SW8A && flag_sw_cmov == 1" ++ "cmov%-l %1, %0" ++ [(set_attr "type" "frint")]) ++ ++(define_insn "fix_truncdfdi2_8a" ++ [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&r,&r") ++ (fix:DI ++ (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] ++ "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1" ++ "cmov%-l%T2 %R1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "c") ++ (set_attr "trap_suffix" "v_sv_svi") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn "ludfdi2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unsigned_fix:DI ++ (unspec:DF [(match_operand:DF 1 "register_operand" "fG")] ++ FRINT)))] ++ "TARGET_SW8A && flag_sw_cmov == 1" ++ "cmov%-lu %1, %0" ++ [(set_attr "type" "frint")]) ++ ++(define_insn "fixuns_truncdfdi2_internal" ++ [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&r,&r") ++ (unsigned_fix:DI ++ (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] ++ "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1" ++ "cmov%-lu%T2 %R1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "c") ++ (set_attr "trap_suffix" "v_sv_svi") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn "idfsi2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (fix:SI ++ (unspec:DF [(match_operand:DF 1 "register_operand" "fG")] ++ FRINT)))] ++ "TARGET_SW8A && flag_sw_cmov == 1" ++ "cmov%-w %1, %0" ++ [(set_attr "type" "frint")]) ++ ++;; CMOVDW_Z PART1 ++(define_insn "fix_truncdfsi2_8a" ++ [(set (match_operand:SI 0 "reg_no_subreg_operand" "=&r,&r") ++ (fix:SI ++ (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] ++ "TARGET_SW8A 
&& TARGET_FP && flag_sw_cmov == 1" ++ "cmov%-w%T2 %R1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "c") ++ (set_attr "trap_suffix" "v_sv_svi") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++;; CMOVDW_Z PART2 ++(define_expand "fix_truncdfsi2" ++ [(set (match_operand:SI 0 "reg_no_subreg_operand") ++ (fix:SI (match_operand:DF 1 "reg_or_0_operand")))] ++ "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1") ++ ++(define_insn "iudfsi2" ++ [(set (match_operand:SI 0 "register_operand" "=&r,&r") ++ (unsigned_fix:SI ++ (unspec:DF [(match_operand:DF 1 "register_operand" "fG,fG")] ++ FRINT)))] ++ "TARGET_SW8A && flag_sw_cmov == 1" ++ "cmov%-wu %1, %0" ++ [(set_attr "type" "frint")]) ++ ++;; CMOVDWU_Z PART1 ++(define_insn "*fixuns_truncdfsi2" ++ [(set (match_operand:SI 0 "reg_no_subreg_operand" "=&r,&r") ++ (unsigned_fix:SI ++ (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] ++ "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1" ++ "cmov%-wu%T2 %R1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "c") ++ (set_attr "trap_suffix" "v_sv_svi") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++;; CMOVDWU_Z PART2 ++(define_expand "fixuns_truncdfsi2" ++ [(set (match_operand:SI 0 "reg_no_subreg_operand") ++ (unsigned_fix:SI (match_operand:DF 1 "reg_or_0_operand")))] ++ "TARGET_SW8A && TARGET_FP && flag_sw_cmov == 1") ++ ++(define_insn "floatdisf2_8a" ++ [(set (match_operand:SF 0 "register_operand" "=&f,&f") ++ (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "r,r")))] ++ "TARGET_SW8A && flag_sw_cmov == 1" ++ "cmovls %1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn "floatunsdisf2" ++ [(set (match_operand:SF 0 "register_operand" "=&f,&f") ++ (unsigned_float:SF (match_operand:DI 1 "reg_no_subreg_operand" "r,r")))] ++ "TARGET_SW8A && flag_sw_cmov == 1" ++ "cmovuls %1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn "floatsisf2" ++ [(set (match_operand:SF 0 "register_operand" "=&f,&f") ++ (float:SF (match_operand:SI 1 "reg_no_subreg_operand" "r,r")))] ++ "TARGET_SW8A && flag_sw_cmov == 1" ++ "cmovws %1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn "floatunssisf2" ++ [(set (match_operand:SF 0 "register_operand" "=&f,&f") ++ (unsigned_float:SF (match_operand:SI 1 "reg_no_subreg_operand" "r,r")))] ++ "TARGET_SW8A && flag_sw_cmov == 1" ++ "cmovuws %1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn 
"floatdidf2_8a" ++ [(set (match_operand:DF 0 "register_operand" "=&f,&f") ++ (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "r,r")))] ++ "TARGET_SW8A && flag_sw_cmov == 1 && TARGET_FP" ++ "cmovl%-%/ %1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn "floatunsdidf2" ++ [(set (match_operand:DF 0 "register_operand" "=&f,&f") ++ (unsigned_float:DF (match_operand:DI 1 "reg_no_subreg_operand" "r,r")))] ++ "TARGET_SW8A && flag_sw_cmov == 1" ++ "cmovuld %1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn "floatsidf2" ++ [(set (match_operand:DF 0 "register_operand" "=&f,&f") ++ (float:DF (match_operand:SI 1 "reg_no_subreg_operand" "r,r")))] ++ "TARGET_SW8A && flag_sw_cmov == 1" ++ "cmovwd %1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn "floatunssidf2" ++ [(set (match_operand:DF 0 "register_operand" "=&f,&f") ++ (unsigned_float:DF (match_operand:SI 1 "reg_no_subreg_operand" "r,r")))] ++ "TARGET_SW8A && flag_sw_cmov == 1" ++ "cmovuwd %1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn "builtin_sbt" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (and:DI (unspec:DI ++ [(match_operand:DI 2 "reg_or_6bit_operand" "rI")] ++ UNSPEC_SBT) ++ (match_operand:DI 1 "register_operand" "r")))] ++ "flag_sw_bitop" ++ "sbt %r1,%2,%0" ++ [(set_attr "type" "shift")]) ++ ++(define_insn "builtin_cbt" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (and:DI (unspec:DI ++ [(match_operand:DI 2 "reg_or_6bit_operand" "rI")] ++ UNSPEC_CBT) ++ (match_operand:DI 1 "register_operand" "r")))] ++ "flag_sw_bitop" ++ "cbt %r1,%2,%0" ++ [(set_attr "type" "shift")]) ++ ++(define_insn "lshrsi3" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (lshiftrt:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") ++ (match_operand:SI 2 "reg_or_5bit_operand" "rY")))] ++ "TARGET_SW8A && flag_sw_shift_word == 1" ++ "srlw %r1,%2,%0" ++ [(set_attr "type" "shift") ++ (set_attr "isa" "sw8a")]) ++ ++(define_insn "ashrsi3" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (ashiftrt:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") ++ (match_operand:SI 2 "reg_or_5bit_operand" "rY")))] ++ "TARGET_SW8A && flag_sw_shift_word == 1" ++ "sraw %r1,%2,%0" ++ [(set_attr "type" "shift") ++ (set_attr "isa" "sw8a")]) ++ ++(define_insn "rotlsi3" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (subreg:SI ++ (zero_extend:DI (rotate:SI (match_operand:SI 1 "reg_or_0_operand" "rJ") ++ (match_operand:SI 2 "reg_or_5bit_operand" "rY"))) 0))] ++ "TARGET_SW8A && flag_sw_shift_word == 1" ++ "rolw %r1,%2,%0" ++ [(set_attr "type" "shift") ++ (set_attr "isa" "sw8a")]) ++ ++(define_insn 
"rotldi3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (rotate:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") ++ (match_operand:DI 2 "reg_or_6bit_operand" "rS")))] ++ "TARGET_SW8A && flag_sw_shift_word == 1" ++ "roll %r1,%2,%0" ++ [(set_attr "type" "shift") ++ (set_attr "isa" "sw8a")]) ++ ++;; Next come the shifts and the various extract and insert operations. ++ ++(define_insn "ashldi3" ++ [(set (match_operand:DI 0 "register_operand" "=r,r") ++ (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ,rJ") ++ (match_operand:DI 2 "reg_or_6bit_operand" "P,rS")))] ++ "" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ if (operands[2] == const1_rtx) ++ return "addl %r1,%r1,%0"; ++ else ++ return "sll %r1,%2,%0"; ++ case 1: ++ if (TARGET_SW8A == 0 || flag_sw_shift_word != 1) ++ return "sll %r1,%2,%0"; ++ else ++ return "slll %r1,%2,%0"; ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "iadd,shift")]) ++ ++(define_expand "ashlsi3" ++ [(set (match_operand:SI 0 "register_operand") ++ (ashift:SI (match_operand:SI 1 "reg_or_0_operand") ++ (match_operand:SI 2 "reg_or_5bit_operand")))]) ++ ++(define_insn "*ashlsi3_sll" ++ [(set (match_operand:SI 0 "register_operand" "=r,&r") ++ (ashift:SI (match_operand:SI 1 "reg_or_0_operand" "rJ,rJ") ++ (match_operand:SI 2 "reg_or_5bit_operand" "P,rS")))] ++ "TARGET_SW8A == 0 || flag_sw_shift_word != 1" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ if (operands[2] == const1_rtx) ++ return "addw %r1,%r1,%0"; ++ else ++ return "s%P2addw %r1,0,%0"; ++ case 1: ++ if (REG_P (operands[2])) ++ return "and %2,31,%0\;sll %r1,%0,%0"; ++ else ++ return "sll %r1,%2,%0"; ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "iadd,shift")]) ++ ++(define_insn "*ashlsi3_sllw" ++ [(set (match_operand:SI 0 "register_operand" "=r,r") ++ (ashift:SI (match_operand:SI 1 "reg_or_0_operand" "rJ,rJ") ++ (match_operand:SI 2 "reg_or_5bit_operand" "P,rY")))] ++ "TARGET_SW8A && flag_sw_shift_word == 1" ++{ ++ switch (which_alternative) ++ { ++ case 0: ++ if (operands[2] == const1_rtx) ++ return "addw %r1,%r1,%0"; ++ else ++ return "s%P2addw %r1,0,%0"; ++ case 1: ++ return "sllw %r1,%2,%0"; ++ default: ++ gcc_unreachable (); ++ } ++} ++ [(set_attr "type" "iadd,shift") ++ (set_attr "isa" "*,sw8a")]) ++ ++(define_insn "*ashldi_se" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (subreg:SI (ashift:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") ++ (match_operand:DI 2 "const_int_operand" "P")) ++ 0)))] ++ "IN_RANGE (INTVAL (operands[2]), 1, 3)" ++{ ++ if (operands[2] == const1_rtx) ++ return "addw %r1,%r1,%0"; ++ else ++ return "s%P2addw %r1,0,%0"; ++} ++ [(set_attr "type" "iadd")]) ++ ++(define_insn "lshrdi3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") ++ (match_operand:DI 2 "reg_or_6bit_operand" "rS")))] ++ "" ++{ ++ if (TARGET_SW8A == 0 || flag_sw_shift_word != 1) ++ return "srl %r1,%2,%0"; ++ else ++ return "srll %r1,%2,%0"; ++} ++ [(set_attr "type" "shift")]) ++ ++(define_insn "ashrdi3" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ashiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") ++ (match_operand:DI 2 "reg_or_6bit_operand" "rS")))] ++ "" ++{ ++ if (TARGET_SW8A == 0 || flag_sw_shift_word != 1) ++ return "sra %r1,%2,%0"; ++ else ++ return "sral %r1,%2,%0"; ++} ++ [(set_attr "type" "shift")]) ++ ++(define_insn "extendqi2" ++ [(set (match_operand:I24MODE 0 "register_operand" "=r") ++ (sign_extend:I24MODE ++ (match_operand:QI 1 
"register_operand" "r")))] ++ "" ++ "sextb %1,%0" ++ [(set_attr "type" "shift")]) ++ ++(define_expand "extendqidi2" ++ [(set (match_operand:DI 0 "register_operand") ++ (sign_extend:DI (match_operand:QI 1 "general_operand")))] ++ "" ++{ ++ operands[1] = force_reg (QImode, operands[1]); ++}) ++ ++(define_insn "*extendqidi2_bwx" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI (match_operand:QI 1 "register_operand" "r")))] ++ "" ++ "sextb %1,%0" ++ [(set_attr "type" "shift")]) ++ ++(define_insn "extendhisi2" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (sign_extend:SI (match_operand:HI 1 "register_operand" "r")))] ++ "" ++ "sexth %1,%0" ++ [(set_attr "type" "shift")]) ++ ++(define_expand "extendhidi2" ++ [(set (match_operand:DI 0 "register_operand") ++ (sign_extend:DI (match_operand:HI 1 "general_operand")))] ++ "" ++{ ++ operands[1] = force_reg (HImode, operands[1]); ++}) ++ ++(define_insn "*extendhidi2_bwx" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI (match_operand:HI 1 "register_operand" "r")))] ++ "" ++ "sexth %1,%0" ++ [(set_attr "type" "shift")]) ++ ++;; Here's how we sign extend an unaligned byte and halfword. Doing this ++;; as a pattern saves one instruction. The code is similar to that for ++;; the unaligned loads (see below). ++;; ++;; Operand 1 is the address, operand 0 is the result. ++ ++(define_expand "unaligned_extendqidi" ++ [(set (match_dup 3) ++ (mem:DI (and:DI (match_operand:DI 1 "address_operand") (const_int -8)))) ++ (set (match_dup 4) ++ (ashift:DI (match_dup 3) ++ (minus:DI (const_int 64) ++ (ashift:DI ++ (and:DI (match_dup 2) (const_int 7)) ++ (const_int 3))))) ++ (set (match_operand:QI 0 "register_operand") ++ (ashiftrt:DI (match_dup 4) (const_int 56)))] ++ "" ++{ ++ operands[0] = gen_lowpart (DImode, operands[0]); ++ operands[2] = get_unaligned_offset (operands[1], 1); ++ operands[3] = gen_reg_rtx (DImode); ++ operands[4] = gen_reg_rtx (DImode); ++}) ++ ++(define_expand "unaligned_extendhidi" ++ [(set (match_dup 3) ++ (mem:DI (and:DI (match_operand:DI 1 "address_operand") (const_int -8)))) ++ (set (match_dup 4) ++ (ashift:DI (match_dup 3) ++ (minus:DI (const_int 64) ++ (ashift:DI ++ (and:DI (match_dup 2) (const_int 7)) ++ (const_int 3))))) ++ (set (match_operand:HI 0 "register_operand") ++ (ashiftrt:DI (match_dup 4) (const_int 48)))] ++ "" ++{ ++ operands[0] = gen_lowpart (DImode, operands[0]); ++ operands[2] = get_unaligned_offset (operands[1], 2); ++ operands[3] = gen_reg_rtx (DImode); ++ operands[4] = gen_reg_rtx (DImode); ++}) ++ ++;; add if condition ++(define_insn "*extxl_const" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") ++ (match_operand:DI 2 "mode_width_operand" "n") ++ (match_operand:DI 3 "mul8_operand" "I")))] ++ "" ++{ ++ if (INTVAL (operands[2])==8) ++ return "extlb %r1,%s3,%0"; ++ else if (INTVAL (operands[2])==16) ++ return "extlh %r1,%s3,%0"; ++ else if (INTVAL (operands[2])==32) ++ return "extlw %r1,%s3,%0"; ++ else if (INTVAL (operands[2])==64) ++ return "extll %r1,%s3,%0"; ++} ++ [(set_attr "type" "shift")]) ++ ++;; add if condition ++(define_insn "extxl" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (zero_extract:DI ++ (match_operand:DI 1 "reg_or_0_operand" "rJ") ++ (match_operand:DI 2 "mode_width_operand" "n") ++ (ashift:DI (match_operand:DI 3 "reg_or_8bit_operand" "rI") ++ (const_int 3))))] ++ "" ++{ ++ if (INTVAL (operands[2])==8) ++ return "extlb %r1,%3,%0"; ++ else if (INTVAL 
(operands[2])==16) ++ return "extlh %r1,%3,%0"; ++ else if (INTVAL (operands[2])==32) ++ return "extlw %r1,%3,%0"; ++ else if (INTVAL (operands[2])==64) ++ return "extll %r1,%3,%0"; ++} ++ [(set_attr "type" "shift")]) ++ ++;; Combine has some strange notion of preserving existing undefined behavior ++;; in shifts larger than a word size. So capture these patterns that it ++;; should have turned into zero_extracts. ++ ++;; add if condition ++(define_insn "*extxl_1" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (and:DI (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") ++ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") ++ (const_int 3))) ++ (match_operand:DI 3 "mode_mask_operand" "n")))] ++ "" ++{ ++ if (INTVAL (operands[3]) == 0xff) ++ return "extlb %r1,%2,%0"; ++ else if (INTVAL (operands[3]) == 0xffff) ++ return "extlh %r1,%2,%0"; ++ else if (INTVAL (operands[3]) == 0xffffffff) ++ return "extlw %r1,%2,%0"; ++ else if (INTVAL (operands[3]) == -1) ++ return "extll %r1,%2,%0"; ++} ++ [(set_attr "type" "shift")]) ++ ++(define_insn "*extql_2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (lshiftrt:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") ++ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") ++ (const_int 3))))] ++ "" ++ "extll %1,%2,%0" ++ [(set_attr "type" "shift")]) ++ ++(define_insn "extqh" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ashift:DI ++ (match_operand:DI 1 "reg_or_0_operand" "rJ") ++ (minus:DI (const_int 64) ++ (ashift:DI ++ (and:DI ++ (match_operand:DI 2 "reg_or_8bit_operand" "rI") ++ (const_int 7)) ++ (const_int 3)))))] ++ "" ++ "exthl %r1,%2,%0" ++ [(set_attr "type" "shift")]) ++ ++(define_insn "extwh" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ashift:DI ++ (and:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") ++ (const_int 65535)) ++ (minus:DI (const_int 64) ++ (ashift:DI ++ (and:DI ++ (match_operand:DI 2 "reg_or_8bit_operand" "rI") ++ (const_int 7)) ++ (const_int 3)))))] ++ "" ++ "exthh %r1,%2,%0" ++ [(set_attr "type" "shift")]) ++ ++(define_insn "extlh" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ashift:DI ++ (and:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") ++ (const_int 2147483647)) ++ (minus:DI (const_int 64) ++ (ashift:DI ++ (and:DI ++ (match_operand:DI 2 "reg_or_8bit_operand" "rI") ++ (const_int 7)) ++ (const_int 3)))))] ++ "" ++ "exthw %r1,%2,%0" ++ [(set_attr "type" "shift")]) ++ ++;; This converts an extXl into an extXh with an appropriate adjustment ++;; to the address calculation. 
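++
++;; Editorial note (illustration, not part of the original port): the extract
++;; patterns above are the building blocks of unaligned loads.  Assuming the
++;; usual little-endian scheme, a rough C rendering of the sign-extending
++;; unaligned byte load (cf. unaligned_extendqidi) is
++;;
++;;   long load_s8_unaligned (unsigned long addr)
++;;   {
++;;     unsigned long q = *(unsigned long *) (addr & ~7UL); /* aligned 8-byte load */
++;;     long t = (long) (q << (64 - 8 * ((addr & 7) + 1))); /* wanted byte to the top */
++;;     return t >> 56;                                     /* arithmetic shift sign-extends */
++;;   }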
++ ++(define_insn "insbl_const" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ashift:DI (zero_extend:DI ++ (match_operand:QI 1 "register_operand" "r")) ++ (match_operand:DI 2 "mul8_operand" "I")))] ++ "" ++ "inslb %1,%s2,%0" ++ [(set_attr "type" "shift")]) ++ ++(define_insn "inswl_const" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ashift:DI (zero_extend:DI ++ (match_operand:HI 1 "register_operand" "r")) ++ (match_operand:DI 2 "mul8_operand" "I")))] ++ "" ++ "inslh %1,%s2,%0" ++ [(set_attr "type" "shift")]) ++ ++(define_insn "insll_const" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ashift:DI (zero_extend:DI ++ (match_operand:SI 1 "register_operand" "r")) ++ (match_operand:DI 2 "mul8_operand" "I")))] ++ "" ++ "inslw %1,%s2,%0" ++ [(set_attr "type" "shift")]) ++ ++(define_insn "insbl" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ashift:DI (zero_extend:DI ++ (match_operand:QI 1 "register_operand" "r")) ++ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") ++ (const_int 3))))] ++ "" ++ "inslb %1,%2,%0" ++ [(set_attr "type" "shift")]) ++ ++(define_insn "inswl" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ashift:DI (zero_extend:DI ++ (match_operand:HI 1 "register_operand" "r")) ++ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") ++ (const_int 3))))] ++ "" ++ "inslh %1,%2,%0" ++ [(set_attr "type" "shift")]) ++ ++(define_insn "insll" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ashift:DI (zero_extend:DI ++ (match_operand:SI 1 "register_operand" "r")) ++ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") ++ (const_int 3))))] ++ "" ++ "inslw %1,%2,%0" ++ [(set_attr "type" "shift")]) ++ ++(define_insn "insql" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (ashift:DI (match_operand:DI 1 "register_operand" "r") ++ (ashift:DI (match_operand:DI 2 "reg_or_8bit_operand" "rI") ++ (const_int 3))))] ++ "" ++ "insll %1,%2,%0" ++ [(set_attr "type" "shift")]) ++ ++;; Combine has this sometimes habit of moving the and outside of the ++;; shift, making life more interesting. ++ ++(define_insn "*insxl" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (and:DI (ashift:DI (match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "mul8_operand" "I")) ++ (match_operand:DI 3 "const_int_operand" "i")))] ++ "((unsigned HOST_WIDE_INT) 0xff << INTVAL (operands[2]) ++ == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) ++ || ((unsigned HOST_WIDE_INT) 0xffff << INTVAL (operands[2]) ++ == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) ++ || ((unsigned HOST_WIDE_INT) 0xffffffff << INTVAL (operands[2]) ++ == (unsigned HOST_WIDE_INT) INTVAL (operands[3]))" ++{ ++#if HOST_BITS_PER_WIDE_INT == 64 ++ if ((unsigned HOST_WIDE_INT) 0xff << INTVAL (operands[2]) ++ == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) ++ return "inslb %1,%s2,%0"; ++ if ((unsigned HOST_WIDE_INT) 0xffff << INTVAL (operands[2]) ++ == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) ++ return "inslh %1,%s2,%0"; ++ if ((unsigned HOST_WIDE_INT) 0xffffffff << INTVAL (operands[2]) ++ == (unsigned HOST_WIDE_INT) INTVAL (operands[3])) ++ return "inslw %1,%s2,%0"; ++#endif ++ gcc_unreachable (); ++} ++ [(set_attr "type" "shift")]) ++ ++;; We do not include the insXh insns because they are complex to express ++;; and it does not appear that we would ever want to generate them. ++;; ++;; Since we need them for block moves, though, cop out and use unspec. 
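++
++;; Editorial note (illustration, not part of the original port): the insert
++;; and mask patterns compose into the classic read-modify-write sequence for
++;; unaligned or sub-word stores used by the block-move code.  A rough C
++;; sketch for a single byte, with hypothetical names, is
++;;
++;;   void store_u8_unaligned (unsigned long addr, unsigned char v)
++;;   {
++;;     unsigned long *p = (unsigned long *) (addr & ~7UL);
++;;     unsigned long k = addr & 7;
++;;     unsigned long q = *p;
++;;     q &= ~(0xffUL << (8 * k));         /* masklb: clear the target byte */
++;;     q |= (unsigned long) v << (8 * k); /* inslb: byte shifted into place */
++;;     *p = q;                            /* store the quadword back */
++;;   }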
++ ++(define_insn "insxh" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "mode_width_operand" "n") ++ (match_operand:DI 3 "reg_or_8bit_operand" "rI")] ++ UNSPEC_INSXH))] ++ "" ++{ ++ if (INTVAL (operands[2])==16) ++ return "inshh %r1,%3,%0"; ++ else if (INTVAL (operands[2])==32) ++ return "inshw %r1,%3,%0"; ++ else if (INTVAL (operands[2])==64) ++ return "inshl %r1,%3,%0"; ++} ++ [(set_attr "type" "shift")]) ++ ++(define_insn "mskxl" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (and:DI (not:DI (ashift:DI ++ (match_operand:DI 2 "mode_mask_operand" "n") ++ (ashift:DI ++ (match_operand:DI 3 "reg_or_8bit_operand" "rI") ++ (const_int 3)))) ++ (match_operand:DI 1 "reg_or_0_operand" "rJ")))] ++ "" ++{ ++ if (INTVAL (operands[2]) == 0xff) ++ return "masklb %r1,%3,%0"; ++ else if (INTVAL (operands[2]) == 0xffff) ++ return "masklh %r1,%3,%0"; ++ else if (INTVAL (operands[2]) == 0xffffffff) ++ return "masklw %r1,%3,%0"; ++ else if (INTVAL (operands[2]) == -1) ++ return "maskll %r1,%3,%0"; ++} ++ [(set_attr "type" "shift")]) ++ ++;; We do not include the mskXh insns because it does not appear we would ++;; ever generate one. ++;; ++;; Again, we do for block moves and we use unspec again. ++ ++(define_insn "mskxh" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "mode_width_operand" "n") ++ (match_operand:DI 3 "reg_or_8bit_operand" "rI")] ++ UNSPEC_MSKXH))] ++ "" ++{ ++ if (INTVAL (operands[2])==16) ++ return "maskhh %r1,%3,%0"; ++ else if (INTVAL (operands[2])==32) ++ return "maskhw %r1,%3,%0"; ++ else if (INTVAL (operands[2])==64) ++ return "maskhl %r1,%3,%0"; ++} ++ [(set_attr "type" "shift")]) ++ ++ ++(define_insn_and_split "*ze_and_ne" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") ++ (const_int 1) ++ (match_operand 2 "const_int_operand" "I")))] ++ "(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8" ++ "#" ++ "(unsigned HOST_WIDE_INT) INTVAL (operands[2]) < 8" ++ [(set (match_dup 0) ++ (and:DI (match_dup 1) (match_dup 3))) ++ (set (match_dup 0) ++ (ne:DI (match_dup 0) (const_int 0)))] ++ "operands[3] = GEN_INT (1 << INTVAL (operands[2]));") ++ ++;; Floating-point operations. All the double-precision insns can extend ++;; from single, so indicate that. The exception are the ones that simply ++;; play with the sign bits; it's not clear what to do there. 
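++
++;; Editorial note (illustration, not part of the original port): the sign-bit
++;; patterns below all reduce to copy-sign operations.  Reading the templates,
++;; fcpys takes its sign from the first source and its magnitude from the
++;; second, so with $f31 supplying +0.0 a rough C equivalent is
++;;
++;;   double my_fabs (double x)                /* fcpys $f31,x,r */
++;;   { return __builtin_copysign (x, 0.0); }
++;;
++;;   double my_copysign (double x, double s)  /* fcpys s,x,r */
++;;   { return __builtin_copysign (x, s); }
++;;
++;; and negation (fcpysn x,x,r) simply flips the sign bit of x.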
++ ++(define_mode_iterator FMODE [SF DF]) ++ ++(define_mode_attr opmode [(SF "si") (DF "di")]) ++ ++(define_insn "abs2" ++ [(set (match_operand:FMODE 0 "register_operand" "=f") ++ (abs:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG")))] ++ "TARGET_FP" ++ "fcpys $f31,%R1,%0" ++ [(set_attr "type" "fcpys")]) ++ ++(define_insn "*nabs2" ++ [(set (match_operand:FMODE 0 "register_operand" "=f") ++ (neg:FMODE ++ (abs:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG"))))] ++ "TARGET_FP" ++ "fcpysn $f31,%R1,%0" ++ [(set_attr "type" "fadd")]) ++ ++(define_expand "abstf2" ++ [(parallel [(set (match_operand:TF 0 "register_operand") ++ (abs:TF (match_operand:TF 1 "reg_or_0_operand"))) ++ (use (match_dup 2))])] ++ "TARGET_HAS_XFLOATING_LIBS" ++ "operands[2] = force_reg (DImode, GEN_INT (HOST_WIDE_INT_1U << 63));") ++ ++(define_insn_and_split "*abstf_internal" ++ [(set (match_operand:TF 0 "register_operand" "=r") ++ (abs:TF (match_operand:TF 1 "reg_or_0_operand" "rG"))) ++ (use (match_operand:DI 2 "register_operand" "r"))] ++ "TARGET_HAS_XFLOATING_LIBS" ++ "#" ++ "&& reload_completed" ++ [(const_int 0)] ++ "sw_64_split_tfmode_frobsign (operands, gen_andnotdi3); DONE;") ++ ++(define_insn "neg2" ++ [(set (match_operand:FMODE 0 "register_operand" "=f") ++ (neg:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG")))] ++ "TARGET_FP" ++ "fcpysn %R1,%R1,%0" ++ [(set_attr "type" "fadd")]) ++ ++(define_expand "negtf2" ++ [(parallel [(set (match_operand:TF 0 "register_operand") ++ (neg:TF (match_operand:TF 1 "reg_or_0_operand"))) ++ (use (match_dup 2))])] ++ "TARGET_HAS_XFLOATING_LIBS" ++ "operands[2] = force_reg (DImode, GEN_INT ((HOST_WIDE_INT) 1 << 63));") ++ ++(define_insn_and_split "*negtf_internal" ++ [(set (match_operand:TF 0 "register_operand" "=r") ++ (neg:TF (match_operand:TF 1 "reg_or_0_operand" "rG"))) ++ (use (match_operand:DI 2 "register_operand" "r"))] ++ "TARGET_HAS_XFLOATING_LIBS" ++ "#" ++ "&& reload_completed" ++ [(const_int 0)] ++ "sw_64_split_tfmode_frobsign (operands, gen_xordi3); DONE;") ++ ++(define_insn "copysign3" ++ [(set (match_operand:FMODE 0 "register_operand" "=f") ++ (unspec:FMODE [(match_operand:FMODE 1 "reg_or_0_operand" "fG") ++ (match_operand:FMODE 2 "reg_or_0_operand" "fG")] ++ UNSPEC_COPYSIGN))] ++ "TARGET_FP" ++ "fcpys %R2,%R1,%0" ++ [(set_attr "type" "fadd")]) ++ ++(define_insn "*ncopysign3" ++ [(set (match_operand:FMODE 0 "register_operand" "=f") ++ (neg:FMODE ++ (unspec:FMODE [(match_operand:FMODE 1 "reg_or_0_operand" "fG") ++ (match_operand:FMODE 2 "reg_or_0_operand" "fG")] ++ UNSPEC_COPYSIGN)))] ++ "TARGET_FP" ++ "fcpysn %R2,%R1,%0" ++ [(set_attr "type" "fadd")]) ++ ++(define_insn "*add3" ++ [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") ++ (plus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") ++ (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] ++ "flag_sw_sdsame == 0" ++ "fadd%/ %R1,%R2,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_insn "*add3_same" ++ [(set (match_operand:FMODE 0 "register_operand" "=f,f") ++ (plus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") ++ (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] ++ "flag_sw_sdsame == 1" ++ "fadd%/ %R1,%R2,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" 
"u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_expand "add3" ++ [(set (match_operand:FMODE 0 "register_operand" "=f,f") ++ (plus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") ++ (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] ++ "TARGET_FP" ++ "" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn "*fmasf4" ++ [(set (match_operand:SF 0 "register_operand" "=&f") ++ (fma:SF (match_operand:SF 1 "register_operand" "f") ++ (match_operand:SF 2 "register_operand" "f") ++ (match_operand:SF 3 "register_operand" "f")))] ++ "flag_sw_sdsame == 0" ++ "fmas %R1,%R2,%R3,%0" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*fmasf4_same" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (fma:SF (match_operand:SF 1 "register_operand" "f") ++ (match_operand:SF 2 "register_operand" "f") ++ (match_operand:SF 3 "register_operand" "f")))] ++ "flag_sw_sdsame == 1" ++ "fmas %R1,%R2,%R3,%0" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_expand "fmasf4" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (fma:SF (match_operand:SF 1 "register_operand" "f") ++ (match_operand:SF 2 "register_operand" "f") ++ (match_operand:SF 3 "register_operand" "f")))] ++ "flag_sw_fma==1 && TARGET_FP" ++ "" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++ ++(define_insn "*fmadf4" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (fma:DF (match_operand:DF 1 "register_operand" "f") ++ (match_operand:DF 2 "register_operand" "f") ++ (match_operand:DF 3 "register_operand" "f")))] ++ "flag_sw_sdsame == 0" ++ "fmad %R1,%R2,%R3,%0" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*fmadf4_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (fma:DF (match_operand:DF 1 "register_operand" "f") ++ (match_operand:DF 2 "register_operand" "f") ++ (match_operand:DF 3 "register_operand" "f")))] ++ "flag_sw_sdsame == 1" ++ "fmad %R1,%R2,%R3,%0" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_expand "fmadf4" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (fma:DF (match_operand:DF 1 "register_operand" "f") ++ (match_operand:DF 2 "register_operand" "f") ++ (match_operand:DF 3 "register_operand" "f")))] ++ "flag_sw_fma==1 && TARGET_FP" ++ "" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++ ++(define_insn "*fmssf4" ++ [(set (match_operand:SF 0 "register_operand" "=&f") ++ (fma:SF ++ (match_operand:SF 1 "register_operand" "f") ++ (match_operand:SF 2 "register_operand" "f") ++ (neg:SF (match_operand:SF 3 "register_operand" "f"))))] ++ "flag_sw_sdsame == 0" ++ "fmss %R1,%R2,%R3,%0" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ 
(set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*fmssf4_same" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (fma:SF ++ (match_operand:SF 1 "register_operand" "f") ++ (match_operand:SF 2 "register_operand" "f") ++ (neg:SF (match_operand:SF 3 "register_operand" "f"))))] ++ "flag_sw_sdsame == 1" ++ "fmss %R1,%R2,%R3,%0" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_expand "fmssf4" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (fma:SF ++ (match_operand:SF 1 "register_operand" "f") ++ (match_operand:SF 2 "register_operand" "f") ++ (neg:SF (match_operand:SF 3 "register_operand" "f"))))] ++ "flag_sw_fma==1 && TARGET_FP" ++ "" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++ ++(define_insn "*fmsdf4" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (fma:DF ++ (match_operand:DF 1 "register_operand" "f") ++ (match_operand:DF 2 "register_operand" "f") ++ (neg:DF (match_operand:DF 3 "register_operand" "f"))))] ++ "flag_sw_sdsame == 0" ++ "fmsd %R1,%R2,%R3,%0" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*fmsdf4_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (fma:DF ++ (match_operand:DF 1 "register_operand" "f") ++ (match_operand:DF 2 "register_operand" "f") ++ (neg:DF (match_operand:DF 3 "register_operand" "f"))))] ++ "flag_sw_sdsame == 1" ++ "fmsd %R1,%R2,%R3,%0" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_expand "fmsdf4" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (fma:DF ++ (match_operand:DF 1 "register_operand" "f") ++ (match_operand:DF 2 "register_operand" "f") ++ (neg:DF (match_operand:DF 3 "register_operand" "f"))))] ++ "flag_sw_fma==1 && TARGET_FP" ++ "" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++ ++(define_insn "*fnmasf4" ++ [(set (match_operand:SF 0 "register_operand" "=&f") ++ (fma:SF ++ (neg:SF (match_operand:SF 1 "register_operand" "f")) ++ (match_operand:SF 2 "register_operand" "f") ++ (match_operand:SF 3 "register_operand" "f")))] ++ "flag_sw_sdsame == 0" ++ "fnmas %R1,%R2,%R3,%0" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*fnmasf4_same" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (fma:SF ++ (neg:SF (match_operand:SF 1 "register_operand" "f")) ++ (match_operand:SF 2 "register_operand" "f") ++ (match_operand:SF 3 "register_operand" "f")))] ++ "flag_sw_sdsame == 1" ++ "fnmas %R1,%R2,%R3,%0" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_expand "fnmasf4" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (fma:SF ++ (neg:SF (match_operand:SF 1 "register_operand" "f")) ++ (match_operand:SF 2 "register_operand" "f") ++ (match_operand:SF 3 "register_operand" "f")))] ++ "flag_sw_fma==1 && TARGET_FP" ++ "" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++ ++(define_insn "*fnmadf4" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ 
(fma:DF ++ (neg:DF (match_operand:DF 1 "register_operand" "f")) ++ (match_operand:DF 2 "register_operand" "f") ++ (match_operand:DF 3 "register_operand" "f")))] ++ "flag_sw_sdsame == 0" ++ "fnmad %R1,%R2,%R3,%0" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*fnmadf4_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (fma:DF ++ (neg:DF (match_operand:DF 1 "register_operand" "f")) ++ (match_operand:DF 2 "register_operand" "f") ++ (match_operand:DF 3 "register_operand" "f")))] ++ "flag_sw_sdsame == 1" ++ "fnmad %R1,%R2,%R3,%0" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_expand "fnmadf4" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (fma:DF ++ (neg:DF (match_operand:DF 1 "register_operand" "f")) ++ (match_operand:DF 2 "register_operand" "f") ++ (match_operand:DF 3 "register_operand" "f")))] ++ "flag_sw_fma==1 && TARGET_FP" ++ "" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++ ++(define_insn "*fnmssf4" ++ [(set (match_operand:SF 0 "register_operand" "=&f") ++ (fma:SF ++ (neg:SF (match_operand:SF 1 "register_operand" "f")) ++ (match_operand:SF 2 "register_operand" "f") ++ (neg:SF (match_operand:SF 3 "register_operand" "f"))))] ++ "flag_sw_sdsame == 0" ++ "fnmss %R1,%R2,%R3,%0" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*fnmssf4_same" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (fma:SF ++ (neg:SF (match_operand:SF 1 "register_operand" "f")) ++ (match_operand:SF 2 "register_operand" "f") ++ (neg:SF (match_operand:SF 3 "register_operand" "f"))))] ++ "flag_sw_sdsame == 1" ++ "fnmss %R1,%R2,%R3,%0" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_expand "fnmssf4" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (fma:SF ++ (neg:SF (match_operand:SF 1 "register_operand" "f")) ++ (match_operand:SF 2 "register_operand" "f") ++ (neg:SF (match_operand:SF 3 "register_operand" "f"))))] ++ "flag_sw_fma==1 && TARGET_FP" ++ "" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++ ++(define_insn "*fnmsdf4" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (fma:DF ++ (neg:DF (match_operand:DF 1 "register_operand" "f")) ++ (match_operand:DF 2 "register_operand" "f") ++ (neg:DF (match_operand:DF 3 "register_operand" "f"))))] ++ "flag_sw_sdsame == 0" ++ "fnmsd %R1,%R2,%R3,%0" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*fnmsdf4_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (fma:DF ++ (neg:DF (match_operand:DF 1 "register_operand" "f")) ++ (match_operand:DF 2 "register_operand" "f") ++ (neg:DF (match_operand:DF 3 "register_operand" "f"))))] ++ "flag_sw_sdsame == 1" ++ "fnmsd %R1,%R2,%R3,%0" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_expand "fnmsdf4" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (fma:DF ++ (neg:DF (match_operand:DF 1 "register_operand" 
"f")) ++ (match_operand:DF 2 "register_operand" "f") ++ (neg:DF (match_operand:DF 3 "register_operand" "f"))))] ++ "flag_sw_fma==1 && TARGET_FP" ++ "" ++ [(set_attr "type" "fmadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++ ++(define_insn "*adddf_ext1" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (plus:DF (float_extend:DF ++ (match_operand:SF 1 "reg_or_0_operand" "fG")) ++ (match_operand:DF 2 "reg_or_0_operand" "fG")))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" ++ "fadd%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*adddf_ext1_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (plus:DF (float_extend:DF ++ (match_operand:SF 1 "reg_or_0_operand" "fG")) ++ (match_operand:DF 2 "reg_or_0_operand" "fG")))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" ++ "fadd%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++ ++(define_insn "*adddf_ext2" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (plus:DF (float_extend:DF ++ (match_operand:SF 1 "reg_or_0_operand" "%fG")) ++ (float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG"))))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" ++ "fadd%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*adddf_ext2_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (plus:DF (float_extend:DF ++ (match_operand:SF 1 "reg_or_0_operand" "%fG")) ++ (float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG"))))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" ++ "fadd%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_expand "addtf3" ++ [(use (match_operand:TF 0 "register_operand")) ++ (use (match_operand:TF 1 "general_operand")) ++ (use (match_operand:TF 2 "general_operand"))] ++ "TARGET_HAS_XFLOATING_LIBS" ++ "sw_64_emit_xfloating_arith (PLUS, operands); DONE;") ++ ++(define_insn "*sub3" ++ [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") ++ (minus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") ++ (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] ++ "flag_sw_sdsame == 0" ++ "fsub%/ %R1,%R2,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_insn "*sub3_same" ++ [(set (match_operand:FMODE 0 "register_operand" "=f,f") ++ (minus:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") ++ (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] ++ "flag_sw_sdsame == 1" ++ "fsub%/ %R1,%R2,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_expand "sub3" ++ [(set (match_operand:FMODE 0 "register_operand" "=f,f") ++ (minus:FMODE (match_operand:FMODE 
1 "reg_or_0_operand" "fG,fG") ++ (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] ++ "TARGET_FP" ++ "" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn "*subdf_ext1" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (minus:DF (float_extend:DF ++ (match_operand:SF 1 "reg_or_0_operand" "fG")) ++ (match_operand:DF 2 "reg_or_0_operand" "fG")))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" ++ "fsub%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*subdf_ext1_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (minus:DF (float_extend:DF ++ (match_operand:SF 1 "reg_or_0_operand" "fG")) ++ (match_operand:DF 2 "reg_or_0_operand" "fG")))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" ++ "fsub%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++ ++(define_insn "*subdf_ext2" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (minus:DF (match_operand:DF 1 "reg_or_0_operand" "fG") ++ (float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG"))))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" ++ "fsub%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*subdf_ext2_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (minus:DF (match_operand:DF 1 "reg_or_0_operand" "fG") ++ (float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG"))))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" ++ "fsub%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++ ++(define_insn "*subdf_ext3" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (minus:DF (float_extend:DF ++ (match_operand:SF 1 "reg_or_0_operand" "fG")) ++ (float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG"))))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" ++ "fsub%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*subdf_ext3_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (minus:DF (float_extend:DF ++ (match_operand:SF 1 "reg_or_0_operand" "fG")) ++ (float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG"))))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" ++ "fsub%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_expand "subtf3" ++ [(use (match_operand:TF 0 "register_operand")) ++ (use (match_operand:TF 1 "general_operand")) ++ (use (match_operand:TF 2 "general_operand"))] ++ "TARGET_HAS_XFLOATING_LIBS" ++ "sw_64_emit_xfloating_arith (MINUS, operands); DONE;") ++ ++(define_insn "*mul3" ++ [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") ++ (mult:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") ++ (match_operand:FMODE 2 
"reg_or_0_operand" "fG,fG")))] ++ "flag_sw_sdsame == 0" ++ "fmul%/ %R1,%R2,%0" ++ [(set_attr "type" "fmul") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_insn "*mul3_same" ++ [(set (match_operand:FMODE 0 "register_operand" "=f,f") ++ (mult:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") ++ (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] ++ "flag_sw_sdsame == 1" ++ "fmul%/ %R1,%R2,%0" ++ [(set_attr "type" "fmul") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_expand "mul3" ++ [(set (match_operand:FMODE 0 "register_operand" "=f,f") ++ (mult:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "%fG,fG") ++ (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] ++ "TARGET_FP" ++ "" ++ [(set_attr "type" "fmul") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn "*muldf_ext1" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (mult:DF (float_extend:DF ++ (match_operand:SF 1 "reg_or_0_operand" "fG")) ++ (match_operand:DF 2 "reg_or_0_operand" "fG")))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" ++ "fmul%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fmul") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*muldf_ext1_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (mult:DF (float_extend:DF ++ (match_operand:SF 1 "reg_or_0_operand" "fG")) ++ (match_operand:DF 2 "reg_or_0_operand" "fG")))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" ++ "fmul%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fmul") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++ ++(define_insn "*muldf_ext2" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (mult:DF (float_extend:DF ++ (match_operand:SF 1 "reg_or_0_operand" "%fG")) ++ (float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG"))))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" ++ "fmul%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fmul") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*muldf_ext2_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (mult:DF (float_extend:DF ++ (match_operand:SF 1 "reg_or_0_operand" "%fG")) ++ (float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG"))))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" ++ "fmul%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fmul") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_expand "multf3" ++ [(use (match_operand:TF 0 "register_operand")) ++ (use (match_operand:TF 1 "general_operand")) ++ (use (match_operand:TF 2 "general_operand"))] ++ "TARGET_HAS_XFLOATING_LIBS" ++ "sw_64_emit_xfloating_arith (MULT, operands); DONE;") ++ ++(define_insn "div3_ieee" ++ [(set (match_operand:FMODE 0 "register_operand" 
"=&f,&f") ++ (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") ++ (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] ++ "flag_sw_sdsame == 0" ++ "fdiv%/ %R1,%R2,%0" ++ [(set_attr "type" "fdiv") ++ (set_attr "opsize" "") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++;; Floating point reciprocal approximation ++(define_insn "fre" ++ [(set (match_operand:SFDF 0 "register_operand" "=f") ++ (unspec:SFDF [(match_operand:SFDF 1 "register_operand" "f")] ++ UNSPEC_FRECX))] ++ "(flag_sw_recip || flag_sw_recip_precision) && flag_reciprocal_math && TARGET_SW8A" ++ "frec %1,%0" ++ [(set_attr "type" "fp")]) ++ ++(define_insn "*div3" ++ [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") ++ (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") ++ (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] ++ "flag_sw_sdsame == 0" ++ "fdiv%/ %R1,%R2,%0" ++ [(set_attr "type" "fdiv") ++ (set_attr "opsize" "") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_insn "*div3_same" ++ [(set (match_operand:FMODE 0 "register_operand" "=f,f") ++ (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") ++ (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] ++ "flag_sw_sdsame == 1" ++ "fdiv%/ %R1,%R2,%0" ++ [(set_attr "type" "fdiv") ++ (set_attr "opsize" "") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_expand "div3" ++ [(set (match_operand:FMODE 0 "register_operand" "=f,f") ++ (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG") ++ (match_operand:FMODE 2 "reg_or_0_operand" "fG,fG")))] ++ "TARGET_FP" ++{ ++ if ((flag_sw_recip || flag_sw_recip_precision) && flag_reciprocal_math && TARGET_SW8A) ++ { ++ if (operands[1] == CONST0_RTX (mode)) ++ operands[1] = gen_move_reg (operands[1]); ++ ++ if (operands[2] == CONST0_RTX (mode)) ++ operands[2] = gen_move_reg (operands[2]); ++ ++ sw_64_emit_swdiv (operands[0], operands[1], operands[2], true); ++ DONE; ++ } ++ ++}) ++ ++(define_insn "*div3_fpr" ++ [(set (match_operand:FMODE 0 "register_operand" "=f") ++ (div:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG") ++ (match_operand:FMODE 2 "reg_or_0_operand" "fG")))] ++ "TARGET_FP" ++ "fdiv%/ %R1,%R2,%0" ++ [(set_attr "type" "fdiv") ++ (set_attr "opsize" "") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++ ++(define_insn "*divdf_ext1" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (div:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG")) ++ (match_operand:DF 2 "reg_or_0_operand" "fG")))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" ++ "fdiv%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fdiv") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*divdf_ext1_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (div:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG")) ++ (match_operand:DF 2 
"reg_or_0_operand" "fG")))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" ++ "fdiv%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fdiv") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++ ++(define_insn "*divdf_ext2" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (div:DF (match_operand:DF 1 "reg_or_0_operand" "fG") ++ (float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG"))))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" ++ "fdiv%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fdiv") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*divdf_ext2_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (div:DF (match_operand:DF 1 "reg_or_0_operand" "fG") ++ (float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG"))))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" ++ "fdiv%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fdiv") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++ ++(define_insn "*divdf_ext3" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (div:DF (float_extend:DF ++ (match_operand:SF 1 "reg_or_0_operand" "fG")) ++ (float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG"))))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" ++ "fdiv%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fdiv") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_insn "*divdf_ext3_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (div:DF (float_extend:DF ++ (match_operand:SF 1 "reg_or_0_operand" "fG")) ++ (float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG"))))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" ++ "fdiv%-%/ %R1,%R2,%0" ++ [(set_attr "type" "fdiv") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui")]) ++(define_expand "divtf3" ++ [(use (match_operand:TF 0 "register_operand")) ++ (use (match_operand:TF 1 "general_operand")) ++ (use (match_operand:TF 2 "general_operand"))] ++ "TARGET_HAS_XFLOATING_LIBS" ++ "sw_64_emit_xfloating_arith (DIV, operands); DONE;") ++ ++;; frint floating-point round to integral standard patterns. 
++(define_insn "2" ++ [(set (match_operand:SFDF 0 "register_operand" "=f") ++ (unspec:SFDF [(match_operand:SFDF 1 "register_operand" "f")] ++ FRINT))] ++ "TARGET_SW8A && flag_sw_fprnd" ++ "fri %1, %0" ++ [(set_attr "type" "frint")]) ++ ++(define_insn "*sqrt2" ++ [(set (match_operand:FMODE 0 "register_operand" "=&f,&f") ++ (sqrt:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG")))] ++ "flag_sw_sdsame == 0" ++ "fsqrt%/ %R1,%0" ++ [(set_attr "type" "fsqrt") ++ (set_attr "opsize" "") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_insn "*sqrt2_same" ++ [(set (match_operand:FMODE 0 "register_operand" "=f,f") ++ (sqrt:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG")))] ++ "flag_sw_sdsame == 1" ++ "fsqrt%/ %R1,%0" ++ [(set_attr "type" "fsqrt") ++ (set_attr "opsize" "") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_expand "sqrt2" ++ [(set (match_operand:FMODE 0 "register_operand" "=f,f") ++ (sqrt:FMODE (match_operand:FMODE 1 "reg_or_0_operand" "fG,fG")))] ++ "TARGET_FP" ++ "" ++ [(set_attr "type" "fsqrt") ++ (set_attr "opsize" "") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++;; Define conversion operators between DFmode and SImode, using the cvtql ++;; instruction. To allow combine et al to do useful things, we keep the ++;; operation as a unit until after reload, at which point we split the ++;; instructions. ++;; ++;; Note that we (attempt to) only consider this optimization when the ++;; ultimate destination is memory. If we will be doing further integer ++;; processing, it is cheaper to do the truncation in the int regs. 
++ ++(define_insn "*cvtql" ++ [(set (match_operand:SF 0 "register_operand" "=&f") ++ (unspec:SF [(match_operand:DI 1 "reg_or_0_operand" "fG")] ++ UNSPEC_CVTQL))] ++ "TARGET_FP && flag_sw_sdsame == 0" ++ "fcvtlw%/ %R1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "trap_suffix" "v_sv")]) ++(define_insn "*cvtql_same" ++ [(set (match_operand:SF 0 "register_operand" "=f") ++ (unspec:SF [(match_operand:DI 1 "reg_or_0_operand" "fG")] ++ UNSPEC_CVTQL))] ++ "TARGET_FP && flag_sw_sdsame == 1" ++ "fcvtlw%/ %R1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "trap_suffix" "v_sv")]) ++ ++(define_insn_and_split "*fix_truncdfsi_ieee" ++ [(set (match_operand:SI 0 "memory_operand" "=m") ++ (subreg:SI ++ (match_operator:DI 4 "fix_operator" ++ [(match_operand:DF 1 "reg_or_0_operand" "fG")]) 0)) ++ (clobber (match_scratch:DI 2 "=&f")) ++ (clobber (match_scratch:SF 3 "=&f"))] ++ "TARGET_FP && sw_64_fptm >= SW_64_FPTM_SU && flag_sw_cmov == 0" ++ "#" ++ "&& reload_completed" ++ [(set (match_dup 2) (match_op_dup 4 [(match_dup 1)])) ++ (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) ++ (set (match_dup 5) (match_dup 3))] ++{ ++ operands[5] = adjust_address (operands[0], SFmode, 0); ++} ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes")]) ++ ++;; mieee-opt ++(define_insn_and_split "*fix_truncdfsi_internal" ++ [(set (match_operand:SI 0 "memory_operand" "=m") ++ (subreg:SI ++ (match_operator:DI 4 "fix_operator" ++ [(match_operand:DF 1 "reg_or_0_operand" "fG")]) 0)) ++ (clobber (match_scratch:DI 2 "=&f")) ++ (clobber (match_scratch:SF 3 "=&f"))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" ++ "#" ++ "&& reload_completed" ++ [(set (match_dup 2) (match_op_dup 4 [(match_dup 1)])) ++ (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) ++ (set (match_dup 5) (match_dup 3))] ++{ ++ //operands[4] = gen_rtx_REG (SFmode, REGNO (operands[2])); ++ operands[5] = adjust_address (operands[0], SFmode, 0); ++} ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes")]) ++ ++(define_insn "*fix_truncdfdi2" ++ [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&f,&f") ++ (match_operator:DI 2 "fix_operator" ++ [(match_operand:DF 1 "reg_or_0_operand" "fG,fG")]))] ++ "TARGET_FP && ((flag_sw_sdsame == 0 && flag_sw_cmov == 0) || TARGET_SW6B)" ++ "fcvt%-l%T2 %R1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "c") ++ (set_attr "trap_suffix" "v_sv_svi") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_insn "*fix_truncdfdi2_same" ++ [(set (match_operand:DI 0 "reg_no_subreg_operand" "=f,f") ++ (match_operator:DI 2 "fix_operator" ++ [(match_operand:DF 1 "reg_or_0_operand" "fG,fG")]))] ++ "TARGET_FP && ((flag_sw_sdsame == 1 && flag_sw_cmov == 0) || TARGET_SW6B)" ++ "fcvt%-l%T2 %R1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "c") ++ (set_attr "trap_suffix" "v_sv_svi") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_expand "fix_truncdfdi2" ++ [(set (match_operand:DI 0 "reg_no_subreg_operand") ++ (fix:DI (match_operand:DF 1 "reg_or_0_operand")))] ++ "TARGET_FP") ++ ++(define_expand "fixuns_truncdfdi2" ++ [(set (match_operand:DI 0 "reg_no_subreg_operand") ++ (unsigned_fix:DI (match_operand:DF 1 "reg_or_0_operand")))] ++ "TARGET_FP" ++{ ++ if ((TARGET_SW8A == 1 && flag_sw_cmov != 1) || 
TARGET_SW6B) ++ { ++ rtx reg1 = gen_reg_rtx (DFmode); ++ rtx reg2 = gen_reg_rtx (DFmode); ++ rtx reg3 = gen_reg_rtx (DImode); ++ rtx_code_label *label1 = gen_label_rtx (); ++ rtx_code_label *label2 = gen_label_rtx (); ++ rtx test; ++ REAL_VALUE_TYPE offset; ++ ++ real_2expN (&offset, 63, DFmode); ++ ++ emit_move_insn (reg1, const_double_from_real_value (offset, DFmode)); ++ do_pending_stack_adjust (); ++ ++ test = gen_rtx_GE (VOIDmode, operands[1], reg1); ++ emit_jump_insn (gen_cbranchdf4 (test, operands[1], reg1, label1)); ++ ++ emit_insn (gen_fix_truncdfdi2 (operands[0], operands[1])); ++ emit_jump_insn (gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (VOIDmode, label2))); ++ emit_barrier (); ++ ++ emit_label (label1); ++ emit_move_insn (reg2, gen_rtx_MINUS (DFmode, operands[1], reg1)); ++ emit_move_insn (reg3, GEN_INT (BITMASK_HIGH)); ++ emit_insn (gen_ashldi3 (reg3, reg3, GEN_INT (32))); ++ ++ emit_insn (gen_fix_truncdfdi2 (operands[0], reg2)); ++ emit_insn (gen_iordi3 (operands[0], operands[0], reg3)); ++ ++ emit_label (label2); ++ ++ /* Allow REG_NOTES to be set on last insn (labels don't have enough ++ fields, and can't be used for REG_NOTES anyway). */ ++ emit_use (stack_pointer_rtx); ++ DONE; ++ } ++ else ++ { ++ emit_insn (gen_fixuns_truncdfdi2_internal (operands[0], operands[1])); ++ DONE; ++ } ++}) ++ ++ ++;; Likewise between SFmode and SImode. ++ ++(define_insn_and_split "*fix_truncsfsi_ieee" ++ [(set (match_operand:SI 0 "memory_operand" "=m") ++ (subreg:SI ++ (match_operator:DI 4 "fix_operator" ++ [(float_extend:DF ++ (match_operand:SF 1 "reg_or_0_operand" "fG"))]) 0)) ++ (clobber (match_scratch:DI 2 "=&f")) ++ (clobber (match_scratch:SF 3 "=&f"))] ++ "TARGET_FP && ((sw_64_fptm >= SW_64_FPTM_SU && flag_sw_cmov == 0) || TARGET_SW6B)" ++ "#" ++ "&& reload_completed" ++ [(set (match_dup 2) (match_op_dup 4 [(float_extend:DF (match_dup 1))])) ++ (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) ++ (set (match_dup 5) (match_dup 3))] ++ "operands[5] = adjust_address (operands[0], SFmode, 0);" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes")]) ++ ++;; mieee-opt ++(define_insn_and_split "*fix_truncsfsi_internal" ++ [(set (match_operand:SI 0 "memory_operand" "=m") ++ (subreg:SI ++ (match_operator:DI 4 "fix_operator" ++ [(float_extend:DF ++ (match_operand:SF 1 "reg_or_0_operand" "fG"))]) 0)) ++ (clobber (match_scratch:DI 2 "=&f")) ++ (clobber (match_scratch:SF 3 "=&f"))] ++ "TARGET_FP && ((sw_64_fptm < SW_64_FPTM_SU && flag_sw_cmov == 0) || TARGET_SW6B)" ++ "#" ++ "&& reload_completed" ++ [(set (match_dup 2) (match_op_dup 4 [(float_extend:DF (match_dup 1))])) ++ (set (match_dup 3) (unspec:SF [(match_dup 2)] UNSPEC_CVTQL)) ++ (set (match_dup 5) (match_dup 3))] ++{ ++ // operands[4] = gen_rtx_REG (SFmode, REGNO (operands[2])); ++ operands[5] = adjust_address (operands[0], SFmode, 0); ++} ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes")]) ++ ++(define_insn "*fix_truncsfdi2" ++ [(set (match_operand:DI 0 "reg_no_subreg_operand" "=&f,&f") ++ (match_operator:DI 2 "fix_operator" ++ [(float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,fG"))]))] ++ "TARGET_FP && ((flag_sw_sdsame == 0 && flag_sw_cmov == 0) || TARGET_SW6B)" ++ "fcvt%-l%T2 %R1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "c") ++ (set_attr "trap_suffix" "v_sv_svi") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_insn "*fix_truncsfdi2_same" ++ [(set (match_operand:DI 0 
"reg_no_subreg_operand" "=f,f") ++ (match_operator:DI 2 "fix_operator" ++ [(float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,fG"))]))] ++ "TARGET_FP && ((flag_sw_sdsame == 1 && flag_sw_cmov == 0) || TARGET_SW6B)" ++ "fcvt%-l%T2 %R1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "c") ++ (set_attr "trap_suffix" "v_sv_svi") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_expand "fix_truncsfdi2" ++ [(set (match_operand:DI 0 "reg_no_subreg_operand") ++ (fix:DI (float_extend:DF (match_operand:SF 1 "reg_or_0_operand"))))] ++ "TARGET_FP && flag_sw_cmov == 0") ++ ++(define_expand "fixuns_truncsfdi2" ++ [(set (match_operand:DI 0 "reg_no_subreg_operand") ++ (unsigned_fix:DI ++ (float_extend:DF (match_operand:SF 1 "reg_or_0_operand"))))] ++ "TARGET_FP" ++{ ++ if ( (TARGET_SW8A == 1 && flag_sw_cmov != 1) || TARGET_SW6B) ++ { ++ rtx reg1 = gen_reg_rtx (SFmode); ++ rtx reg2 = gen_reg_rtx (DFmode); ++ rtx reg3 = gen_reg_rtx (DImode); ++ rtx reg4 = gen_reg_rtx (DFmode); ++ rtx reg5 = gen_reg_rtx (DFmode); ++ rtx_code_label *label1 = gen_label_rtx (); ++ rtx_code_label *label2 = gen_label_rtx (); ++ rtx test; ++ REAL_VALUE_TYPE offset; ++ ++ real_2expN (&offset, 63, SFmode); ++ ++ emit_move_insn (reg1, const_double_from_real_value (offset, SFmode)); ++ do_pending_stack_adjust (); ++ ++ test = gen_rtx_GE (SFmode, operands[1], reg1); ++ emit_insn (gen_extendsfdf2 (reg4, reg1)); ++ emit_insn (gen_extendsfdf2 (reg2, operands[1])); ++ emit_jump_insn (gen_cbranchdf4 (test, reg2, reg4, label1)); ++ ++ emit_insn (gen_fix_truncdfdi2 (operands[0], reg2)); ++ emit_jump_insn (gen_rtx_SET (pc_rtx, gen_rtx_LABEL_REF (VOIDmode, label2))); ++ emit_barrier (); ++ ++ emit_label (label1); ++ emit_move_insn (reg5, gen_rtx_MINUS (DFmode, reg2, reg4)); ++ emit_move_insn (reg3, GEN_INT (BITMASK_HIGH)); ++ emit_insn (gen_ashldi3 (reg3, reg3, GEN_INT (32))); ++ ++ emit_insn (gen_fix_truncdfdi2 (operands[0], reg5)); ++ emit_insn (gen_iordi3 (operands[0], operands[0], reg3)); ++ ++ emit_label (label2); ++ ++ /* Allow REG_NOTES to be set on last insn (labels don't have enough ++ fields, and can't be used for REG_NOTES anyway). 
*/ ++ emit_use (stack_pointer_rtx); ++ DONE; ++ } ++ else ++ { ++ rtx reg2 = gen_reg_rtx (DFmode); ++ emit_insn (gen_extendsfdf2 (reg2, operands[1])); ++ emit_insn (gen_fixuns_truncdfdi2_internal (operands[0], reg2)); ++ DONE; ++ } ++}) ++ ++ ++ ++(define_expand "fix_trunctfdi2" ++ [(use (match_operand:DI 0 "register_operand")) ++ (use (match_operand:TF 1 "general_operand"))] ++ "TARGET_HAS_XFLOATING_LIBS" ++ "sw_64_emit_xfloating_cvt (FIX, operands); DONE;") ++ ++(define_expand "fixuns_trunctfdi2" ++ [(use (match_operand:DI 0 "register_operand")) ++ (use (match_operand:TF 1 "general_operand"))] ++ "TARGET_HAS_XFLOATING_LIBS" ++ "sw_64_emit_xfloating_cvt (UNSIGNED_FIX, operands); DONE;") ++ ++(define_insn "*floatdisf2" ++ [(set (match_operand:SF 0 "register_operand" "=&f,&f") ++ (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] ++ "(flag_sw_sdsame == 0 && flag_sw_cmov == 0) || TARGET_SW6B" ++ "fcvtl%,%/ %1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_insn "*floatdisf2_same" ++ [(set (match_operand:SF 0 "register_operand" "=f,f") ++ (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] ++ "(flag_sw_sdsame == 1 && flag_sw_cmov == 0) || TARGET_SW6B" ++ "fcvtl%,%/ %1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_expand "floatdisf2" ++ [(set (match_operand:SF 0 "register_operand" "=f,f") ++ (float:SF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] ++ "TARGET_FP" ++ "" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn_and_split "*floatsisf2_ieee" ++ [(set (match_operand:SF 0 "register_operand" "=&f") ++ (float:SF (match_operand:SI 1 "memory_operand" "m"))) ++ (clobber (match_scratch:DI 2 "=&f")) ++ (clobber (match_scratch:SF 3 "=&f"))] ++ "TARGET_FP && sw_64_fptm >= SW_64_FPTM_SU && flag_sw_cmov == 0" ++ "#" ++ "&& reload_completed" ++ [(set (match_dup 3) (match_dup 1)) ++ (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) ++ (set (match_dup 0) (float:SF (match_dup 2)))] ++ "operands[1] = adjust_address (operands[1], SFmode, 0);") ++ ++;; mieee-opt ++(define_insn_and_split "*floatsisf2" ++ [(set (match_operand:SF 0 "register_operand" "=&f") ++ (float:SF (match_operand:SI 1 "memory_operand" "m"))) ++ (clobber (match_scratch:DI 2 "=&f")) ++ (clobber (match_scratch:SF 3 "=&f"))] ++ "TARGET_FP && flag_sw_cmov == 0 && ! 
TARGET_SW8A" ++ "#" ++ "&& reload_completed" ++ [(set (match_dup 3) (match_dup 1)) ++ (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) ++ (set (match_dup 0) (float:SF (match_dup 2)))] ++{ ++ operands[1] = adjust_address (operands[1], SFmode, 0); ++ //operands[2] = gen_rtx_REG (DImode, REGNO (operands[0])); ++}) ++ ++(define_insn "*floatdidf2" ++ [(set (match_operand:DF 0 "register_operand" "=&f,&f") ++ (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] ++ "flag_sw_sdsame == 0 " ++ "fcvtl%-%/ %1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_insn "*floatdidf2_same" ++ [(set (match_operand:DF 0 "register_operand" "=f,f") ++ (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] ++ "flag_sw_sdsame == 1 " ++ "fcvtl%-%/ %1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_expand "floatdidf2" ++ [(set (match_operand:DF 0 "register_operand" "=f,f") ++ (float:DF (match_operand:DI 1 "reg_no_subreg_operand" "f,f")))] ++ "TARGET_FP " ++ "" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn_and_split "*floatsidf2_ieee" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (float:DF (match_operand:SI 1 "memory_operand" "m"))) ++ (clobber (match_scratch:DI 2 "=&f")) ++ (clobber (match_scratch:SF 3 "=&f"))] ++ "TARGET_FP && sw_64_fptm >= SW_64_FPTM_SU && flag_sw_cmov == 0" ++ "#" ++ "&& reload_completed" ++ [(set (match_dup 3) (match_dup 1)) ++ (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) ++ (set (match_dup 0) (float:DF (match_dup 2)))] ++ "operands[1] = adjust_address (operands[1], SFmode, 0);") ++ ++;; mieee-opt ++(define_insn_and_split "*floatsidf2" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (float:DF (match_operand:SI 1 "memory_operand" "m"))) ++ (clobber (match_scratch:DI 2 "=&f")) ++ (clobber (match_scratch:SF 3 "=&f"))] ++ "TARGET_FP && flag_sw_cmov == 0 && ! 
TARGET_SW8A" ++ "#" ++ "&& reload_completed" ++ [(set (match_dup 3) (match_dup 1)) ++ (set (match_dup 2) (unspec:DI [(match_dup 3)] UNSPEC_CVTLQ)) ++ (set (match_dup 0) (float:DF (match_dup 2)))] ++{ ++ operands[1] = adjust_address (operands[1], SFmode, 0); ++// operands[2] = gen_rtx_REG (DImode, REGNO (operands[0])); ++ // operands[3] = gen_rtx_REG (SFmode, REGNO (operands[0])); ++}) ++ ++(define_expand "floatditf2" ++ [(use (match_operand:TF 0 "register_operand")) ++ (use (match_operand:DI 1 "general_operand"))] ++ "TARGET_HAS_XFLOATING_LIBS" ++ "sw_64_emit_xfloating_cvt (FLOAT, operands); DONE;") ++ ++(define_expand "floatunsditf2" ++ [(use (match_operand:TF 0 "register_operand")) ++ (use (match_operand:DI 1 "general_operand"))] ++ "TARGET_HAS_XFLOATING_LIBS" ++ "sw_64_emit_xfloating_cvt (UNSIGNED_FLOAT, operands); DONE;") ++ ++(define_expand "extendsfdf2" ++ [(set (match_operand:DF 0 "register_operand") ++ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand")))] ++ "TARGET_FP" ++{ ++ if (sw_64_fptm >= SW_64_FPTM_SU) ++ operands[1] = force_reg (SFmode, operands[1]); ++}) ++ ++;; The Unicos/Mk assembler doesn't support cvtst, but we've already ++;; asserted that sw_64_fptm == SW_64_FPTM_N. ++ ++(define_insn "*cmpsf_internal" ++ [(set (match_operand:SF 0 "register_operand" "=&f,&f") ++ (match_operator:SF 1 "sw_64_fp_comparison_operator" ++ [(match_operand:SF 2 "reg_or_0_operand" "fG,fG") ++ (match_operand:SF 3 "reg_or_0_operand" "fG,fG")]))] ++ "TARGET_FP && flag_sw_sdsame == 0 && flag_sw_sf_cmpsel" ++ "fcmp%C1%/ %R2,%R3,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "trap_suffix" "su") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_insn "*cmpsf_internal_same" ++ [(set (match_operand:SF 0 "register_operand" "=f,f") ++ (match_operator:SF 1 "sw_64_fp_comparison_operator" ++ [(match_operand:SF 2 "reg_or_0_operand" "fG,fG") ++ (match_operand:SF 3 "reg_or_0_operand" "fG,fG")]))] ++ "TARGET_FP && flag_sw_sdsame == 1 && flag_sw_sf_cmpsel" ++ "fcmp%C1%/ %R2,%R3,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "trap_suffix" "su") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn "*extendsfdf2_ieee" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (float_extend:DF (match_operand:SF 1 "register_operand" "f")))] ++ "TARGET_FP && sw_64_fptm >= SW_64_FPTM_SU && flag_sw_sdsame == 0" ++ "fcvtsd %1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes")]) ++(define_insn "*extendsfdf2_ieee_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (float_extend:DF (match_operand:SF 1 "register_operand" "f")))] ++ "TARGET_FP && sw_64_fptm >= SW_64_FPTM_SU && flag_sw_sdsame == 1" ++ "fcvtsd %1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes")]) ++ ++(define_insn "*extendsfdf2_internal_1" ++ [(set (match_operand:DF 0 "register_operand" "=&f,f,m") ++ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "f,m,f")))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_nofcpys == 1" ++ "@ ++ fcvtsd %1,%0 ++ fld%,%U1 %0,%1 ++ fst%-%U0 %1,%0" ++ [(set_attr "type" "fcpys,fld,fst")]) ++ ++(define_insn "*extendsfdf2_internal_2" ++ [(set (match_operand:DF 0 "register_operand" "=&f,f,m") ++ (float_extend:DF (match_operand:SF 1 "nonimmediate_operand" "f,m,f")))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && 
flag_sw_nofcpys == 0" ++ "@ ++ fcvtsd %1,%0 \;fcpys %0,%0,%0 ++ fld%, %0,%1 ++ fst%- %1,%0" ++ [(set_attr "type" "fcpys,fld,fst")]) ++ ++;; Use register_operand for operand 1 to prevent compress_float_constant ++;; from doing something silly. When optimizing we'll put things back ++;; together anyway. ++(define_expand "extendsftf2" ++ [(use (match_operand:TF 0 "register_operand")) ++ (use (match_operand:SF 1 "register_operand"))] ++ "TARGET_HAS_XFLOATING_LIBS" ++{ ++ rtx tmp = gen_reg_rtx (DFmode); ++ emit_insn (gen_extendsfdf2 (tmp, operands[1])); ++ emit_insn (gen_extenddftf2 (operands[0], tmp)); ++ DONE; ++}) ++ ++(define_expand "extenddftf2" ++ [(use (match_operand:TF 0 "register_operand")) ++ (use (match_operand:DF 1 "register_operand"))] ++ "TARGET_HAS_XFLOATING_LIBS" ++ "sw_64_emit_xfloating_cvt (FLOAT_EXTEND, operands); DONE;") ++ ++(define_insn "*truncdfsf2" ++ [(set (match_operand:SF 0 "register_operand" "=&f,&f") ++ (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] ++ "flag_sw_sdsame == 0" ++ "fcvt%-%,%/ %R1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_insn "*truncdfsf2_same" ++ [(set (match_operand:SF 0 "register_operand" "=f,f") ++ (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] ++ "flag_sw_sdsame == 1" ++ "fcvt%-%,%/ %R1,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_expand "truncdfsf2" ++ [(set (match_operand:SF 0 "register_operand" "=f,f") ++ (float_truncate:SF (match_operand:DF 1 "reg_or_0_operand" "fG,fG")))] ++ "TARGET_FP" ++ "" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "round_suffix" "normal") ++ (set_attr "trap_suffix" "u_su_sui") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_expand "trunctfdf2" ++ [(use (match_operand:DF 0 "register_operand")) ++ (use (match_operand:TF 1 "general_operand"))] ++ "TARGET_HAS_XFLOATING_LIBS" ++ "sw_64_emit_xfloating_cvt (FLOAT_TRUNCATE, operands); DONE;") ++ ++(define_expand "trunctfsf2" ++ [(use (match_operand:SF 0 "register_operand")) ++ (use (match_operand:TF 1 "general_operand"))] ++ "TARGET_FP && TARGET_HAS_XFLOATING_LIBS" ++{ ++ rtx tmpf, sticky, arg, lo, hi; ++ ++ tmpf = gen_reg_rtx (DFmode); ++ sticky = gen_reg_rtx (DImode); ++ arg = copy_to_mode_reg (TFmode, operands[1]); ++ lo = gen_lowpart (DImode, arg); ++ hi = gen_highpart (DImode, arg); ++ ++ /* Convert the low word of the TFmode value into a sticky rounding bit, ++ then or it into the low bit of the high word. This leaves the sticky ++ bit at bit 48 of the fraction, which is representable in DFmode, ++ which prevents rounding error in the final conversion to SFmode. 
*/ ++ ++ emit_insn (gen_rtx_SET (sticky, gen_rtx_NE (DImode, lo, const0_rtx))); ++ emit_insn (gen_iordi3 (hi, hi, sticky)); ++ emit_insn (gen_trunctfdf2 (tmpf, arg)); ++ emit_insn (gen_truncdfsf2 (operands[0], tmpf)); ++ DONE; ++}) ++ ++;; Next are all the integer comparisons, and conditional moves and branches ++;; and some of the related define_expand's and define_split's. ++ ++(define_insn "*setcc_internal" ++ [(set (match_operand 0 "register_operand" "=r") ++ (match_operator 1 "sw_64_comparison_operator" ++ [(match_operand:DI 2 "register_operand" "r") ++ (match_operand:DI 3 "reg_or_8bit_operand" "rI")]))] ++ "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT ++ && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 ++ && GET_MODE (operands[0]) == GET_MODE (operands[1])" ++ "cmp%C1 %2,%3,%0" ++ [(set_attr "type" "icmp")]) ++ ++;; Yes, we can technically support reg_or_8bit_operand in operand 2, ++;; but that's non-canonical rtl and allowing that causes inefficiencies ++;; from cse on. ++(define_insn "*setcc_swapped_internal" ++ [(set (match_operand 0 "register_operand" "=r") ++ (match_operator 1 "sw_64_swapped_comparison_operator" ++ [(match_operand:DI 2 "register_operand" "r") ++ (match_operand:DI 3 "reg_or_0_operand" "rJ")]))] ++ "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT ++ && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 ++ && GET_MODE (operands[0]) == GET_MODE (operands[1])" ++ "cmp%c1 %r3,%2,%0" ++ [(set_attr "type" "icmp")]) ++ ++;; Use match_operator rather than ne directly so that we can match ++;; multiple integer modes. ++(define_insn "*setne_internal" ++ [(set (match_operand 0 "register_operand" "=r") ++ (match_operator 1 "signed_comparison_operator" ++ [(match_operand:DI 2 "register_operand" "r") ++ (const_int 0)]))] ++ "GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT ++ && GET_MODE_SIZE (GET_MODE (operands[0])) <= 8 ++ && GET_CODE (operands[1]) == NE ++ && GET_MODE (operands[0]) == GET_MODE (operands[1])" ++ "cmpult $31,%2,%0" ++ [(set_attr "type" "icmp")]) ++ ++;; The mode folding trick can't be used with const_int operands, since ++;; reload needs to know the proper mode. ++;; ++;; Use add_operand instead of the more seemingly natural reg_or_8bit_operand ++;; in order to create more pairs of constants. As long as we're allowing ++;; two constants at the same time, and will have to reload one of them... 
++ ++(define_insn "*movcc_internal" ++ [(set (match_operand:IMODE 0 "register_operand" "=r,r,r,r") ++ (if_then_else:IMODE ++ (match_operator 2 "signed_comparison_operator" ++ [(match_operand:DI 3 "reg_or_0_operand" "rJ,rJ,J,J") ++ (match_operand:DI 4 "reg_or_0_operand" "J,J,rJ,rJ")]) ++ (match_operand:IMODE 1 "add_operand" "rI,0,rI,0") ++ (match_operand:IMODE 5 "add_operand" "0,rI,0,rI")))] ++ "(operands[3] == const0_rtx) ^ (operands[4] == const0_rtx)" ++ "@ ++ sel%C2 %r3,%1,%0,%0 ++ sel%D2 %r3,%5,%0,%0 ++ sel%c2 %r4,%1,%0,%0 ++ sel%d2 %r4,%5,%0,%0" ++ [(set_attr "type" "icmov")]) ++ ++(define_insn "*movcc_lbc" ++ [(set (match_operand:IMODE 0 "register_operand" "=r,r") ++ (if_then_else:IMODE ++ (eq (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") ++ (const_int 1) ++ (const_int 0)) ++ (const_int 0)) ++ (match_operand:IMODE 1 "reg_or_8bit_operand" "rI,0") ++ (match_operand:IMODE 3 "reg_or_8bit_operand" "0,rI")))] ++ "" ++ "@ ++ sellbc %r2,%1,%0,%0 ++ sellbs %r2,%3,%0,%0" ++ [(set_attr "type" "icmov")]) ++ ++(define_insn "*movcc_lbs" ++ [(set (match_operand:IMODE 0 "register_operand" "=r,r") ++ (if_then_else:IMODE ++ (ne (zero_extract:DI (match_operand:DI 2 "reg_or_0_operand" "rJ,rJ") ++ (const_int 1) ++ (const_int 0)) ++ (const_int 0)) ++ (match_operand:IMODE 1 "reg_or_8bit_operand" "rI,0") ++ (match_operand:IMODE 3 "reg_or_8bit_operand" "0,rI")))] ++ "" ++ "@ ++ sellbs %r2,%1,%0,%0 ++ sellbc %r2,%3,%0,%0" ++ [(set_attr "type" "icmov")]) ++ ++;; For ABS, we have two choices, depending on whether the input and output ++;; registers are the same or not. ++(define_expand "absdi2" ++ [(set (match_operand:DI 0 "register_operand") ++ (abs:DI (match_operand:DI 1 "register_operand")))] ++ "" ++{ ++ if (rtx_equal_p (operands[0], operands[1])) ++ emit_insn (gen_absdi2_same (operands[0], gen_reg_rtx (DImode))); ++ else ++ emit_insn (gen_absdi2_diff (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_expand "absdi2_same" ++ [(set (match_operand:DI 1 "register_operand") ++ (neg:DI (match_operand:DI 0 "register_operand"))) ++ (set (match_dup 0) ++ (if_then_else:DI (ge (match_dup 0) (const_int 0)) ++ (match_dup 0) ++ (match_dup 1)))]) ++ ++(define_expand "absdi2_diff" ++ [(set (match_operand:DI 0 "register_operand") ++ (neg:DI (match_operand:DI 1 "register_operand"))) ++ (set (match_dup 0) ++ (if_then_else:DI (lt (match_dup 1) (const_int 0)) ++ (match_dup 0) ++ (match_dup 1)))]) ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (abs:DI (match_dup 0))) ++ (clobber (match_operand:DI 1 "register_operand"))] ++ "" ++ [(set (match_dup 1) (neg:DI (match_dup 0))) ++ (set (match_dup 0) (if_then_else:DI (ge (match_dup 0) (const_int 0)) ++ (match_dup 0) (match_dup 1)))]) ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (abs:DI (match_operand:DI 1 "register_operand")))] ++ "! rtx_equal_p (operands[0], operands[1])" ++ [(set (match_dup 0) (neg:DI (match_dup 1))) ++ (set (match_dup 0) (if_then_else:DI (lt (match_dup 1) (const_int 0)) ++ (match_dup 0) (match_dup 1)))]) ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (neg:DI (abs:DI (match_dup 0)))) ++ (clobber (match_operand:DI 1 "register_operand"))] ++ "" ++ [(set (match_dup 1) (neg:DI (match_dup 0))) ++ (set (match_dup 0) (if_then_else:DI (le (match_dup 0) (const_int 0)) ++ (match_dup 0) (match_dup 1)))]) ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (neg:DI (abs:DI (match_operand:DI 1 "register_operand"))))] ++ "! 
rtx_equal_p (operands[0], operands[1])" ++ [(set (match_dup 0) (neg:DI (match_dup 1))) ++ (set (match_dup 0) (if_then_else:DI (gt (match_dup 1) (const_int 0)) ++ (match_dup 0) (match_dup 1)))]) ++ ++(define_insn "3" ++ [(set (match_operand:I12MODE 0 "register_operand" "=r") ++ (any_maxmin:I12MODE ++ (match_operand:I12MODE 1 "reg_or_0_operand" "%rJ") ++ (match_operand:I12MODE 2 "reg_or_8bit_operand" "rI")))] ++ "TARGET_MAX" ++ " %r1,%2,%0" ++ [(set_attr "type" "mvi")]) ++ ++(define_expand "smaxdi3" ++ [(set (match_dup 3) ++ (le:DI (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand"))) ++ (set (match_operand:DI 0 "register_operand") ++ (if_then_else:DI (eq (match_dup 3) (const_int 0)) ++ (match_dup 1) (match_dup 2)))] ++ "" ++ "operands[3] = gen_reg_rtx (DImode);") ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (smax:DI (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand"))) ++ (clobber (match_operand:DI 3 "register_operand"))] ++ "operands[2] != const0_rtx" ++ [(set (match_dup 3) (le:DI (match_dup 1) (match_dup 2))) ++ (set (match_dup 0) (if_then_else:DI (eq (match_dup 3) (const_int 0)) ++ (match_dup 1) (match_dup 2)))]) ++ ++(define_insn "*smax_const0" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (smax:DI (match_operand:DI 1 "register_operand" "0") ++ (const_int 0)))] ++ "" ++ "sellt %0,0,%0,%0" ++ [(set_attr "type" "icmov")]) ++ ++(define_expand "smindi3" ++ [(set (match_dup 3) ++ (lt:DI (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand"))) ++ (set (match_operand:DI 0 "register_operand") ++ (if_then_else:DI (ne (match_dup 3) (const_int 0)) ++ (match_dup 1) (match_dup 2)))] ++ "" ++ "operands[3] = gen_reg_rtx (DImode);") ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (smin:DI (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand"))) ++ (clobber (match_operand:DI 3 "register_operand"))] ++ "operands[2] != const0_rtx" ++ [(set (match_dup 3) (lt:DI (match_dup 1) (match_dup 2))) ++ (set (match_dup 0) (if_then_else:DI (ne (match_dup 3) (const_int 0)) ++ (match_dup 1) (match_dup 2)))]) ++ ++(define_insn "*smin_const0" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (smin:DI (match_operand:DI 1 "register_operand" "0") ++ (const_int 0)))] ++ "" ++ "selgt %0,0,%0,%0" ++ [(set_attr "type" "icmov")]) ++ ++(define_expand "umaxdi3" ++ [(set (match_dup 3) ++ (leu:DI (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand"))) ++ (set (match_operand:DI 0 "register_operand") ++ (if_then_else:DI (eq (match_dup 3) (const_int 0)) ++ (match_dup 1) (match_dup 2)))] ++ "" ++ "operands[3] = gen_reg_rtx (DImode);") ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (umax:DI (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand"))) ++ (clobber (match_operand:DI 3 "register_operand"))] ++ "operands[2] != const0_rtx" ++ [(set (match_dup 3) (leu:DI (match_dup 1) (match_dup 2))) ++ (set (match_dup 0) (if_then_else:DI (eq (match_dup 3) (const_int 0)) ++ (match_dup 1) (match_dup 2)))]) ++ ++(define_expand "umindi3" ++ [(set (match_dup 3) ++ (ltu:DI (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand"))) ++ (set (match_operand:DI 0 "register_operand") ++ (if_then_else:DI (ne (match_dup 3) (const_int 0)) ++ (match_dup 1) (match_dup 2)))] ++ "" ++ "operands[3] = gen_reg_rtx (DImode);") ++ ++(define_split ++ [(set 
(match_operand:DI 0 "register_operand") ++ (umin:DI (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand"))) ++ (clobber (match_operand:DI 3 "register_operand"))] ++ "operands[2] != const0_rtx" ++ [(set (match_dup 3) (ltu:DI (match_dup 1) (match_dup 2))) ++ (set (match_dup 0) (if_then_else:DI (ne (match_dup 3) (const_int 0)) ++ (match_dup 1) (match_dup 2)))]) ++ ++(define_insn "*bcc_normal" ++ [(set (pc) ++ (if_then_else ++ (match_operator 1 "signed_comparison_operator" ++ [(match_operand:DI 2 "reg_or_0_operand" "rJ") ++ (const_int 0)]) ++ (label_ref (match_operand 0)) ++ (pc)))] ++ "" ++ "b%C1 %r2,%0" ++ [(set_attr "type" "ibr")]) ++ ++(define_insn_and_split "*branchcombine" ++ [(set (pc) ++ (if_then_else (match_operator 1 "sw_64_branch_combination" ++ [(match_operand:DI 2 "register_operand") ++ (match_operand:DI 3 "reg_or_8bit_operand")]) ++ (label_ref (match_operand 0)) ++ (pc)))] ++"flag_sw_branch_combination==1 ++ && (can_create_pseudo_p ()) && operands[3]!=CONST0_RTX (DImode)" ++"#" ++"&& 1" ++ [(parallel ++ [(set (pc) ++ (if_then_else ++ (match_op_dup 1 ++ [(match_dup 2) ++ (match_dup 3)]) ++ (label_ref (match_dup 0)) ++ (pc))) ++ (clobber (match_dup 4))])] ++{ ++ operands[4]=gen_reg_rtx (DImode); ++}) ++ ++(define_insn "bcc_ne" ++ [(parallel ++ [(set (pc) ++ (if_then_else ++ (match_operator 1 "sw_64_comparison_operator" ++ [(match_operand:DI 2 "reg_or_0_operand" "rJ") ++ (match_operand:DI 3 "reg_or_8bit_operand" "rI")]) ++ (label_ref (match_operand 0)) ++ (pc))) ++ (clobber (match_operand:DI 4 "register_operand" "=r"))])] ++ "flag_sw_branch_combination==1" ++ "cmp%C1 %r2,%3,%r4 ++ bne %r4,%0" ++ [(set_attr "type" "ibr")]) ++ ++(define_insn "bcc_eq" ++ [(parallel ++ [(set (pc) ++ (if_then_else ++ (match_operator 1 "sw_64_swapped_branch_combination" ++ [(match_operand:DI 2 "reg_or_0_operand" "rJ") ++ (match_operand:DI 3 "reg_or_8bit_operand" "rI")]) ++ (label_ref (match_operand 0)) ++ (pc))) ++ (clobber (match_operand:DI 4 "register_operand" "=r"))])] ++ "flag_sw_branch_combination==1" ++ "cmp%D1 %r2,%3,%r4 ++ beq %r4,%0" ++ [(set_attr "type" "ibr")]) ++ ++(define_insn "*bcc_reverse" ++ [(set (pc) ++ (if_then_else ++ (match_operator 1 "signed_comparison_operator" ++ [(match_operand:DI 2 "register_operand" "r") ++ (const_int 0)]) ++ ++ (pc) ++ (label_ref (match_operand 0))))] ++ "" ++ "b%c1 %2,%0" ++ [(set_attr "type" "ibr")]) ++ ++(define_insn "*blbs_normal" ++ [(set (pc) ++ (if_then_else ++ (ne (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") ++ (const_int 1) ++ (const_int 0)) ++ (const_int 0)) ++ (label_ref (match_operand 0)) ++ (pc)))] ++ "" ++ "blbs %r1,%0" ++ [(set_attr "type" "ibr")]) ++ ++(define_insn "*blbc_normal" ++ [(set (pc) ++ (if_then_else ++ (eq (zero_extract:DI (match_operand:DI 1 "reg_or_0_operand" "rJ") ++ (const_int 1) ++ (const_int 0)) ++ (const_int 0)) ++ (label_ref (match_operand 0)) ++ (pc)))] ++ "" ++ "blbc %r1,%0" ++ [(set_attr "type" "ibr")]) ++ ++(define_split ++ [(parallel ++ [(set (pc) ++ (if_then_else ++ (match_operator 1 "comparison_operator" ++ [(zero_extract:DI (match_operand:DI 2 "register_operand") ++ (const_int 1) ++ (match_operand:DI 3 "const_int_operand")) ++ (const_int 0)]) ++ (label_ref (match_operand 0)) ++ (pc))) ++ (clobber (match_operand:DI 4 "register_operand"))])] ++ "INTVAL (operands[3]) != 0" ++ [(set (match_dup 4) ++ (lshiftrt:DI (match_dup 2) (match_dup 3))) ++ (set (pc) ++ (if_then_else (match_op_dup 1 ++ [(zero_extract:DI (match_dup 4) ++ (const_int 1) ++ (const_int 0)) ++ (const_int 0)]) 
++ (label_ref (match_dup 0)) ++ (pc)))] ++) ++ ++ ++;; The following are the corresponding floating-point insns. Recall ++;; we need to have variants that expand the arguments from SFmode ++;; to DFmode. ++ ++(define_insn "*cmpdf_internal" ++ [(set (match_operand:DF 0 "register_operand" "=&f,&f") ++ (match_operator:DF 1 "sw_64_fp_comparison_operator" ++ [(match_operand:DF 2 "reg_or_0_operand" "fG,fG") ++ (match_operand:DF 3 "reg_or_0_operand" "fG,fG")]))] ++ "TARGET_FP && flag_sw_sdsame == 0" ++ "fcmp%C1%/ %R2,%R3,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "trap_suffix" "su") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++(define_insn "*cmpdf_internal_same" ++ [(set (match_operand:DF 0 "register_operand" "=f,f") ++ (match_operator:DF 1 "sw_64_fp_comparison_operator" ++ [(match_operand:DF 2 "reg_or_0_operand" "fG,fG") ++ (match_operand:DF 3 "reg_or_0_operand" "fG,fG")]))] ++ "TARGET_FP && flag_sw_sdsame == 1" ++ "fcmp%C1%/ %R2,%R3,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "trap_suffix" "su") ++ (set (attr "enabled") ++ (cond [(eq_attr "alternative" "0") ++ (symbol_ref "sw_64_fptm < SW_64_FPTM_SU") ++ ] ++ (symbol_ref "true")))]) ++ ++(define_insn "*cmpdf_ext1" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (match_operator:DF 1 "sw_64_fp_comparison_operator" ++ [(float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG")) ++ (match_operand:DF 3 "reg_or_0_operand" "fG")]))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" ++ "fcmp%C1%/ %R2,%R3,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "trap_suffix" "su")]) ++(define_insn "*cmpdf_ext1_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (match_operator:DF 1 "sw_64_fp_comparison_operator" ++ [(float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG")) ++ (match_operand:DF 3 "reg_or_0_operand" "fG")]))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" ++ "fcmp%C1%/ %R2,%R3,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "trap_suffix" "su")]) ++ ++(define_insn "*cmpdf_ext2" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (match_operator:DF 1 "sw_64_fp_comparison_operator" ++ [(match_operand:DF 2 "reg_or_0_operand" "fG") ++ (float_extend:DF ++ (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" ++ "fcmp%C1%/ %R2,%R3,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "trap_suffix" "su")]) ++(define_insn "*cmpdf_ext2_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (match_operator:DF 1 "sw_64_fp_comparison_operator" ++ [(match_operand:DF 2 "reg_or_0_operand" "fG") ++ (float_extend:DF ++ (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" ++ "fcmp%C1%/ %R2,%R3,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "trap_suffix" "su")]) ++ ++(define_insn "*cmpdf_ext3" ++ [(set (match_operand:DF 0 "register_operand" "=&f") ++ (match_operator:DF 1 "sw_64_fp_comparison_operator" ++ [(float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG")) ++ (float_extend:DF ++ (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 0" ++ "fcmp%C1%/ %R2,%R3,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr 
"trap_suffix" "su")]) ++(define_insn "*cmpdf_ext3_same" ++ [(set (match_operand:DF 0 "register_operand" "=f") ++ (match_operator:DF 1 "sw_64_fp_comparison_operator" ++ [(float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG")) ++ (float_extend:DF ++ (match_operand:SF 3 "reg_or_0_operand" "fG"))]))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU && flag_sw_sdsame == 1" ++ "fcmp%C1%/ %R2,%R3,%0" ++ [(set_attr "type" "fadd") ++ (set_attr "trap" "yes") ++ (set_attr "trap_suffix" "su")]) ++ ++(define_insn "*movcc_internal" ++ [(set (match_operand:FMODE 0 "register_operand" "=f,f") ++ (if_then_else:FMODE ++ (match_operator 3 "signed_comparison_operator" ++ [(match_operand:DF 4 "reg_or_0_operand" "fG,fG") ++ (match_operand:DF 2 "const0_operand" "G,G")]) ++ (match_operand:FMODE 1 "reg_or_0_operand" "fG,0") ++ (match_operand:FMODE 5 "reg_or_0_operand" "0,fG")))] ++ "TARGET_FP" ++ "@ ++ fsel%C3 %R4,%R1,%0,%0 ++ fsel%D3 %R4,%R5,%0,%0" ++ [(set_attr "type" "fcmov")]) ++ ++(define_insn "*movdfcc_ext1" ++ [(set (match_operand:DF 0 "register_operand" "=f,f") ++ (if_then_else:DF ++ (match_operator 3 "signed_comparison_operator" ++ [(match_operand:DF 4 "reg_or_0_operand" "fG,fG") ++ (match_operand:DF 2 "const0_operand" "G,G")]) ++ (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,0")) ++ (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" ++ "@ ++ fsel%C3 %R4,%R1,%0,%0 ++ fsel%D3 %R4,%R5,%0,%0" ++ [(set_attr "type" "fcmov")]) ++ ++(define_insn "*movdfcc_ext2" ++ [(set (match_operand:DF 0 "register_operand" "=f,f") ++ (if_then_else:DF ++ (match_operator 3 "signed_comparison_operator" ++ [(float_extend:DF ++ (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) ++ (match_operand:DF 2 "const0_operand" "G,G")]) ++ (match_operand:DF 1 "reg_or_0_operand" "fG,0") ++ (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" ++ "@ ++ fsel%C3 %R4,%R1,%0,%0 ++ fsel%D3 %R4,%R5,%0,%0" ++ [(set_attr "type" "fcmov")]) ++ ++(define_insn "*movdfcc_ext3" ++ [(set (match_operand:SF 0 "register_operand" "=f,f") ++ (if_then_else:SF ++ (match_operator 3 "signed_comparison_operator" ++ [(float_extend:DF ++ (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) ++ (match_operand:DF 2 "const0_operand" "G,G")]) ++ (match_operand:SF 1 "reg_or_0_operand" "fG,0") ++ (match_operand:SF 5 "reg_or_0_operand" "0,fG")))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" ++ "@ ++ fsel%C3 %R4,%R1,%0,%0 ++ fsel%D3 %R4,%R5,%0,%0" ++ [(set_attr "type" "fcmov")]) ++ ++(define_insn "*movdfcc_ext4" ++ [(set (match_operand:DF 0 "register_operand" "=f,f") ++ (if_then_else:DF ++ (match_operator 3 "signed_comparison_operator" ++ [(float_extend:DF ++ (match_operand:SF 4 "reg_or_0_operand" "fG,fG")) ++ (match_operand:DF 2 "const0_operand" "G,G")]) ++ (float_extend:DF (match_operand:SF 1 "reg_or_0_operand" "fG,0")) ++ (match_operand:DF 5 "reg_or_0_operand" "0,fG")))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" ++ "@ ++ fsel%C3 %R4,%R1,%0,%0 ++ fsel%D3 %R4,%R5,%0,%0" ++ [(set_attr "type" "fcmov")]) ++ ++(define_expand "smaxdf3" ++ [(set (match_dup 3) ++ (le:DF (match_operand:DF 1 "reg_or_0_operand") ++ (match_operand:DF 2 "reg_or_0_operand"))) ++ (set (match_operand:DF 0 "register_operand") ++ (if_then_else:DF (eq (match_dup 3) (match_dup 4)) ++ (match_dup 1) (match_dup 2)))] ++ "TARGET_FP" ++{ ++ operands[3] = gen_reg_rtx (DFmode); ++ operands[4] = CONST0_RTX (DFmode); ++}) ++ ++(define_expand "smindf3" ++ [(set (match_dup 3) ++ (lt:DF (match_operand:DF 1 "reg_or_0_operand") 
++ (match_operand:DF 2 "reg_or_0_operand"))) ++ (set (match_operand:DF 0 "register_operand") ++ (if_then_else:DF (ne (match_dup 3) (match_dup 4)) ++ (match_dup 1) (match_dup 2)))] ++ "TARGET_FP" ++{ ++ operands[3] = gen_reg_rtx (DFmode); ++ operands[4] = CONST0_RTX (DFmode); ++}) ++ ++(define_expand "smaxsf3" ++ [(set (match_dup 3) ++ (le:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand")) ++ (float_extend:DF (match_operand:SF 2 "reg_or_0_operand")))) ++ (set (match_operand:SF 0 "register_operand") ++ (if_then_else:SF (eq (match_dup 3) (match_dup 4)) ++ (match_dup 1) (match_dup 2)))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" ++{ ++ operands[3] = gen_reg_rtx (DFmode); ++ operands[4] = CONST0_RTX (DFmode); ++}) ++ ++(define_expand "sminsf3" ++ [(set (match_dup 3) ++ (lt:DF (float_extend:DF (match_operand:SF 1 "reg_or_0_operand")) ++ (float_extend:DF (match_operand:SF 2 "reg_or_0_operand")))) ++ (set (match_operand:SF 0 "register_operand") ++ (if_then_else:SF (ne (match_dup 3) (match_dup 4)) ++ (match_dup 1) (match_dup 2)))] ++ "TARGET_FP && sw_64_fptm < SW_64_FPTM_SU" ++{ ++ operands[3] = gen_reg_rtx (DFmode); ++ operands[4] = CONST0_RTX (DFmode); ++}) ++ ++(define_insn "*fbcc_normal" ++ [(set (pc) ++ (if_then_else ++ (match_operator 1 "signed_comparison_operator" ++ [(match_operand:DF 2 "reg_or_0_operand" "fG") ++ (match_operand:DF 3 "const0_operand" "G")]) ++ (label_ref (match_operand 0)) ++ (pc)))] ++ "TARGET_FP" ++ "fb%C1 %R2,%0" ++ [(set_attr "type" "fbr")]) ++ ++(define_insn "*fbcc_ext_normal" ++ [(set (pc) ++ (if_then_else ++ (match_operator 1 "signed_comparison_operator" ++ [(float_extend:DF ++ (match_operand:SF 2 "reg_or_0_operand" "fG")) ++ (match_operand:DF 3 "const0_operand" "G")]) ++ (label_ref (match_operand 0)) ++ (pc)))] ++ "TARGET_FP" ++ "fb%C1 %R2,%0" ++ [(set_attr "type" "fbr")]) ++ ++;; These are the main define_expand's used to make conditional branches ++;; and compares. 
++ ++(define_expand "cbranchsf4" ++ [(use (match_operator 0 "sw_64_cbranch_operator" ++ [(match_operand:SF 1 "reg_or_0_operand") ++ (match_operand:SF 2 "reg_or_0_operand")])) ++ (use (match_operand 3))] ++ "TARGET_FP && flag_sw_sf_cmpsel" ++ "sw_64_emit_conditional_branch (operands, SFmode); DONE;") ++ ++(define_insn "*sfbcc_normal" ++ [(set (pc) ++ (if_then_else ++ (match_operator 1 "signed_comparison_operator" ++ [(match_operand:SF 2 "reg_or_0_operand" "fG") ++ (match_operand:SF 3 "const0_operand" "G")]) ++ (label_ref (match_operand 0)) ++ (pc)))] ++ "TARGET_FP && flag_sw_sf_cmpsel" ++ "fb%C1 %R2,%0" ++ [(set_attr "type" "fbr")]) ++ ++(define_insn "*movsfcc_internal" ++ [(set (match_operand:FMODE 0 "register_operand" "=f,f") ++ (if_then_else:FMODE ++ (match_operator 3 "signed_comparison_operator" ++ [(match_operand:SF 4 "reg_or_0_operand" "fG,fG") ++ (match_operand:SF 2 "const0_operand" "G,G")]) ++ (match_operand:FMODE 1 "reg_or_0_operand" "fG,0") ++ (match_operand:FMODE 5 "reg_or_0_operand" "0,fG")))] ++ "TARGET_FP && flag_sw_sf_cmpsel" ++ "@ ++ fsel%C3 %R4,%R1,%0,%0 ++ fsel%D3 %R4,%R5,%0,%0" ++ [(set_attr "type" "fcmov")]) ++ ++(define_expand "cbranchdf4" ++ [(use (match_operator 0 "sw_64_cbranch_operator" ++ [(match_operand:DF 1 "reg_or_0_operand") ++ (match_operand:DF 2 "reg_or_0_operand")])) ++ (use (match_operand 3))] ++ "TARGET_FP" ++ "sw_64_emit_conditional_branch (operands, DFmode); DONE;") ++ ++(define_expand "cbranchtf4" ++ [(use (match_operator 0 "sw_64_cbranch_operator" ++ [(match_operand:TF 1 "general_operand") ++ (match_operand:TF 2 "general_operand")])) ++ (use (match_operand 3))] ++ "TARGET_HAS_XFLOATING_LIBS" ++ "sw_64_emit_conditional_branch (operands, TFmode); DONE;") ++ ++(define_expand "cbranchdi4" ++ [(use (match_operator 0 "sw_64_cbranch_operator" ++ [(match_operand:DI 1 "general_operand") ++ (match_operand:DI 2 "general_operand")])) ++ (use (match_operand 3))] ++ "" ++ "sw_64_emit_conditional_branch (operands, DImode); DONE;") ++ ++(define_expand "cstoredf4" ++ [(use (match_operator:DI 1 "sw_64_cbranch_operator" ++ [(match_operand:DF 2 "reg_or_0_operand") ++ (match_operand:DF 3 "reg_or_0_operand")])) ++ (clobber (match_operand:DI 0 "register_operand"))] ++ "TARGET_FP" ++{ ++ if (sw_64_emit_setcc (operands, DFmode)) ++ DONE; ++ else ++ FAIL; ++}) ++ ++(define_expand "cstoretf4" ++ [(use (match_operator:DI 1 "sw_64_cbranch_operator" ++ [(match_operand:TF 2 "general_operand") ++ (match_operand:TF 3 "general_operand")])) ++ (clobber (match_operand:DI 0 "register_operand"))] ++ "TARGET_HAS_XFLOATING_LIBS" ++{ ++ if (sw_64_emit_setcc (operands, TFmode)) ++ DONE; ++ else ++ FAIL; ++}) ++ ++(define_expand "cstoredi4" ++ [(use (match_operator:DI 1 "sw_64_cbranch_operator" ++ [(match_operand:DI 2 "general_operand") ++ (match_operand:DI 3 "general_operand")])) ++ (clobber (match_operand:DI 0 "register_operand"))] ++ "" ++{ ++ if (sw_64_emit_setcc (operands, DImode)) ++ DONE; ++ else ++ FAIL; ++}) ++ ++;; These are the main define_expand's used to make conditional moves. 
++ ++(define_expand "movcc" ++ [(set (match_operand:I48MODE 0 "register_operand") ++ (if_then_else:I48MODE ++ (match_operand 1 "comparison_operator") ++ (match_operand:I48MODE 2 "reg_or_8bit_operand") ++ (match_operand:I48MODE 3 "reg_or_8bit_operand")))] ++ "" ++{ ++ operands[1] = sw_64_emit_conditional_move (operands[1], mode); ++ if (operands[1] == 0) ++ FAIL; ++}) ++ ++(define_expand "movcc" ++ [(set (match_operand:FMODE 0 "register_operand") ++ (if_then_else:FMODE ++ (match_operand 1 "comparison_operator") ++ (match_operand:FMODE 2 "reg_or_8bit_operand") ++ (match_operand:FMODE 3 "reg_or_8bit_operand")))] ++ "" ++{ ++ operands[1] = sw_64_emit_conditional_move (operands[1], mode); ++ if (operands[1] == 0) ++ FAIL; ++}) ++ ++;; These define_split definitions are used in cases when comparisons have ++;; not be stated in the correct way and we need to reverse the second ++;; comparison. For example, x >= 7 has to be done as x < 6 with the ++;; comparison that tests the result being reversed. We have one define_split ++;; for each use of a comparison. They do not match valid insns and need ++;; not generate valid insns. ++;; ++;; We can also handle equality comparisons (and inequality comparisons in ++;; cases where the resulting add cannot overflow) by doing an add followed by ++;; a comparison with zero. This is faster since the addition takes one ++;; less cycle than a compare when feeding into a conditional move. ++;; For this case, we also have an SImode pattern since we can merge the add ++;; and sign extend and the order doesn't matter. ++;; ++;; We do not do this for floating-point, since it isn't clear how the "wrong" ++;; operation could have been generated. ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (if_then_else:DI ++ (match_operator 1 "comparison_operator" ++ [(match_operand:DI 2 "reg_or_0_operand") ++ (match_operand:DI 3 "reg_or_cint_operand")]) ++ (match_operand:DI 4 "reg_or_cint_operand") ++ (match_operand:DI 5 "reg_or_cint_operand"))) ++ (clobber (match_operand:DI 6 "register_operand"))] ++ "operands[3] != const0_rtx" ++ [(set (match_dup 6) (match_dup 7)) ++ (set (match_dup 0) ++ (if_then_else:DI (match_dup 8) (match_dup 4) (match_dup 5)))] ++{ ++ enum rtx_code code = GET_CODE (operands[1]); ++ int unsignedp = (code == GEU || code == LEU || code == GTU || code == LTU); ++ ++ /* If we are comparing for equality with a constant and that constant ++ appears in the arm when the register equals the constant, use the ++ register since that is more likely to match (and to produce better code ++ if both would). 
*/ ++ ++ if (code == EQ && CONST_INT_P (operands[3]) ++ && rtx_equal_p (operands[4], operands[3])) ++ operands[4] = operands[2]; ++ ++ else if (code == NE && CONST_INT_P (operands[3]) ++ && rtx_equal_p (operands[5], operands[3])) ++ operands[5] = operands[2]; ++ ++ if (code == NE || code == EQ ++ || (extended_count (operands[2], DImode, unsignedp) >= 1 ++ && extended_count (operands[3], DImode, unsignedp) >= 1)) ++ { ++ if (CONST_INT_P (operands[3])) ++ operands[7] = gen_rtx_PLUS (DImode, operands[2], ++ GEN_INT (- INTVAL (operands[3]))); ++ else ++ operands[7] = gen_rtx_MINUS (DImode, operands[2], operands[3]); ++ ++ operands[8] = gen_rtx_fmt_ee (code, VOIDmode, operands[6], const0_rtx); ++ } ++ ++ else if (code == EQ || code == LE || code == LT ++ || code == LEU || code == LTU) ++ { ++ operands[7] = gen_rtx_fmt_ee (code, DImode, operands[2], operands[3]); ++ operands[8] = gen_rtx_NE (VOIDmode, operands[6], const0_rtx); ++ } ++ else ++ { ++ operands[7] = gen_rtx_fmt_ee (reverse_condition (code), DImode, ++ operands[2], operands[3]); ++ operands[8] = gen_rtx_EQ (VOIDmode, operands[6], const0_rtx); ++ } ++}) ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (if_then_else:DI ++ (match_operator 1 "comparison_operator" ++ [(match_operand:SI 2 "reg_or_0_operand") ++ (match_operand:SI 3 "reg_or_cint_operand")]) ++ (match_operand:DI 4 "reg_or_8bit_operand") ++ (match_operand:DI 5 "reg_or_8bit_operand"))) ++ (clobber (match_operand:DI 6 "register_operand"))] ++ "operands[3] != const0_rtx ++ && (GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE)" ++ [(set (match_dup 6) (match_dup 7)) ++ (set (match_dup 0) ++ (if_then_else:DI (match_dup 8) (match_dup 4) (match_dup 5)))] ++{ ++ enum rtx_code code = GET_CODE (operands[1]); ++ int unsignedp = (code == GEU || code == LEU || code == GTU || code == LTU); ++ rtx tem; ++ ++ if ((code != NE && code != EQ ++ && ! (extended_count (operands[2], DImode, unsignedp) >= 1 ++ && extended_count (operands[3], DImode, unsignedp) >= 1))) ++ FAIL; ++ ++ if (CONST_INT_P (operands[3])) ++ tem = gen_rtx_PLUS (SImode, operands[2], ++ GEN_INT (- INTVAL (operands[3]))); ++ else ++ tem = gen_rtx_MINUS (SImode, operands[2], operands[3]); ++ ++ operands[7] = gen_rtx_SIGN_EXTEND (DImode, tem); ++ operands[8] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode, ++ operands[6], const0_rtx); ++}) ++ ++;; Prefer to use cmp and arithmetic when possible instead of a cmove. ++ ++(define_split ++ [(set (match_operand 0 "register_operand") ++ (if_then_else (match_operator 1 "signed_comparison_operator" ++ [(match_operand:DI 2 "reg_or_0_operand") ++ (const_int 0)]) ++ (match_operand 3 "const_int_operand") ++ (match_operand 4 "const_int_operand")))] ++ "" ++ [(const_int 0)] ++{ ++ if (sw_64_split_conditional_move (GET_CODE (operands[1]), operands[0], ++ operands[2], operands[3], operands[4])) ++ DONE; ++ else ++ FAIL; ++}) ++ ++;; ??? Why combine is allowed to create such non-canonical rtl, I don't know. ++;; Oh well, we match it in movcc, so it must be partially our fault. 
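++;; To make the transformation described above concrete (schematic only,
++;; not code from this port): an equality test against a constant such as
++;;   r = (x == 100) ? a : b;
++;; is rewritten as an addition followed by a test against zero,
++;;   t = x + (-100);  r = (t == 0) ? a : b;
++;; so the add result feeds the conditional move directly and no separate
++;; compare instruction is needed.  The split that follows handles the
++;; swapped-operand form that combine sometimes produces.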
++(define_split ++ [(set (match_operand 0 "register_operand") ++ (if_then_else (match_operator 1 "signed_comparison_operator" ++ [(const_int 0) ++ (match_operand:DI 2 "reg_or_0_operand")]) ++ (match_operand 3 "const_int_operand") ++ (match_operand 4 "const_int_operand")))] ++ "" ++ [(const_int 0)] ++{ ++ if (sw_64_split_conditional_move (swap_condition (GET_CODE (operands[1])), ++ operands[0], operands[2], operands[3], ++ operands[4])) ++ DONE; ++ else ++ FAIL; ++}) ++ ++(define_insn_and_split "*cmp_sadd_di" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (plus:DI (if_then_else:DI ++ (match_operator 1 "sw_64_zero_comparison_operator" ++ [(match_operand:DI 2 "reg_or_0_operand" "rJ") ++ (const_int 0)]) ++ (match_operand:DI 3 "const48_operand" "I") ++ (const_int 0)) ++ (match_operand:DI 4 "sext_add_operand" "rIO"))) ++ (clobber (match_scratch:DI 5 "=r"))] ++ "" ++ "#" ++ "" ++ [(set (match_dup 5) ++ (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) ++ (set (match_dup 0) ++ (plus:DI (mult:DI (match_dup 5) (match_dup 3)) ++ (match_dup 4)))] ++{ ++ if (can_create_pseudo_p ()) ++ operands[5] = gen_reg_rtx (DImode); ++ else if (reg_overlap_mentioned_p (operands[5], operands[4])) ++ operands[5] = operands[0]; ++}) ++ ++(define_insn_and_split "*cmp_sadd_si" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (plus:SI (if_then_else:SI ++ (match_operator 1 "sw_64_zero_comparison_operator" ++ [(match_operand:DI 2 "reg_or_0_operand" "rJ") ++ (const_int 0)]) ++ (match_operand:SI 3 "const48_operand" "I") ++ (const_int 0)) ++ (match_operand:SI 4 "sext_add_operand" "rIO"))) ++ (clobber (match_scratch:DI 5 "=r"))] ++ "" ++ "#" ++ "" ++ [(set (match_dup 5) ++ (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) ++ (set (match_dup 0) ++ (plus:SI (mult:SI (match_dup 6) (match_dup 3)) ++ (match_dup 4)))] ++{ ++ if (can_create_pseudo_p ()) ++ operands[5] = gen_reg_rtx (DImode); ++ else if (reg_overlap_mentioned_p (operands[5], operands[4])) ++ operands[5] = gen_lowpart (DImode, operands[0]); ++ ++ operands[6] = gen_lowpart (SImode, operands[5]); ++}) ++ ++(define_insn_and_split "*cmp_sadd_sidi" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (plus:SI (if_then_else:SI ++ (match_operator 1 "sw_64_zero_comparison_operator" ++ [(match_operand:DI 2 "reg_or_0_operand" "rJ") ++ (const_int 0)]) ++ (match_operand:SI 3 "const48_operand" "I") ++ (const_int 0)) ++ (match_operand:SI 4 "sext_add_operand" "rIO")))) ++ (clobber (match_scratch:DI 5 "=r"))] ++ "" ++ "#" ++ "" ++ [(set (match_dup 5) ++ (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) ++ (set (match_dup 0) ++ (sign_extend:DI (plus:SI (mult:SI (match_dup 6) (match_dup 3)) ++ (match_dup 4))))] ++{ ++ if (can_create_pseudo_p ()) ++ operands[5] = gen_reg_rtx (DImode); ++ else if (reg_overlap_mentioned_p (operands[5], operands[4])) ++ operands[5] = operands[0]; ++ ++ operands[6] = gen_lowpart (SImode, operands[5]); ++}) ++ ++(define_insn_and_split "*cmp_ssub_di" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (minus:DI (if_then_else:DI ++ (match_operator 1 "sw_64_zero_comparison_operator" ++ [(match_operand:DI 2 "reg_or_0_operand" "rJ") ++ (const_int 0)]) ++ (match_operand:DI 3 "const48_operand" "I") ++ (const_int 0)) ++ (match_operand:DI 4 "reg_or_8bit_operand" "rI"))) ++ (clobber (match_scratch:DI 5 "=r"))] ++ "" ++ "#" ++ "" ++ [(set (match_dup 5) ++ (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) ++ (set (match_dup 0) ++ (minus:DI (mult:DI (match_dup 5) (match_dup 3)) ++ (match_dup 4)))] ++{ ++ if 
(can_create_pseudo_p ()) ++ operands[5] = gen_reg_rtx (DImode); ++ else if (reg_overlap_mentioned_p (operands[5], operands[4])) ++ operands[5] = operands[0]; ++}) ++ ++(define_insn_and_split "*cmp_ssub_si" ++ [(set (match_operand:SI 0 "register_operand" "=r") ++ (minus:SI (if_then_else:SI ++ (match_operator 1 "sw_64_zero_comparison_operator" ++ [(match_operand:DI 2 "reg_or_0_operand" "rJ") ++ (const_int 0)]) ++ (match_operand:SI 3 "const48_operand" "I") ++ (const_int 0)) ++ (match_operand:SI 4 "reg_or_8bit_operand" "rI"))) ++ (clobber (match_scratch:DI 5 "=r"))] ++ "" ++ "#" ++ "" ++ [(set (match_dup 5) ++ (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) ++ (set (match_dup 0) ++ (minus:SI (mult:SI (match_dup 6) (match_dup 3)) ++ (match_dup 4)))] ++{ ++ if (can_create_pseudo_p ()) ++ operands[5] = gen_reg_rtx (DImode); ++ else if (reg_overlap_mentioned_p (operands[5], operands[4])) ++ operands[5] = gen_lowpart (DImode, operands[0]); ++ ++ operands[6] = gen_lowpart (SImode, operands[5]); ++}) ++ ++(define_insn_and_split "*cmp_ssub_sidi" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (sign_extend:DI ++ (minus:SI (if_then_else:SI ++ (match_operator 1 "sw_64_zero_comparison_operator" ++ [(match_operand:DI 2 "reg_or_0_operand" "rJ") ++ (const_int 0)]) ++ (match_operand:SI 3 "const48_operand" "I") ++ (const_int 0)) ++ (match_operand:SI 4 "reg_or_8bit_operand" "rI")))) ++ (clobber (match_scratch:DI 5 "=r"))] ++ "" ++ "#" ++ "" ++ [(set (match_dup 5) ++ (match_op_dup:DI 1 [(match_dup 2) (const_int 0)])) ++ (set (match_dup 0) ++ (sign_extend:DI (minus:SI (mult:SI (match_dup 6) (match_dup 3)) ++ (match_dup 4))))] ++{ ++ if (can_create_pseudo_p ()) ++ operands[5] = gen_reg_rtx (DImode); ++ else if (reg_overlap_mentioned_p (operands[5], operands[4])) ++ operands[5] = operands[0]; ++ ++ operands[6] = gen_lowpart (SImode, operands[5]); ++}) ++ ++;; Here are the CALL and unconditional branch insns. Calls on NT and SYSV ++;; work differently, so we have different patterns for each. ++ ++(define_expand "call" ++ [(use (match_operand:DI 0)) ++ (use (match_operand 1)) ++ (use (match_operand 2)) ++ (use (match_operand 3))] ++ "" ++{ ++ emit_call_insn (gen_call_osf (operands[0], operands[1])); ++ DONE; ++}) ++ ++(define_expand "sibcall" ++ [(parallel [(call (mem:DI (match_operand 0)) ++ (match_operand 1)) ++ (unspec [(reg:DI 29)] UNSPEC_SIBCALL)])] ++ "" ++{ ++ gcc_assert (MEM_P (operands[0])); ++ operands[0] = XEXP (operands[0], 0); ++}) ++ ++(define_expand "call_osf" ++ [(parallel [(call (mem:DI (match_operand 0)) ++ (match_operand 1)) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))])] ++ "" ++{ ++ gcc_assert (MEM_P (operands[0])); ++ ++ operands[0] = XEXP (operands[0], 0); ++ if (! 
call_operand (operands[0], Pmode)) ++ operands[0] = copy_to_mode_reg (Pmode, operands[0]); ++}) ++ ++ ++(define_expand "call_value" ++ [(use (match_operand 0)) ++ (use (match_operand:DI 1)) ++ (use (match_operand 2)) ++ (use (match_operand 3)) ++ (use (match_operand 4))] ++ "" ++{ ++ emit_call_insn (gen_call_value_osf (operands[0], operands[1], ++ operands[2])); ++ DONE; ++}) ++ ++(define_expand "sibcall_value" ++ [(parallel [(set (match_operand 0) ++ (call (mem:DI (match_operand 1)) ++ (match_operand 2))) ++ (unspec [(reg:DI 29)] UNSPEC_SIBCALL)])] ++ "" ++{ ++ gcc_assert (MEM_P (operands[1])); ++ operands[1] = XEXP (operands[1], 0); ++}) ++ ++(define_expand "call_value_osf" ++ [(parallel [(set (match_operand 0) ++ (call (mem:DI (match_operand 1)) ++ (match_operand 2))) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))])] ++ "" ++{ ++ gcc_assert (MEM_P (operands[1])); ++ ++ operands[1] = XEXP (operands[1], 0); ++ if (! call_operand (operands[1], Pmode)) ++ operands[1] = copy_to_mode_reg (Pmode, operands[1]); ++}) ++ ++(define_insn "*call_osf_1_er_noreturn" ++ [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) ++ (match_operand 1)) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "TARGET_EXPLICIT_RELOCS ++ && find_reg_note (insn, REG_NORETURN, NULL_RTX)" ++ "@ ++ call $26,($27),0 ++ bsr $26,%0\t\t!samegp ++ ldl $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#" ++ [(set_attr "type" "call") ++ (set_attr "length" "*,*,8")]) ++ ++(define_insn "*call_osf_1_er_setfpec0" ++ [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) ++ (match_operand 1)) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 3" ++ "@ ++ call $26,(%0),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* ++ bsr $26,%0\t\t!samegp ++ ldl $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" ++ [(set_attr "type" "call") ++ (set_attr "length" "12,*,16")]) ++ ++(define_insn "*call_osf_1_er_setfpec1" ++ [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) ++ (match_operand 1)) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 1 " ++ "@ ++ call $26,(%0),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* ++ bsr $26,%0\t\t!samegp ++ ldl $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" ++ [(set_attr "type" "call") ++ (set_attr "length" "12,*,16")]) ++ ++(define_insn "*call_osf_1_er" ++ [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) ++ (match_operand 1)) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "TARGET_EXPLICIT_RELOCS" ++ "@ ++ call $26,(%0),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* ++ bsr $26,%0\t\t!samegp ++ ldl $27,%0($29)\t\t!literal!%#\;call $26,($27),%0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" ++ [(set_attr "type" "call") ++ (set_attr "length" "12,*,16")]) ++ ++;; We must use peep2 instead of a split because we need accurate life ++;; information for $gp. Consider the case of { bar (); while (1); }. ++;;(define_peephole2 ++;; [(parallel [(call (mem:DI (match_operand:DI 0 "call_operand")) ++;; (match_operand 1)) ++;; (use (reg:DI 29)) ++;; (clobber (reg:DI 26))])] ++;; "TARGET_EXPLICIT_RELOCS && reload_completed ++;; && ! 
samegp_function_operand (operands[0], Pmode) ++;; && (peep2_regno_dead_p (1, 29) ++;; || find_reg_note (insn, REG_NORETURN, NULL_RTX))" ++;; [(parallel [(call (mem:DI (match_dup 2)) ++;; (match_dup 1)) ++;; (use (reg:DI 29)) ++;; (use (match_dup 0)) ++;; (use (match_dup 3)) ++;; (clobber (reg:DI 26))])] ++;;{ ++;; if (CONSTANT_P (operands[0])) ++;; { ++;; operands[2] = gen_rtx_REG (Pmode, 27); ++;; operands[3] = GEN_INT (sw_64_next_sequence_number++); ++;; emit_insn (gen_movdi_er_high_g (operands[2], pic_offset_table_rtx, ++;; operands[0], operands[3])); ++;; } ++;; else ++;; { ++;; operands[2] = operands[0]; ++;; operands[0] = const0_rtx; ++;; operands[3] = const0_rtx; ++;; } ++;;}) ++ ++;;(define_peephole2 ++;; [(parallel [(call (mem:DI (match_operand:DI 0 "call_operand")) ++;; (match_operand 1)) ++;; (use (reg:DI 29)) ++;; (clobber (reg:DI 26))])] ++;; "TARGET_EXPLICIT_RELOCS && reload_completed ++;; && ! samegp_function_operand (operands[0], Pmode) ++;; && ! (peep2_regno_dead_p (1, 29) ++;; || find_reg_note (insn, REG_NORETURN, NULL_RTX))" ++;; [(parallel [(call (mem:DI (match_dup 2)) ++;; (match_dup 1)) ++;; (set (match_dup 5) ++;; (unspec:DI [(match_dup 5) (match_dup 3)] UNSPEC_LDGP1)) ++;; (use (match_dup 0)) ++;; (use (match_dup 4)) ++;; (clobber (reg:DI 26))]) ++;; (set (match_dup 5) ++;; (unspec:DI [(match_dup 5) (match_dup 3)] UNSPEC_LDGP2))] ++;;{ ++;; if (CONSTANT_P (operands[0])) ++;; { ++;; operands[2] = gen_rtx_REG (Pmode, 27); ++;; operands[4] = GEN_INT (sw_64_next_sequence_number++); ++;; emit_insn (gen_movdi_er_high_g (operands[2], pic_offset_table_rtx, ++;; operands[0], operands[4])); ++;; } ++;; else ++;; { ++;; operands[2] = operands[0]; ++;; operands[0] = const0_rtx; ++;; operands[4] = const0_rtx; ++;; } ++;; operands[3] = GEN_INT (sw_64_next_sequence_number++); ++;; operands[5] = pic_offset_table_rtx; ++;;}) ++ ++ ++(define_insn "*call_osf_2_er_nogp" ++ [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) ++ (match_operand 1)) ++ (use (reg:DI 29)) ++ (use (match_operand 2)) ++ (use (match_operand 3 "const_int_operand")) ++ (clobber (reg:DI 26))] ++ "TARGET_EXPLICIT_RELOCS" ++ "call $26,(%0),%2%J3" ++ [(set_attr "type" "call")]) ++ ++ ++(define_insn "*call_osf_2_er_setfpec0" ++ [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) ++ (match_operand 1)) ++ (set (reg:DI 29) ++ (unspec:DI [(reg:DI 29) (match_operand 4 "const_int_operand")] ++ UNSPEC_LDGP1)) ++ (use (match_operand 2)) ++ (use (match_operand 3 "const_int_operand")) ++ (clobber (reg:DI 26))] ++ "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 3 " ++ "call $26,(%0),%2%J3\;ldih $29,0($26)\t\t!gpdisp!%4" ++ [(set_attr "type" "call") ++ (set_attr "cannot_copy" "true") ++ (set_attr "length" "8")]) ++ ++(define_insn "*call_osf_2_er_setfpec1" ++ [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) ++ (match_operand 1)) ++ (set (reg:DI 29) ++ (unspec:DI [(reg:DI 29) (match_operand 4 "const_int_operand")] ++ UNSPEC_LDGP1)) ++ (use (match_operand 2)) ++ (use (match_operand 3 "const_int_operand")) ++ (clobber (reg:DI 26))] ++ "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 1 " ++ "call $26,(%0),%2%J3\;ldih $29,0($26)\t\t!gpdisp!%4" ++ [(set_attr "type" "call") ++ (set_attr "cannot_copy" "true") ++ (set_attr "length" "8")]) ++ ++(define_insn "*call_osf_2_er" ++ [(call (mem:DI (match_operand:DI 0 "register_operand" "c")) ++ (match_operand 1)) ++ (set (reg:DI 29) ++ (unspec:DI [(reg:DI 29) (match_operand 4 "const_int_operand")] ++ UNSPEC_LDGP1)) ++ (use (match_operand 2)) ++ (use (match_operand 3 
"const_int_operand")) ++ (clobber (reg:DI 26))] ++ "TARGET_EXPLICIT_RELOCS" ++ "call $26,(%0),%2%J3\;ldih $29,0($26)\t\t!gpdisp!%4" ++ [(set_attr "type" "call") ++ (set_attr "cannot_copy" "true") ++ (set_attr "length" "8")]) ++ ++(define_insn "*call_osf_1_noreturn" ++ [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) ++ (match_operand 1)) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "! TARGET_EXPLICIT_RELOCS ++ && find_reg_note (insn, REG_NORETURN, NULL_RTX)" ++ "@ ++ call $26,($27),0 ++ bsr $26,$%0..ng ++ call $26,%0" ++ [(set_attr "type" "call") ++ (set_attr "length" "*,*,8")]) ++ ++(define_insn "*call_osf_1" ++ [(call (mem:DI (match_operand:DI 0 "call_operand" "c,R,s")) ++ (match_operand 1)) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "! TARGET_EXPLICIT_RELOCS" ++ "@ ++ call $26,($27),0\;ldgp $29,0($26) ++ bsr $26,$%0..ng ++ call $26,%0\;ldgp $29,0($26)" ++ [(set_attr "type" "call") ++ (set_attr "length" "12,*,16")]) ++ ++(define_insn "*sibcall_osf_1_er" ++ [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "R,s")) ++ (match_operand 1)) ++ (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] ++ "TARGET_EXPLICIT_RELOCS" ++ "@ ++ br $31,%0\t\t!samegp ++ ldl $27,%0($29)\t\t!literal!%#\;jmp $31,($27),%0\t\t!lituse_jsr!%#" ++ [(set_attr "type" "call") ++ (set_attr "length" "*,8")]) ++ ++;; Note that the assembler expands "jmp foo" with $at, which ++;; doesn't do what we want. ++(define_insn "*sibcall_osf_1" ++ [(call (mem:DI (match_operand:DI 0 "symbolic_operand" "R,s")) ++ (match_operand 1)) ++ (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] ++ "! TARGET_EXPLICIT_RELOCS" ++ "@ ++ br $31,$%0..ng ++ ldi $27,%0\;jmp $31,($27),%0" ++ [(set_attr "type" "call") ++ (set_attr "length" "*,8")]) ++ ++;; Call subroutine returning any type. ++ ++(define_expand "untyped_call" ++ [(parallel [(call (match_operand 0) ++ (const_int 0)) ++ (match_operand 1) ++ (match_operand 2)])] ++ "" ++{ ++ int i; ++ ++ emit_call_insn (gen_call (operands[0], const0_rtx, NULL, const0_rtx)); ++ ++ for (i = 0; i < XVECLEN (operands[2], 0); i++) ++ { ++ rtx set = XVECEXP (operands[2], 0, i); ++ emit_move_insn (SET_DEST (set), SET_SRC (set)); ++ } ++ ++ /* The optimizer does not know that the call sets the function value ++ registers we stored in the result block. We avoid problems by ++ claiming that all hard registers are used and clobbered at this ++ point. */ ++ emit_insn (gen_blockage ()); ++ ++ DONE; ++}) ++ ++;; UNSPEC_VOLATILE is considered to use and clobber all hard registers and ++;; all of memory. This blocks insns from being moved across this point. 
++ ++(define_insn "blockage" ++ [(unspec_volatile [(const_int 0)] UNSPECV_BLOCKAGE)] ++ "" ++ "" ++ [(set_attr "length" "0") ++ (set_attr "type" "none")]) ++ ++(define_insn "jump" ++ [(set (pc) ++ (label_ref (match_operand 0)))] ++ "" ++ "br $31,%l0" ++ [(set_attr "type" "ibr")]) ++ ++;; "ret $31,($26),1" ++(define_expand "return" ++ [(return)] ++ "direct_return ()") ++ ++(define_insn "*return_internal" ++ [(return)] ++ "reload_completed" ++{ ++ return "ret $31,($26),1"; ++} ++ ++ [(set_attr "type" "ibr")]) ++ ++(define_insn "indirect_jump" ++ [(set (pc) (match_operand:DI 0 "register_operand" "r"))] ++ "" ++ "jmp $31,(%0),0" ++ [(set_attr "type" "ibr")]) ++ ++(define_expand "tablejump" ++ [(parallel [(set (pc) ++ (match_operand 0 "register_operand")) ++ (use (label_ref:DI (match_operand 1)))])] ++ "" ++{ ++ rtx dest = gen_reg_rtx (DImode); ++ emit_insn (gen_extendsidi2 (dest, operands[0])); ++ emit_insn (gen_adddi3 (dest, pic_offset_table_rtx, dest)); ++ operands[0] = dest; ++}) ++ ++(define_insn "*tablejump_internal" ++ [(set (pc) ++ (match_operand:DI 0 "register_operand" "r")) ++ (use (label_ref (match_operand 1)))] ++ "" ++ "jmp $31,(%0),0" ++ [(set_attr "type" "ibr")]) ++ ++;; call_pal->sys_call 0x86 ++;; Cache flush. Used by sw_64_trampoline_init. 0x86 is PAL_imb, but we don't ++;; want to have to include pal.h in our .s file. ++(define_insn "imb" ++ [(unspec_volatile [(const_int 0)] UNSPECV_IMB)] ++ "" ++ "sys_call 0x86" ++ [(set_attr "type" "callpal")]) ++ ++(define_expand "clear_cache" ++ [(match_operand:DI 0) ; region start ++ (match_operand:DI 1)] ; region end ++ "" ++{ ++ emit_insn (gen_imb ()); ++ DONE; ++}) ++ ++;; call_pal ->sys_call 0x80 ++;; BUGCHK is documented common to SYSV PALcode. ++(define_insn "trap" ++ [(trap_if (const_int 1) (const_int 0)) ++ (use (reg:DI 29))] ++ "" ++ "sys_call 0x80" ++ [(set_attr "type" "callpal")]) ++ ++;; For userland, we load the thread pointer from the TCB. ++;; For the kernel, we load the per-cpu private value. ++ ++;; call_pal->sys_call xx ++(define_insn "get_thread_pointerdi" ++ [(set (match_operand:DI 0 "register_operand" "=v") ++ (unspec:DI [(const_int 0)] UNSPEC_TP))] ++ "" ++{ ++ if (TARGET_TLS_KERNEL) ++ return "sys_call 0x32"; ++ else if (flag_sw_rtid == 1) ++ return "rtid %0"; ++ else ++ return "sys_call 0x9e"; ++ ;;return "rtid %0"; ++} ++ [(set_attr "type" "callpal")]) ++ ++;; For completeness, and possibly a __builtin function, here's how to ++;; set the thread pointer. Since we don't describe enough of this ++;; quantity for CSE, we have to use a volatile unspec, and then there's ++;; not much point in creating an R16_REG register class. ++ ++(define_expand "set_thread_pointerdi" ++ [(set (reg:DI 16) (match_operand:DI 0 "input_operand")) ++ (unspec_volatile [(reg:DI 16)] UNSPECV_SET_TP)] ++ "") ++ ++;; call_pal->sys_call xx ++(define_insn "*set_tp" ++ [(unspec_volatile [(reg:DI 16)] UNSPECV_SET_TP)] ++ "" ++{ ++ if (TARGET_TLS_KERNEL) ++ return "sys_call 0x31"; ++ else ++ return "sys_call 0x9f"; ++} ++ [(set_attr "type" "callpal")]) ++ ++ ++;; Finally, we have the basic data motion insns. The byte and word insns ++;; are done via define_expand. Start with the floating-point insns, since ++;; they are simpler. ++ ++(define_expand "movsf" ++ [(set (match_operand:SF 0 "nonimmediate_operand") ++ (match_operand:SF 1 "general_operand"))] ++ "" ++{ ++ if (MEM_P (operands[0]) ++ && ! 
reg_or_0_operand (operands[1], SFmode)) ++ operands[1] = force_reg (SFmode, operands[1]); ++}) ++ ++(define_insn "*movsf" ++ [(set (match_operand:SF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m,f,*r") ++ (match_operand:SF 1 "input_operand" "fG,m,*rG,m,fG,*r,*r,f"))] ++ "register_operand (operands[0], SFmode) ++ || reg_or_0_operand (operands[1], SFmode)" ++ "@ ++ fcpys %R1,%R1,%0 ++ fld%,%U1 %0,%1 ++ bis $31,%r1,%0 ++ ldw %0,%1 ++ fst%,%U0 %R1,%0 ++ stw %r1,%0 ++ ifmovs %1,%0 ++ fimovs %1,%0" ++ [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist,itof,ftoi") ++ (set_attr "isa" "*,*,*,*,*,*,fix,fix")]) ++ ++(define_expand "movdf" ++ [(set (match_operand:DF 0 "nonimmediate_operand") ++ (match_operand:DF 1 "general_operand"))] ++ "" ++{ ++ if (MEM_P (operands[0]) ++ && ! reg_or_0_operand (operands[1], DFmode)) ++ operands[1] = force_reg (DFmode, operands[1]); ++}) ++(define_insn "*movdf" ++ [(set (match_operand:DF 0 "nonimmediate_operand" "=f,f,*r,*r,m,m,f,*r") ++ (match_operand:DF 1 "input_operand" "fG,m,*rG,m,fG,*r,*r,f"))] ++ "register_operand (operands[0], DFmode) ++ || reg_or_0_operand (operands[1], DFmode)" ++ "@ ++ fcpys %R1,%R1,%0 ++ fld%-%U1 %0,%1 ++ bis $31,%r1,%0 ++ ldl %0,%1 ++ fst%-%U0 %R1,%0 ++ stl %r1,%0 ++ ifmovd %1,%0 ++ fimovd %1,%0" ++ [(set_attr "type" "fcpys,fld,ilog,ild,fst,ist,itof,ftoi") ++ (set_attr "isa" "*,*,*,*,*,*,fix,fix")]) ++ ++;; Subregs suck for register allocation. Pretend we can move TFmode ++;; data between general registers until after reload. ++;; ??? Is this still true now that we have the lower-subreg pass? ++ ++(define_expand "movtf" ++ [(set (match_operand:TF 0 "nonimmediate_operand") ++ (match_operand:TF 1 "general_operand"))] ++ "" ++{ ++ if (MEM_P (operands[0]) ++ && ! reg_or_0_operand (operands[1], TFmode)) ++ operands[1] = force_reg (TFmode, operands[1]); ++}) ++ ++(define_insn_and_split "*movtf_internal" ++ [(set (match_operand:TF 0 "nonimmediate_operand" "=r,m") ++ (match_operand:TF 1 "input_operand" "rmG,rG"))] ++ "register_operand (operands[0], TFmode) ++ || reg_or_0_operand (operands[1], TFmode)" ++ "#" ++ "reload_completed" ++ [(set (match_dup 0) (match_dup 2)) ++ (set (match_dup 1) (match_dup 3))] ++ "sw_64_split_tmode_pair (operands, TFmode, true);") ++ ++;; We do two major things here: handle mem->mem and construct long ++;; constants. ++ ++(define_expand "movsi" ++ [(set (match_operand:SI 0 "nonimmediate_operand") ++ (match_operand:SI 1 "general_operand"))] ++ "" ++{ ++ if (sw_64_expand_mov (SImode, operands)) ++ DONE; ++}) ++ ++(define_insn "*movsi" ++ [(set (match_operand:SI 0 "nonimmediate_operand" "=r,r,r,r,r,r,r,m,r") ++ (match_operand:SI 1 "input_operand" "rJ,K,L,T,s,n,m,rJ,s"))] ++ "register_operand (operands[0], SImode) ++ || reg_or_0_operand (operands[1], SImode)" ++ "@ ++ bis $31,%r1,%0 ++ ldi %0,%1($31) ++ ldih %0,%h1($31) ++ # ++ # ++ # ++ ldw%U1 %0,%1 ++ stw%U0 %r1,%0 ++ ldi %0,%1" ++ [(set_attr "type" "ilog,iadd,iadd,iadd,iadd,multi,ild,ist,ldsym") ++ (set_attr "isa" "*,*,*,*,*,*,*,*,vms")]) ++ ++;; Split a load of a large constant into the appropriate two-insn ++;; sequence. 
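++;; For instance (schematic register names; the mnemonics are the ones used
++;; in the *movsi alternatives above), a constant such as 0x12345678 becomes
++;;   ldih $1,0x1234($31)   # high 16 bits, shifted left by 16
++;;   ldi  $1,0x5678($1)    # add the sign-extended low 16 bits
++;; with the high half incremented by one whenever bit 15 of the low half
++;; is set, so that the sign extension of the low part cancels out.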
++ ++(define_split ++ [(set (match_operand:SI 0 "register_operand") ++ (match_operand:SI 1 "non_add_const_operand"))] ++ "" ++ [(const_int 0)] ++{ ++ if (sw_64_split_const_mov (SImode, operands)) ++ DONE; ++ else ++ FAIL; ++}) ++ ++(define_insn "*movdi_er_low_l" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (lo_sum:DI (match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "local_symbolic_operand")))] ++ "TARGET_EXPLICIT_RELOCS" ++{ ++ if (true_regnum (operands[1]) == 29) ++ return "ldi %0,%2(%1)\t\t!gprel"; ++ else ++ return "ldi %0,%2(%1)\t\t!gprellow"; ++} ++ [(set_attr "usegp" "yes")]) ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "small_symbolic_operand"))] ++ "TARGET_EXPLICIT_RELOCS && reload_completed" ++ [(set (match_dup 0) ++ (lo_sum:DI (match_dup 2) (match_dup 1)))] ++ "operands[2] = pic_offset_table_rtx;") ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "local_symbolic_operand"))] ++ "TARGET_EXPLICIT_RELOCS && reload_completed" ++ [(set (match_dup 0) ++ (plus:DI (match_dup 2) (high:DI (match_dup 1)))) ++ (set (match_dup 0) ++ (lo_sum:DI (match_dup 0) (match_dup 1)))] ++ "operands[2] = pic_offset_table_rtx;") ++ ++(define_split ++ [(match_operand 0 "some_small_symbolic_operand")] ++ "" ++ [(match_dup 0)] ++ "operands[0] = split_small_symbolic_operand (operands[0]);") ++ ++;; Accepts any symbolic, not just global, since function calls that ++;; don't go via bsr still use !literal in hopes of linker relaxation. ++(define_insn "movdi_er_high_g" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "symbolic_operand") ++ (match_operand 3 "const_int_operand")] ++ UNSPEC_LITERAL))] ++ "TARGET_EXPLICIT_RELOCS" ++{ ++ if (INTVAL (operands[3]) == 0) ++ return "ldl %0,%2(%1)\t\t!literal"; ++ else ++ return "ldl %0,%2(%1)\t\t!literal!%3"; ++} ++ [(set_attr "type" "ldsym")]) ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "global_symbolic_operand"))] ++ "TARGET_EXPLICIT_RELOCS && reload_completed" ++ [(set (match_dup 0) ++ (unspec:DI [(match_dup 2) ++ (match_dup 1) ++ (const_int 0)] UNSPEC_LITERAL))] ++ "operands[2] = pic_offset_table_rtx;") ++ ++(define_insn "movdi_er_tlsgd" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "symbolic_operand") ++ (match_operand 3 "const_int_operand")] ++ UNSPEC_TLSGD))] ++ "HAVE_AS_TLS" ++{ ++ if (INTVAL (operands[3]) == 0) ++ return "ldi %0,%2(%1)\t\t!tlsgd"; ++ else ++ return "ldi %0,%2(%1)\t\t!tlsgd!%3"; ++} ++[(set_attr "cannot_copy" "true")]) ++ ++ ++(define_insn "*movdi_er_tlsrelgot" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "symbolic_operand") ++ (match_operand 3 "const_int_operand")] ++ UNSPEC_TLSRELGOT))] ++ "HAVE_AS_TLS" ++{ ++ if (INTVAL (operands[3]) == 0) ++ return "ldih %0,%2(%1)\t\t!tlsrel_got"; ++ else ++ return "ldih %0,%2(%1)\t\t!tlsrel_got!%3"; ++} ++[(set_attr "cannot_copy" "true")]) ++ ++ ++(define_insn "movdi_er_tlsldm" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand 2 "const_int_operand")] ++ UNSPEC_TLSLDM))] ++ "HAVE_AS_TLS" ++{ ++ if (INTVAL (operands[2]) == 0) ++ return "ldi %0,%&(%1)\t\t!tlsldm"; ++ else ++ return "ldi 
%0,%&(%1)\t\t!tlsldm!%2"; ++} ++[(set_attr "cannot_copy" "true")]) ++ ++;; insert ldih insn with tlsrelgot relocation before ldl insn with gotdtprel relocation. ++(define_insn "*movdi_er_gotdtprel" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "symbolic_operand") ++ (match_operand 3 "const_int_operand")] ++ UNSPEC_GOTDTPREL))] ++ "HAVE_AS_TLS" ++{ ++ if (INTVAL (operands[3]) == 0) ++ return "ldl %0,%2(%1)\t\t!gotdtprel"; ++ else ++ return "ldl %0,%2(%1)\t\t!gotdtprel!%3"; ++} ++[(set_attr "type" "ild") ++ (set_attr "usegp" "yes")]) ++ ++(define_insn "*movdi_er_gotdtp" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "symbolic_operand")] ++ UNSPEC_DTPREL))] ++ "HAVE_AS_TLS" ++ "ldl %0,%2(%1)\t\t!gotdtprel" ++ [(set_attr "type" "ild") ++ (set_attr "usegp" "yes")]) ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "gotdtp_symbolic_operand"))] ++ "HAVE_AS_TLS && reload_completed" ++ [(set (match_dup 0) ++ (unspec:DI [(match_dup 2) ++ (match_dup 1)] UNSPEC_DTPREL))] ++{ ++ operands[1] = XVECEXP (XEXP (operands[1], 0), 0, 0); ++ operands[2] = pic_offset_table_rtx; ++}) ++ ++(define_insn "*movdi_er_gottprel" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "symbolic_operand") ++ (match_operand 3 "const_int_operand")] ++ UNSPEC_TPREL))] ++ "HAVE_AS_TLS" ++{ ++ if (INTVAL (operands[3]) == 0) ++ return "ldl %0,%2(%1)\t\t!gottprel"; ++ else ++ return "ldl %0,%2(%1)\t\t!gottprel!%3"; ++} ++[(set_attr "type" "ild") ++ (set_attr "usegp" "yes")]) ++ ++(define_insn "*movdi_er_gottp" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand:DI 2 "symbolic_operand")] ++ UNSPEC_TPREL))] ++ "HAVE_AS_TLS" ++ "ldl %0,%2(%1)\t\t!gottprel" ++ [(set_attr "type" "ild") ++ (set_attr "usegp" "yes")]) ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "gottp_symbolic_operand"))] ++ "HAVE_AS_TLS && reload_completed" ++ [(set (match_dup 0) ++ (unspec:DI [(match_dup 2) ++ (match_dup 1)] UNSPEC_TPREL))] ++{ ++ operands[1] = XVECEXP (XEXP (operands[1], 0), 0, 0); ++ operands[2] = pic_offset_table_rtx; ++}) ++(define_insn "*movdi" ++ [(set (match_operand:DI 0 "nonimmediate_operand" ++ "=r,r,r,r,r,r,r,r, m, *f,*f, Q, r,*f") ++ (match_operand:DI 1 "input_operand" ++ "rJ,K,L,T,s,n,s,m,rJ,*fJ, Q,*f,*f, r"))] ++ "register_operand (operands[0], DImode) ++ || reg_or_0_operand (operands[1], DImode)" ++ "@ ++ mov %r1,%0 ++ ldi %0,%1($31) ++ ldih %0,%h1($31) ++ # ++ # ++ # ++ ldi %0,%1 ++ ldl%A1%U1 %0,%1 ++ stl%A0%U0 %r1,%0 ++ fmov %R1,%0 ++ fldd%U1 %0,%1 ++ fstd%U0 %R1,%0 ++ fimovd %1,%0 ++ ifmovd %1,%0" ++ [(set_attr "type" "ilog,iadd,iadd,iadd,ldsym,multi,ldsym,ild,ist,fcpys,fld,fst,ftoi,itof") ++ (set_attr "isa" "*,*,*,er,er,*,ner,*,*,*,*,*,fix,fix") ++ (set_attr "usegp" "*,*,*,yes,*,*,*,*,*,*,*,*,*,*")]) ++ ++(define_insn "force_movdi" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r")] ++ UNSPECV_FORCE_MOV))] ++ "" ++ "mov %1,%0" ++ [(set_attr "type" "ilog")]) ++ ++;; We do three major things here: handle mem->mem, put 64-bit constants in ++;; memory, and construct long 32-bit constants. 
++ ++(define_expand "movdi" ++ [(set (match_operand:DI 0 "nonimmediate_operand") ++ (match_operand:DI 1 "general_operand"))] ++ "" ++{ ++ if (sw_64_expand_mov (DImode, operands)) ++ DONE; ++}) ++ ++;; Split a load of a large constant into the appropriate two-insn ++;; sequence. ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "non_add_const_operand"))] ++ "" ++ [(const_int 0)] ++{ ++ if (sw_64_split_const_mov (DImode, operands)) ++ DONE; ++ else ++ FAIL; ++}) ++ ++;; We need to prevent reload from splitting TImode moves, because it ++;; might decide to overwrite a pointer with the value it points to. ++;; In that case we have to do the loads in the appropriate order so ++;; that the pointer is not destroyed too early. ++ ++(define_insn_and_split "*movti_internal" ++ [(set (match_operand:TI 0 "nonimmediate_operand" "=r,m") ++ (match_operand:TI 1 "input_operand" "rmJ,rJ"))] ++ "(register_operand (operands[0], TImode) ++ /* Prevent rematerialization of constants. */ ++ && ! CONSTANT_P (operands[1])) ++ || reg_or_0_operand (operands[1], TImode)" ++ "#" ++ "reload_completed" ++ [(set (match_dup 0) (match_dup 2)) ++ (set (match_dup 1) (match_dup 3))] ++ "sw_64_split_tmode_pair (operands, TImode, true);") ++ ++(define_expand "movti" ++ [(set (match_operand:TI 0 "nonimmediate_operand") ++ (match_operand:TI 1 "general_operand"))] ++ "" ++{ ++ if (MEM_P (operands[0]) ++ && ! reg_or_0_operand (operands[1], TImode)) ++ operands[1] = force_reg (TImode, operands[1]); ++ ++ if (operands[1] == const0_rtx) ++ ; ++ /* We must put 64-bit constants in memory. We could keep the ++ 32-bit constants in TImode and rely on the splitter, but ++ this doesn't seem to be worth the pain. */ ++ else if (CONST_SCALAR_INT_P (operands[1])) ++ { ++ rtx in[2], out[2], target; ++ ++ gcc_assert (can_create_pseudo_p ()); ++ ++ split_double (operands[1], &in[0], &in[1]); ++ ++ if (in[0] == const0_rtx) ++ out[0] = const0_rtx; ++ else ++ { ++ out[0] = gen_reg_rtx (DImode); ++ emit_insn (gen_movdi (out[0], in[0])); ++ } ++ ++ if (in[1] == const0_rtx) ++ out[1] = const0_rtx; ++ else ++ { ++ out[1] = gen_reg_rtx (DImode); ++ emit_insn (gen_movdi (out[1], in[1])); ++ } ++ ++ if (!REG_P (operands[0])) ++ target = gen_reg_rtx (TImode); ++ else ++ target = operands[0]; ++ ++ emit_insn (gen_movdi (operand_subword (target, 0, 0, TImode), out[0])); ++ emit_insn (gen_movdi (operand_subword (target, 1, 0, TImode), out[1])); ++ ++ if (target != operands[0]) ++ emit_insn (gen_rtx_SET (operands[0], target)); ++ ++ DONE; ++ } ++}) ++ ++;; These are the partial-word cases. ++;; ++;; First we have the code to load an aligned word. Operand 0 is the register ++;; in which to place the result. It's mode is QImode or HImode. Operand 1 ++;; is an SImode MEM at the low-order byte of the proper word. Operand 2 is the ++;; number of bits within the word that the value is. Operand 3 is an SImode ++;; scratch register. If operand 0 is a hard register, operand 3 may be the ++;; same register. It is allowed to conflict with operand 1 as well. 
++ ++(define_expand "aligned_loadqi" ++ [(set (match_operand:SI 3 "register_operand") ++ (match_operand:SI 1 "memory_operand")) ++ (set (match_operand:DI 0 "register_operand") ++ (zero_extract:DI (subreg:DI (match_dup 3) 0) ++ (const_int 8) ++ (match_operand:DI 2 "const_int_operand")))]) ++ ++(define_expand "aligned_loadhi" ++ [(set (match_operand:SI 3 "register_operand") ++ (match_operand:SI 1 "memory_operand")) ++ (set (match_operand:DI 0 "register_operand") ++ (zero_extract:DI (subreg:DI (match_dup 3) 0) ++ (const_int 16) ++ (match_operand:DI 2 "const_int_operand")))]) ++ ++;; Similar for unaligned loads, where we use the sequence from the ++;; Sw_64 Architecture manual. We have to distinguish between little-endian ++;; and big-endian systems as the sequences are different. ++;; ++;; Operand 1 is the address. Operands 2 and 3 are temporaries, where ++;; operand 3 can overlap the input and output registers. ++ ++(define_expand "unaligned_loadqi" ++ [(set (match_operand:DI 2 "register_operand") ++ (mem:DI (and:DI (match_operand:DI 1 "address_operand") ++ (const_int -8)))) ++ (set (match_operand:DI 3 "register_operand") ++ (match_dup 1)) ++ (set (match_operand:DI 0 "register_operand") ++ (zero_extract:DI (match_dup 2) ++ (const_int 8) ++ (ashift:DI (match_dup 3) (const_int 3))))]) ++ ++(define_expand "unaligned_loadhi" ++ [(set (match_operand:DI 2 "register_operand") ++ (mem:DI (and:DI (match_operand:DI 1 "address_operand") ++ (const_int -8)))) ++ (set (match_operand:DI 3 "register_operand") ++ (match_dup 1)) ++ (set (match_operand:DI 0 "register_operand") ++ (zero_extract:DI (match_dup 2) ++ (const_int 16) ++ (ashift:DI (match_dup 3) (const_int 3))))]) ++ ++;; Storing an aligned byte or word requires two temporaries. Operand 0 is the ++;; aligned SImode MEM. Operand 1 is the register containing the ++;; byte or word to store. Operand 2 is the number of bits within the word that ++;; the value should be placed. Operands 3 and 4 are SImode temporaries. ++ ++(define_expand "aligned_store" ++ [(set (match_operand:SI 3 "register_operand") ++ (match_operand:SI 0 "memory_operand")) ++ (set (subreg:DI (match_dup 3) 0) ++ (and:DI (subreg:DI (match_dup 3) 0) (match_dup 5))) ++ (set (subreg:DI (match_operand:SI 4 "register_operand") 0) ++ (ashift:DI (zero_extend:DI (match_operand 1 "register_operand")) ++ (match_operand:DI 2 "const_int_operand"))) ++ (set (subreg:DI (match_dup 4) 0) ++ (ior:DI (subreg:DI (match_dup 4) 0) (subreg:DI (match_dup 3) 0))) ++ (set (match_dup 0) (match_dup 4))] ++ "" ++{ ++ operands[5] = GEN_INT (~ (GET_MODE_MASK (GET_MODE (operands[1])) ++ << INTVAL (operands[2]))); ++}) ++ ++;; For the unaligned byte and halfword cases, we use code similar to that ++;; in the ;; Architecture book, but reordered to lower the number of registers ++;; required. Operand 0 is the address. Operand 1 is the data to store. ++;; Operands 2, 3, and 4 are DImode temporaries, where operands 2 and 4 may ++;; be the same temporary, if desired. If the address is in a register, ++;; operand 2 can be that register. 
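++;; Spelled out in C-like terms (schematic only), storing byte VAL at an
++;; unaligned address ADDR is the read-modify-write sequence
++;;   word  = *(uint64_t *) (ADDR & ~7);        /* load enclosing quadword */
++;;   shift = (ADDR & 7) * 8;                   /* target byte lane        */
++;;   word  = (word & ~(0xffULL << shift))      /* clear the lane          */
++;;           | ((uint64_t) VAL << shift);      /* insert the new byte     */
++;;   *(uint64_t *) (ADDR & ~7) = word;         /* write the word back     */
++;; which is the chain of sets in the expander below; the halfword case
++;; uses a 16-bit mask instead.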
++
++(define_expand "@unaligned_store<mode>"
++  [(set (match_operand:DI 3 "register_operand")
++	(mem:DI (and:DI (match_operand:DI 0 "address_operand")
++			(const_int -8))))
++   (set (match_operand:DI 2 "register_operand")
++	(match_dup 0))
++   (set (match_dup 3)
++	(and:DI (not:DI (ashift:DI (match_dup 5)
++				   (ashift:DI (match_dup 2) (const_int 3))))
++		(match_dup 3)))
++   (set (match_operand:DI 4 "register_operand")
++	(ashift:DI (zero_extend:DI
++		     (match_operand:I12MODE 1 "register_operand"))
++		   (ashift:DI (match_dup 2) (const_int 3))))
++   (set (match_dup 4) (ior:DI (match_dup 4) (match_dup 3)))
++   (set (mem:DI (and:DI (match_dup 0) (const_int -8)))
++	(match_dup 4))]
++  ""
++  "operands[5] = GEN_INT (GET_MODE_MASK (<MODE>mode));")
++
++;; Here are the define_expand's for QI and HI moves that use the above
++;; patterns.  We have the normal sets, plus the ones that need scratch
++;; registers for reload.
++
++(define_expand "mov<mode>"
++  [(set (match_operand:I12MODE 0 "nonimmediate_operand")
++	(match_operand:I12MODE 1 "general_operand"))]
++  ""
++{
++  if (sw_64_expand_mov (<MODE>mode, operands))
++    DONE;
++})
++
++(define_insn "*movqi"
++  [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,m")
++	(match_operand:QI 1 "input_operand" "rJ,n,m,rJ"))]
++  "register_operand (operands[0], QImode)
++   || reg_or_0_operand (operands[1], QImode)"
++  "@
++   bis $31,%r1,%0
++   ldi %0,%L1($31)
++   ldbu%U1 %0,%1
++   stb%U0 %r1,%0"
++  [(set_attr "type" "ilog,iadd,ild,ist")
++   (set_attr "isa" "*,*,bwx,bwx")])
++
++(define_insn "*movhi"
++  [(set (match_operand:HI 0 "nonimmediate_operand" "=r,r,r,m")
++	(match_operand:HI 1 "input_operand" "rJ,n,m,rJ"))]
++  "register_operand (operands[0], HImode)
++   || reg_or_0_operand (operands[1], HImode)"
++  "@
++   bis $31,%r1,%0
++   ldi %0,%L1($31)
++   ldhu%U1 %0,%1
++   sth%U0 %r1,%0"
++  [(set_attr "type" "ilog,iadd,ild,ist")
++   (set_attr "isa" "*,*,bwx,bwx")])
++
++;; Helpers for the above.  The way reload is structured, we can't
++;; always get a proper address for a stack slot during reload_foo
++;; expansion, so we must delay our address manipulations until after.
++ ++(define_insn_and_split "@reload_in_aligned" ++ [(set (match_operand:I12MODE 0 "register_operand" "=r") ++ (match_operand:I12MODE 1 "memory_operand" "m"))] ++ "!TARGET_BWX && (reload_in_progress || reload_completed)" ++ "#" ++ "!TARGET_BWX && reload_completed" ++ [(const_int 0)] ++{ ++ rtx aligned_mem, bitnum; ++ get_aligned_mem (operands[1], &aligned_mem, &bitnum); ++ emit_insn (gen_aligned_load ++ (gen_lowpart (DImode, operands[0]), aligned_mem, bitnum, ++ gen_rtx_REG (SImode, REGNO (operands[0])))); ++ DONE; ++}) ++ ++(define_mode_iterator VEC [V8QI V4HI V2SI]) ++(define_mode_iterator VEC12 [V8QI V4HI]) ++ ++(define_expand "mov" ++ [(set (match_operand:VEC 0 "nonimmediate_operand") ++ (match_operand:VEC 1 "general_operand"))] ++ "" ++{ ++ if (sw_64_expand_mov (mode, operands)) ++ DONE; ++}) ++ ++(define_split ++ [(set (match_operand:VEC 0 "register_operand") ++ (match_operand:VEC 1 "non_zero_const_operand"))] ++ "" ++ [(const_int 0)] ++{ ++ if (sw_64_split_const_mov (mode, operands)) ++ DONE; ++ else ++ FAIL; ++}) ++ ++ ++(define_expand "movmisalign" ++ [(set (match_operand:VEC 0 "nonimmediate_operand") ++ (match_operand:VEC 1 "general_operand"))] ++ "flag_sw_unalign_byte != 1 || !TARGET_SW8A" ++{ ++ sw_64_expand_movmisalign (mode, operands); ++ DONE; ++}) ++ ++(define_insn "*mov_fix" ++ [(set (match_operand:VEC 0 "nonimmediate_operand" "=r,r,r,m,*f,*f,m,r,*f") ++ (match_operand:VEC 1 "input_operand" "rW,i,m,rW,*fW,m,*f,*f,r"))] ++ "register_operand (operands[0], mode) ++ || reg_or_0_operand (operands[1], mode)" ++ "@ ++ bis $31,%r1,%0 ++ # ++ ldl%A1%U1 %0,%1 ++ stl%A0%U0 %r1,%0 ++ fcpys %R1,%R1,%0 ++ fldd%U1 %0,%1 ++ fstd%U0 %R1,%0 ++ fimovd %1,%0 ++ ifmovd %1,%0" ++ [(set_attr "type" "ilog,multi,ild,ist,fcpys,fld,fst,ftoi,itof") ++ (set_attr "isa" "*,*,*,*,*,*,*,fix,fix")]) ++ ++(define_insn "3" ++ [(set (match_operand:VEC12 0 "register_operand" "=r") ++ (any_maxmin:VEC12 ++ (match_operand:VEC12 1 "reg_or_0_operand" "rW") ++ (match_operand:VEC12 2 "reg_or_0_operand" "rW")))] ++ "TARGET_MAX" ++ " %r1,%r2,%0" ++ [(set_attr "type" "mvi")]) ++ ++(define_insn "one_cmpl2" ++ [(set (match_operand:VEC 0 "register_operand" "=r") ++ (not:VEC (match_operand:VEC 1 "register_operand" "r")))] ++ "" ++ "ornot $31,%1,%0" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn "and3" ++ [(set (match_operand:VEC 0 "register_operand" "=r") ++ (and:VEC (match_operand:VEC 1 "register_operand" "r") ++ (match_operand:VEC 2 "register_operand" "r")))] ++ "" ++ "and %1,%2,%0" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn "*andnot3" ++ [(set (match_operand:VEC 0 "register_operand" "=r") ++ (and:VEC (not:VEC (match_operand:VEC 1 "register_operand" "r")) ++ (match_operand:VEC 2 "register_operand" "r")))] ++ "" ++ "bic %2,%1,%0" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn "ior3" ++ [(set (match_operand:VEC 0 "register_operand" "=r") ++ (ior:VEC (match_operand:VEC 1 "register_operand" "r") ++ (match_operand:VEC 2 "register_operand" "r")))] ++ "" ++ "bis %1,%2,%0" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn "*iornot3" ++ [(set (match_operand:VEC 0 "register_operand" "=r") ++ (ior:VEC (not:DI (match_operand:VEC 1 "register_operand" "r")) ++ (match_operand:VEC 2 "register_operand" "r")))] ++ "" ++ "ornot %2,%1,%0" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn "xor3" ++ [(set (match_operand:VEC 0 "register_operand" "=r") ++ (xor:VEC (match_operand:VEC 1 "register_operand" "r") ++ (match_operand:VEC 2 "register_operand" "r")))] ++ "" ++ "xor %1,%2,%0" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn 
"*xornot3" ++ [(set (match_operand:VEC 0 "register_operand" "=r") ++ (not:VEC (xor:VEC (match_operand:VEC 1 "register_operand" "r") ++ (match_operand:VEC 2 "register_operand" "r"))))] ++ "" ++ "eqv %1,%2,%0" ++ [(set_attr "type" "ilog")]) ++ ++(define_expand "vec_shl_" ++ [(set (match_operand:VEC 0 "register_operand") ++ (ashift:DI (match_operand:VEC 1 "register_operand") ++ (match_operand:DI 2 "reg_or_6bit_operand")))] ++ "" ++{ ++ operands[0] = gen_lowpart (DImode, operands[0]); ++ operands[1] = gen_lowpart (DImode, operands[1]); ++}) ++ ++(define_expand "vec_shr_" ++ [(set (match_operand:VEC 0 "register_operand") ++ (lshiftrt:DI (match_operand:VEC 1 "register_operand") ++ (match_operand:DI 2 "reg_or_6bit_operand")))] ++ "" ++{ ++ operands[0] = gen_lowpart (DImode, operands[0]); ++ operands[1] = gen_lowpart (DImode, operands[1]); ++}) ++ ++;; Bit field extract patterns which use ext[wlq][lh] ++ ++(define_expand "extvmisaligndi" ++ [(set (match_operand:DI 0 "register_operand") ++ (sign_extract:DI (match_operand:BLK 1 "memory_operand") ++ (match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand")))] ++ "" ++{ ++ /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. */ ++ if (INTVAL (operands[3]) % 8 != 0 ++ || (INTVAL (operands[2]) != 16 ++ && INTVAL (operands[2]) != 32 ++ && INTVAL (operands[2]) != 64)) ++ FAIL; ++ ++ sw_64_expand_unaligned_load (operands[0], operands[1], ++ INTVAL (operands[2]) / 8, ++ INTVAL (operands[3]) / 8, 1); ++ DONE; ++}) ++ ++(define_expand "extzvdi" ++ [(set (match_operand:DI 0 "register_operand") ++ (zero_extract:DI (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand")))] ++ "" ++{ ++ /* We can do 8, 16, 32 and 64 bit fields, if aligned on byte boundaries. */ ++ if (INTVAL (operands[3]) % 8 != 0 ++ || (INTVAL (operands[2]) != 8 ++ && INTVAL (operands[2]) != 16 ++ && INTVAL (operands[2]) != 32 ++ && INTVAL (operands[2]) != 64)) ++ FAIL; ++}) ++ ++(define_expand "extzvmisaligndi" ++ [(set (match_operand:DI 0 "register_operand") ++ (zero_extract:DI (match_operand:BLK 1 "memory_operand") ++ (match_operand:DI 2 "const_int_operand") ++ (match_operand:DI 3 "const_int_operand")))] ++ "" ++{ ++ /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. ++ We fail 8-bit fields, falling back on a simple byte load. */ ++ if (INTVAL (operands[3]) % 8 != 0 ++ || (INTVAL (operands[2]) != 16 ++ && INTVAL (operands[2]) != 32 ++ && INTVAL (operands[2]) != 64)) ++ FAIL; ++ ++ sw_64_expand_unaligned_load (operands[0], operands[1], ++ INTVAL (operands[2]) / 8, ++ INTVAL (operands[3]) / 8, 0); ++ DONE; ++}) ++ ++(define_expand "insvmisaligndi" ++ [(set (zero_extract:DI (match_operand:BLK 0 "memory_operand") ++ (match_operand:DI 1 "const_int_operand") ++ (match_operand:DI 2 "const_int_operand")) ++ (match_operand:DI 3 "register_operand"))] ++ "" ++{ ++ /* We can do 16, 32 and 64 bit fields, if aligned on byte boundaries. */ ++ if (INTVAL (operands[2]) % 8 != 0 ++ || (INTVAL (operands[1]) != 16 ++ && INTVAL (operands[1]) != 32 ++ && INTVAL (operands[1]) != 64)) ++ FAIL; ++ ++ sw_64_expand_unaligned_store (operands[0], operands[3], ++ INTVAL (operands[1]) / 8, ++ INTVAL (operands[2]) / 8); ++ DONE; ++}) ++ ++;; Block move/clear, see sw_64.c for more details. 
++;; Argument 0 is the destination ++;; Argument 1 is the source ++;; Argument 2 is the length ++;; Argument 3 is the alignment ++ ++(define_expand "cpymemqi" ++ [(parallel [(set (match_operand:BLK 0 "memory_operand") ++ (match_operand:BLK 1 "memory_operand")) ++ (use (match_operand:DI 2 "immediate_operand")) ++ (use (match_operand:DI 3 "immediate_operand"))])] ++ "flag_sw_unalign_byte != 1 || !TARGET_SW8A" ++{ ++ if (sw_64_expand_block_move (operands)) ++ DONE; ++ else ++ FAIL; ++}) ++ ++(define_expand "setmemqi" ++ [(parallel [(set (match_operand:BLK 0 "memory_operand") ++ (match_operand 2 "const_int_operand")) ++ (use (match_operand:DI 1 "immediate_operand")) ++ (use (match_operand:DI 3 "immediate_operand"))])] ++ "flag_sw_unalign_byte != 1 || !TARGET_SW8A" ++{ ++ /* If value to set is not zero, use the library routine. */ ++ if (operands[2] != const0_rtx) ++ FAIL; ++ ++ if (sw_64_expand_block_clear (operands)) ++ DONE; ++ else ++ FAIL; ++}) ++ ++;; Subroutine of stack space allocation. Perform a stack probe. ++(define_expand "stack_probe_internal" ++ [(set (match_dup 1) (match_operand:DI 0 "const_int_operand"))] ++ "" ++{ ++ operands[1] = gen_rtx_MEM (DImode, plus_constant (Pmode, stack_pointer_rtx, ++ INTVAL (operands[0]))); ++ MEM_VOLATILE_P (operands[1]) = 1; ++ ++ operands[0] = const0_rtx; ++}) ++ ++;; This is how we allocate stack space. If we are allocating a ++;; constant amount of space and we know it is less than 4096 ++;; bytes, we need do nothing. ++;; ++;; If it is more than 4096 bytes, we need to probe the stack ++;; periodically. ++(define_expand "allocate_stack" ++ [(set (reg:DI 30) ++ (plus:DI (reg:DI 30) ++ (match_operand:DI 1 "reg_or_cint_operand"))) ++ (set (match_operand:DI 0 "register_operand" "=r") ++ (match_dup 2))] ++ "" ++{ ++ if (CONST_INT_P (operands[1]) ++ && INTVAL (operands[1]) < 32768) ++ { ++ if (INTVAL (operands[1]) >= 4096) ++ { ++ /* We do this the same way as in the prologue and generate explicit ++ probes. Then we update the stack by the constant. 
*/ ++ ++ int probed = 4096; ++ ++ emit_insn (gen_stack_probe_internal (GEN_INT (- probed))); ++ while (probed + 8192 < INTVAL (operands[1])) ++ emit_insn (gen_stack_probe_internal ++ (GEN_INT (- (probed += 8192)))); ++ ++ if (probed + 4096 < INTVAL (operands[1])) ++ emit_insn (gen_stack_probe_internal ++ (GEN_INT (- INTVAL (operands[1])))); ++ } ++ ++ operands[1] = GEN_INT (- INTVAL (operands[1])); ++ operands[2] = virtual_stack_dynamic_rtx; ++ } ++ else ++ { ++ rtx_code_label *out_label = 0; ++ rtx_code_label *loop_label = gen_label_rtx (); ++ rtx want = gen_reg_rtx (Pmode); ++ rtx tmp = gen_reg_rtx (Pmode); ++ rtx memref, test; ++ ++ emit_insn (gen_subdi3 (want, stack_pointer_rtx, ++ force_reg (Pmode, operands[1]))); ++ ++ if (!CONST_INT_P (operands[1])) ++ { ++ rtx limit = GEN_INT (4096); ++ out_label = gen_label_rtx (); ++ test = gen_rtx_LTU (VOIDmode, operands[1], limit); ++ emit_jump_insn ++ (gen_cbranchdi4 (test, operands[1], limit, out_label)); ++ } ++ ++ emit_insn (gen_adddi3 (tmp, stack_pointer_rtx, GEN_INT (-4096))); ++ emit_label (loop_label); ++ memref = gen_rtx_MEM (DImode, tmp); ++ MEM_VOLATILE_P (memref) = 1; ++ emit_move_insn (memref, const0_rtx); ++ emit_insn (gen_adddi3 (tmp, tmp, GEN_INT (-8192))); ++ test = gen_rtx_GTU (VOIDmode, tmp, want); ++ emit_jump_insn (gen_cbranchdi4 (test, tmp, want, loop_label)); ++ ++ memref = gen_rtx_MEM (DImode, want); ++ MEM_VOLATILE_P (memref) = 1; ++ emit_move_insn (memref, const0_rtx); ++ ++ if (out_label) ++ emit_label (out_label); ++ ++ emit_move_insn (stack_pointer_rtx, want); ++ emit_move_insn (operands[0], virtual_stack_dynamic_rtx); ++ DONE; ++ } ++}) ++ ++;; This is used by sw_64_expand_prolog to do the same thing as above, ++;; except we cannot at that time generate new basic blocks, so we hide ++;; the loop in this one insn. ++ ++(define_insn "prologue_stack_probe_loop" ++ [(unspec_volatile [(match_operand:DI 0 "register_operand" "r") ++ (match_operand:DI 1 "register_operand" "r")] ++ UNSPECV_PSPL)] ++ "" ++{ ++ operands[2] = gen_label_rtx (); ++ (*targetm.asm_out.internal_label) (asm_out_file, "L", ++ CODE_LABEL_NUMBER (operands[2])); ++ ++ return "stl $31,-8192(%1)\;subl %0,1,%0\;ldi %1,-8192(%1)\;bne %0,%l2"; ++} ++ [(set_attr "length" "16") ++ (set_attr "type" "multi")]) ++ ++(define_expand "prologue" ++ [(const_int 0)] ++ "" ++{ ++ sw_64_expand_prologue (); ++ DONE; ++}) ++ ++;; These take care of emitting the ldgp insn in the prologue. This will be ++;; an ldi/ldih pair and we want to align them properly. So we have two ++;; unspec_volatile insns, the first of which emits the ldgp assembler macro ++;; and the second of which emits nothing. However, both are marked as type ++;; IADD (the default) so the alignment code in sw_64.c does the right thing ++;; with them. ++ ++(define_expand "prologue_ldgp" ++ [(set (match_dup 0) ++ (unspec_volatile:DI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1)) ++ (set (match_dup 0) ++ (unspec_volatile:DI [(match_dup 0) (match_dup 2)] UNSPECV_PLDGP2))] ++ "" ++{ ++ operands[0] = pic_offset_table_rtx; ++ operands[1] = gen_rtx_REG (Pmode, 27); ++ operands[2] = (TARGET_EXPLICIT_RELOCS ++ ? 
GEN_INT (sw_64_next_sequence_number++) ++ : const0_rtx); ++}) ++ ++(define_insn "*ldgp_er_1" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand 2 "const_int_operand")] ++ UNSPECV_LDGP1))] ++ "TARGET_EXPLICIT_RELOCS" ++ "ldih %0,0(%1)\t\t!gpdisp!%2" ++ [(set_attr "cannot_copy" "true")]) ++ ++(define_insn "*ldgp_er_2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand 2 "const_int_operand")] ++ UNSPEC_LDGP2))] ++ "TARGET_EXPLICIT_RELOCS" ++ "ldi %0,0(%1)\t\t!gpdisp!%2" ++ [(set_attr "cannot_copy" "true")]) ++ ++(define_insn "*exc_ldgp_er_2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand 2 "const_int_operand")] ++ UNSPECV_LDGP2))] ++ "TARGET_EXPLICIT_RELOCS" ++ "ldi %0,0(%1)\t\t!gpdisp!%2" ++ [(set_attr "cannot_copy" "true")]) ++ ++(define_insn "*prologue_ldgp_er_2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand 2 "const_int_operand")] ++ UNSPECV_PLDGP2))] ++ "TARGET_EXPLICIT_RELOCS" ++{ ++ return "ldi %0,0(%1)\t\t!gpdisp!%2\n$%~..ng:"; ++} ++ [(set_attr "cannot_copy" "true")]) ++ ++(define_insn "*prologue_ldgp_1" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand 2 "const_int_operand")] ++ UNSPECV_LDGP1))] ++ "" ++{ ++ return "ldgp %0,0(%1)\n$%~..ng:"; ++} ++ [(set_attr "cannot_copy" "true")]) ++ ++(define_insn "*prologue_ldgp_2" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "r") ++ (match_operand 2 "const_int_operand")] ++ UNSPECV_PLDGP2))] ++ "" ++) ++ ++(define_insn "hardware_prefetch_use_syscall" ++[(unspec_volatile [ ++(match_operand:DI 0 "register_operand" "=r") ++(match_operand:DI 1 "register_operand" "=r") ++] UNSPECV_HARDWARE_PREFETCH_CNT)] ++"" ++{ ++ return "ldi $16,110($31)\;ldi $18,1($31)\;ldi $19,120($30)\;\ ++stl %0,120($30)\;\ ++ldl $27,syscall($29)\t\t!literal!%#\;call $26,($27),syscall\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*\;" ++ ; ++} ++[(set_attr "type" "multi") ++ (set_attr "length" "8")]) ++ ++;; The _mcount profiling hook has special calling conventions, and ++;; does not clobber all the registers that a normal call would. So ++;; hide the fact this is a call at all. ++ ++(define_insn "prologue_mcount" ++ [(unspec_volatile [(const_int 0)] UNSPECV_MCOUNT)] ++ "" ++{ ++ if (TARGET_EXPLICIT_RELOCS) ++ /* Note that we cannot use a lituse_jsr reloc, since _mcount ++ cannot be called via the PLT. 
*/ ++ return "ldl $28,_mcount($29)\t\t!literal\;call $28,($28),_mcount"; ++ else ++ return "ldi $28,_mcount\;call $28,($28),_mcount"; ++} ++ [(set_attr "type" "multi") ++ (set_attr "length" "8")]) ++ ++(define_insn "init_fp" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (match_operand:DI 1 "register_operand" "r")) ++ (clobber (mem:BLK (match_operand:DI 2 "register_operand" "=r")))] ++ "" ++ "bis $31,%1,%0") ++ ++(define_expand "epilogue" ++ [(return)] ++ "" ++ "sw_64_expand_epilogue ();") ++ ++(define_expand "sibcall_epilogue" ++ [(return)] ++ "" ++{ ++ sw_64_expand_epilogue (); ++ DONE; ++}) ++ ++(define_expand "builtin_longjmp" ++ [(use (match_operand:DI 0 "register_operand" "r"))] ++ "" ++{ ++ /* The elements of the buffer are, in order: */ ++ rtx fp = gen_rtx_MEM (Pmode, operands[0]); ++ rtx lab = gen_rtx_MEM (Pmode, plus_constant (Pmode, operands[0], 8)); ++ rtx stack = gen_rtx_MEM (Pmode, plus_constant (Pmode, operands[0], 16)); ++ rtx pv = gen_rtx_REG (Pmode, 27); ++ ++ /* This bit is the same as expand_builtin_longjmp. */ ++ emit_move_insn (pv, lab); ++ emit_stack_restore (SAVE_NONLOCAL, stack); ++ emit_use (hard_frame_pointer_rtx); ++ emit_use (stack_pointer_rtx); ++ ++ emit_move_insn (hard_frame_pointer_rtx, fp); ++ /* Load the label we are jumping through into $27 so that we know ++ where to look for it when we get back to setjmp's function for ++ restoring the gp. */ ++ emit_jump_insn (gen_builtin_longjmp_internal (pv)); ++ emit_barrier (); ++ DONE; ++}) ++ ++;; This is effectively a copy of indirect_jump, but constrained such ++;; that register renaming cannot foil our cunning plan with $27. ++(define_insn "builtin_longjmp_internal" ++ [(set (pc) ++ (unspec_volatile [(match_operand:DI 0 "register_operand" "c")] ++ UNSPECV_LONGJMP))] ++ "" ++ "jmp $31,(%0),0" ++ [(set_attr "type" "ibr")]) ++ ++(define_expand "builtin_setjmp_receiver" ++ [(unspec_volatile [(label_ref (match_operand 0))] UNSPECV_SETJMPR)] ++ "") ++ ++(define_insn_and_split "*builtin_setjmp_receiver_1" ++ [(unspec_volatile [(match_operand 0)] UNSPECV_SETJMPR)] ++ "" ++{ ++ if (TARGET_EXPLICIT_RELOCS) ++ return "#"; ++ else ++ return "br $27,$LSJ%=\n$LSJ%=:\;ldgp $29,0($27)"; ++} ++ "&& TARGET_EXPLICIT_RELOCS && reload_completed" ++ [(set (match_dup 1) ++ (unspec_volatile:DI [(match_dup 2) (match_dup 3)] UNSPECV_LDGP1)) ++ (set (match_dup 1) ++ (unspec:DI [(match_dup 1) (match_dup 3)] UNSPEC_LDGP2))] ++{ ++ if (prev_nonnote_insn (curr_insn) != XEXP (operands[0], 0)) ++ emit_insn (gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, operands[0]), ++ UNSPECV_SETJMPR_ER)); ++ operands[1] = pic_offset_table_rtx; ++ operands[2] = gen_rtx_REG (Pmode, 27); ++ operands[3] = GEN_INT (sw_64_next_sequence_number++); ++} ++ [(set_attr "length" "12") ++ (set_attr "type" "multi")]) ++ ++(define_insn "*builtin_setjmp_receiver_er_sl_1" ++ [(unspec_volatile [(match_operand 0)] UNSPECV_SETJMPR_ER)] ++ "TARGET_EXPLICIT_RELOCS" ++ "ldi $27,$LSJ%=-%l0($27)\n$LSJ%=:") ++ ++;; When flag_reorder_blocks_and_partition is in effect, compiler puts ++;; exception landing pads in a cold section. To prevent inter-section offset ++;; calculation, a jump to original landing pad is emitted in the place of the ++;; original landing pad. Since landing pad is moved, RA-relative GP ++;; calculation in the prologue of landing pad breaks. To solve this problem, ++;; we use alternative GP load approach. 
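++;; Summarising the two receiver patterns below: the normal case recomputes
++;; the GP from the return address, roughly
++;;   ldgp $29,0($26)
++;; while under -freorder-blocks-and-partition the GP is instead reloaded
++;; from the save slot set up by sw_64_gp_save_rtx, roughly
++;;   ldl $29,gp-save-slot   # schematic operand
++;; so the reload does not depend on where the landing pad ended up.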
++ ++(define_expand "exception_receiver" ++ [(unspec_volatile [(match_dup 0)] UNSPECV_EHR)] ++ "" ++{ ++ if (flag_reorder_blocks_and_partition) ++ operands[0] = copy_rtx (sw_64_gp_save_rtx ()); ++ else ++ operands[0] = const0_rtx; ++}) ++ ++(define_insn "*exception_receiver_2" ++ [(unspec_volatile [(match_operand:DI 0 "memory_operand" "m")] UNSPECV_EHR)] ++ "flag_reorder_blocks_and_partition" ++ "ldl $29,%0" ++ [(set_attr "type" "ild")]) ++ ++(define_insn_and_split "*exception_receiver_1" ++ [(unspec_volatile [(const_int 0)] UNSPECV_EHR)] ++ "" ++{ ++ if (TARGET_EXPLICIT_RELOCS) ++ return "#"; ++ else ++ return "ldgp $29,0($26)"; ++} ++ "&& TARGET_EXPLICIT_RELOCS && reload_completed" ++ [(set (match_dup 0) ++ (unspec_volatile:DI [(match_dup 1) (match_dup 2)] UNSPECV_LDGP1)) ++ (set (match_dup 0) ++ (unspec_volatile:DI [(match_dup 0) (match_dup 2)] UNSPECV_LDGP2))] ++{ ++ operands[0] = pic_offset_table_rtx; ++ operands[1] = gen_rtx_REG (Pmode, 26); ++ operands[2] = GEN_INT (sw_64_next_sequence_number++); ++} ++ [(set_attr "length" "8") ++ (set_attr "type" "multi")]) ++ ++;; Prefetch data. ++;; ++;; ++;; On SW6, these become official prefetch instructions. ++ ++(define_insn "prefetch" ++ [(prefetch (match_operand:DI 0 "address_operand" "p") ++ (match_operand:DI 1 "const_int_operand" "n") ++ (match_operand:DI 2 "const_int_operand" "n"))] ++ "sw_64_cpu == PROCESSOR_SW6 || sw_64_cpu == PROCESSOR_SW8" ++{ ++ /* Interpret "no temporal locality" as this data should be evicted once ++ it is used. The "evict next" alternatives load the data into the cache ++ and leave the LRU eviction counter pointing to that block. */ ++ static const char * alt[2][2] ; ++ if (flag_sw_prefetch_l1) ++ { ++ alt[0][0] = "fillcs_e %a0" ; /* read, evict next. */ ++ alt[0][1] = "fillcs %a0" ; /* read, evict next. */ ++ alt[1][0] = "fillde_e %a0" ; /* write, evict next. */ ++ alt[1][1] = "fillde %a0" ; /* write, evict next. */ ++ ++ } ++ else ++ { ++ alt[0][0] = "s_fillde %a0" ; /* read, evict next. */ ++ alt[0][1] = "s_fillcs %a0" ; /* read, evict next. */ ++ alt[1][0] = "fillde_e %a0" ; /* write, evict next. */ ++ alt[1][1] = "fillde %a0" ; /* write, evict next. */ ++ } ++ ++ bool write = INTVAL (operands[1]) != 0; ++ bool lru = INTVAL (operands[2]) != 0; ++ ++ return alt[write][lru]; ++} ++ [(set_attr "type" "ild")]) ++ ++ ++;; Close the trap shadow of preceding instructions. This is generated ++;; by sw_64_reorg. ++ ++(define_insn "trapb" ++ [(unspec_volatile [(const_int 0)] UNSPECV_TRAPB)] ++ "" ++ "memb" ++ [(set_attr "type" "misc")]) ++ ++;; No-op instructions used by machine-dependent reorg to preserve ++;; alignment for instruction issue. ++;; The Unicos/Mk assembler does not support these opcodes. ++ ++(define_insn "nop" ++ [(const_int 0)] ++ "" ++ "nop" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn "fnop" ++ [(const_int 1)] ++ "TARGET_FP" ++ "fcpys $f31,$f31,$f31" ++ [(set_attr "type" "fcpys")]) ++ ++(define_insn "unop" ++ [(const_int 2)] ++ "" ++ "ldl_u $31,0($30)") ++ ++(define_insn "realign" ++ [(unspec_volatile [(match_operand 0 "immediate_operand" "i")] ++ UNSPECV_REALIGN)] ++ "" ++ ".align %0 #realign") ++ ++;; Instructions to be emitted from __builtins. ++ ++(define_insn "builtin_cmpbge" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "rJ") ++ (match_operand:DI 2 "reg_or_8bit_operand" "rI")] ++ UNSPEC_CMPBGE))] ++ "" ++ "cmpgeb %r1,%2,%0" ++ ;; The SW6 data sheets list this as ILOG. 
OTOH, SW6 doesn't ++ ;; actually differentiate between ILOG and ICMP in the schedule. ++ [(set_attr "type" "icmp")]) ++ ++(define_expand "extbl" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (8), operands[2])); ++ DONE; ++}) ++ ++(define_expand "extwl" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (16), operands[2])); ++ DONE; ++}) ++ ++(define_expand "extll" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (32), operands[2])); ++ DONE; ++}) ++ ++(define_expand "extql" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ emit_insn (gen_extxl (operands[0], operands[1], GEN_INT (64), operands[2])); ++ DONE; ++}) ++ ++(define_expand "builtin_insbl" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ operands[1] = gen_lowpart (QImode, operands[1]); ++ emit_insn (gen_insbl (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_expand "builtin_inswl" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ operands[1] = gen_lowpart (HImode, operands[1]); ++ emit_insn (gen_inswl (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_expand "builtin_insll" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ operands[1] = gen_lowpart (SImode, operands[1]); ++ emit_insn (gen_insll (operands[0], operands[1], operands[2])); ++ DONE; ++}) ++ ++(define_expand "inswh" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (16), operands[2])); ++ DONE; ++}) ++ ++(define_expand "inslh" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (32), operands[2])); ++ DONE; ++}) ++ ++(define_expand "insqh" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ emit_insn (gen_insxh (operands[0], operands[1], GEN_INT (64), operands[2])); ++ DONE; ++}) ++ ++(define_expand "mskbl" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ rtx mask = GEN_INT (0xff); ++ emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); ++ DONE; ++}) ++ ++(define_expand "mskwl" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ rtx mask = GEN_INT (0xffff); ++ emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); ++ DONE; ++}) ++ ++(define_expand "mskll" ++ [(match_operand:DI 0 
"register_operand") ++ (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ rtx mask = gen_int_mode (0xffffffff, DImode); ++ emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); ++ DONE; ++}) ++ ++(define_expand "mskql" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ rtx mask = constm1_rtx; ++ emit_insn (gen_mskxl (operands[0], operands[1], mask, operands[2])); ++ DONE; ++}) ++ ++(define_expand "mskwh" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (16), operands[2])); ++ DONE; ++}) ++ ++(define_expand "msklh" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (32), operands[2])); ++ DONE; ++}) ++ ++(define_expand "mskqh" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "register_operand") ++ (match_operand:DI 2 "reg_or_8bit_operand")] ++ "" ++{ ++ emit_insn (gen_mskxh (operands[0], operands[1], GEN_INT (64), operands[2])); ++ DONE; ++}) ++ ++(define_expand "builtin_zap" ++ [(set (match_operand:DI 0 "register_operand") ++ (and:DI (unspec:DI ++ [(match_operand:DI 2 "reg_or_cint_operand")] ++ UNSPEC_ZAP) ++ (match_operand:DI 1 "reg_or_cint_operand")))] ++ "" ++{ ++ if (CONST_INT_P (operands[2])) ++ { ++ rtx mask = sw_64_expand_zap_mask (INTVAL (operands[2])); ++ ++ if (mask == const0_rtx) ++ { ++ emit_move_insn (operands[0], const0_rtx); ++ DONE; ++ } ++ if (mask == constm1_rtx) ++ { ++ emit_move_insn (operands[0], operands[1]); ++ DONE; ++ } ++ ++ operands[1] = force_reg (DImode, operands[1]); ++ emit_insn (gen_anddi3 (operands[0], operands[1], mask)); ++ DONE; ++ } ++ ++ operands[1] = force_reg (DImode, operands[1]); ++ operands[2] = gen_lowpart (QImode, operands[2]); ++}) ++ ++(define_insn "*builtin_zap_1" ++ [(set (match_operand:DI 0 "register_operand" "=r,r,r,r") ++ (and:DI (unspec:DI ++ [(match_operand:QI 2 "reg_or_cint_operand" "n,n,r,r")] ++ UNSPEC_ZAP) ++ (match_operand:DI 1 "reg_or_cint_operand" "n,r,J,r")))] ++ "" ++ "@ ++ # ++ # ++ bis $31,$31,%0 ++ zap %r1,%2,%0" ++ [(set_attr "type" "shift,shift,ilog,shift")]) ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (and:DI (unspec:DI ++ [(match_operand:QI 2 "const_int_operand")] ++ UNSPEC_ZAP) ++ (match_operand:DI 1 "const_int_operand")))] ++ "" ++ [(const_int 0)] ++{ ++ rtx mask = sw_64_expand_zap_mask (INTVAL (operands[2])); ++ ++ operands[1] = gen_int_mode (INTVAL (operands[1]) & INTVAL (mask), DImode); ++ emit_move_insn (operands[0], operands[1]); ++ DONE; ++}) ++ ++(define_split ++ [(set (match_operand:DI 0 "register_operand") ++ (and:DI (unspec:DI ++ [(match_operand:QI 2 "const_int_operand")] ++ UNSPEC_ZAP) ++ (match_operand:DI 1 "register_operand")))] ++ "" ++ [(set (match_dup 0) ++ (and:DI (match_dup 1) (match_dup 2)))] ++{ ++ operands[2] = sw_64_expand_zap_mask (INTVAL (operands[2])); ++ if (operands[2] == const0_rtx) ++ { ++ emit_move_insn (operands[0], const0_rtx); ++ DONE; ++ } ++ if (operands[2] == constm1_rtx) ++ { ++ emit_move_insn (operands[0], operands[1]); ++ DONE; ++ } ++}) ++ ++(define_expand "builtin_zapnot" ++ [(set (match_operand:DI 0 "register_operand") ++ (and:DI (unspec:DI ++ [(not:QI (match_operand:DI 2 
"reg_or_cint_operand"))] ++ UNSPEC_ZAP) ++ (match_operand:DI 1 "reg_or_cint_operand")))] ++ "" ++{ ++ if (CONST_INT_P (operands[2])) ++ { ++ rtx mask = sw_64_expand_zap_mask (~ INTVAL (operands[2])); ++ ++ if (mask == const0_rtx) ++ { ++ emit_move_insn (operands[0], const0_rtx); ++ DONE; ++ } ++ if (mask == constm1_rtx) ++ { ++ emit_move_insn (operands[0], operands[1]); ++ DONE; ++ } ++ ++ operands[1] = force_reg (DImode, operands[1]); ++ emit_insn (gen_anddi3 (operands[0], operands[1], mask)); ++ DONE; ++ } ++ ++ operands[1] = force_reg (DImode, operands[1]); ++ operands[2] = gen_lowpart (QImode, operands[2]); ++}) ++ ++(define_insn "*builtin_zapnot_1" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (and:DI (unspec:DI ++ [(not:QI (match_operand:QI 2 "register_operand" "r"))] ++ UNSPEC_ZAP) ++ (match_operand:DI 1 "reg_or_0_operand" "rJ")))] ++ "" ++ "zapnot %r1,%2,%0" ++ [(set_attr "type" "shift")]) ++ ++(define_insn "builtin_amask" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "reg_or_8bit_operand" "rI")] ++ UNSPEC_AMASK))] ++ "" ++ "amask %1,%0" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn "builtin_implver" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(const_int 0)] UNSPEC_IMPLVER))] ++ "" ++ "implver %0" ++ [(set_attr "type" "ilog")]) ++ ++(define_insn "builtin_rpcc" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(const_int 0)] UNSPECV_RPCC))] ++ "" ++ "rtc %0" ++ [(set_attr "type" "ilog")]) ++ ++(define_expand "builtin_minub8" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_0_operand")] ++ "TARGET_MAX" ++{ ++ sw_64_expand_builtin_vector_binop (gen_uminv8qi3, V8QImode, operands[0], ++ operands[1], operands[2]); ++ DONE; ++}) ++ ++(define_expand "builtin_minsb8" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_0_operand")] ++ "TARGET_MAX" ++{ ++ sw_64_expand_builtin_vector_binop (gen_sminv8qi3, V8QImode, operands[0], ++ operands[1], operands[2]); ++ DONE; ++}) ++ ++(define_expand "builtin_minuw4" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_0_operand")] ++ "TARGET_MAX" ++{ ++ sw_64_expand_builtin_vector_binop (gen_uminv4hi3, V4HImode, operands[0], ++ operands[1], operands[2]); ++ DONE; ++}) ++ ++(define_expand "builtin_minsw4" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_0_operand")] ++ "TARGET_MAX" ++{ ++ sw_64_expand_builtin_vector_binop (gen_sminv4hi3, V4HImode, operands[0], ++ operands[1], operands[2]); ++ DONE; ++}) ++ ++(define_expand "builtin_maxub8" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_0_operand")] ++ "TARGET_MAX" ++{ ++ sw_64_expand_builtin_vector_binop (gen_umaxv8qi3, V8QImode, operands[0], ++ operands[1], operands[2]); ++ DONE; ++}) ++ ++(define_expand "builtin_maxsb8" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_0_operand")] ++ "TARGET_MAX" ++{ ++ sw_64_expand_builtin_vector_binop (gen_smaxv8qi3, V8QImode, operands[0], ++ operands[1], operands[2]); ++ DONE; ++}) ++ ++(define_expand "builtin_maxuw4" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_0_operand")] ++ 
"TARGET_MAX" ++{ ++ sw_64_expand_builtin_vector_binop (gen_umaxv4hi3, V4HImode, operands[0], ++ operands[1], operands[2]); ++ DONE; ++}) ++ ++(define_expand "builtin_maxsw4" ++ [(match_operand:DI 0 "register_operand") ++ (match_operand:DI 1 "reg_or_0_operand") ++ (match_operand:DI 2 "reg_or_0_operand")] ++ "TARGET_MAX" ++{ ++ sw_64_expand_builtin_vector_binop (gen_smaxv4hi3, V4HImode, operands[0], ++ operands[1], operands[2]); ++ DONE; ++}) ++ ++(define_insn "builtin_perr" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec:DI [(match_operand:DI 1 "reg_or_0_operand" "%rJ") ++ (match_operand:DI 2 "reg_or_8bit_operand" "rJ")] ++ UNSPEC_PERR))] ++ "TARGET_MAX" ++ "perr %r1,%r2,%0" ++ [(set_attr "type" "mvi")]) ++ ++(define_expand "builtin_pklb" ++ [(set (match_operand:DI 0 "register_operand") ++ (vec_concat:V8QI ++ (vec_concat:V4QI ++ (truncate:V2QI (match_operand:DI 1 "register_operand")) ++ (match_dup 2)) ++ (match_dup 3)))] ++ "TARGET_MAX" ++{ ++ operands[0] = gen_lowpart (V8QImode, operands[0]); ++ operands[1] = gen_lowpart (V2SImode, operands[1]); ++ operands[2] = CONST0_RTX (V2QImode); ++ operands[3] = CONST0_RTX (V4QImode); ++}) ++ ++(define_insn "*pklb" ++ [(set (match_operand:V8QI 0 "register_operand" "=r") ++ (vec_concat:V8QI ++ (vec_concat:V4QI ++ (truncate:V2QI (match_operand:V2SI 1 "register_operand" "r")) ++ (match_operand:V2QI 2 "const0_operand")) ++ (match_operand:V4QI 3 "const0_operand")))] ++ "TARGET_MAX" ++ "pklb %r1,%0" ++ [(set_attr "type" "mvi")]) ++ ++(define_expand "builtin_pkwb" ++ [(set (match_operand:DI 0 "register_operand") ++ (vec_concat:V8QI ++ (truncate:V4QI (match_operand:DI 1 "register_operand")) ++ (match_dup 2)))] ++ "TARGET_MAX" ++{ ++ operands[0] = gen_lowpart (V8QImode, operands[0]); ++ operands[1] = gen_lowpart (V4HImode, operands[1]); ++ operands[2] = CONST0_RTX (V4QImode); ++}) ++ ++(define_insn "*pkwb" ++ [(set (match_operand:V8QI 0 "register_operand" "=r") ++ (vec_concat:V8QI ++ (truncate:V4QI (match_operand:V4HI 1 "register_operand" "r")) ++ (match_operand:V4QI 2 "const0_operand")))] ++ "TARGET_MAX" ++ "pkwb %r1,%0" ++ [(set_attr "type" "mvi")]) ++ ++(define_expand "builtin_unpkbl" ++ [(set (match_operand:DI 0 "register_operand") ++ (zero_extend:V2SI ++ (vec_select:V2QI (match_operand:DI 1 "register_operand") ++ (parallel [(const_int 0) (const_int 1)]))))] ++ "TARGET_MAX" ++{ ++ operands[0] = gen_lowpart (V2SImode, operands[0]); ++ operands[1] = gen_lowpart (V8QImode, operands[1]); ++}) ++ ++(define_insn "*unpkbl" ++ [(set (match_operand:V2SI 0 "register_operand" "=r") ++ (zero_extend:V2SI ++ (vec_select:V2QI (match_operand:V8QI 1 "reg_or_0_operand" "rW") ++ (parallel [(const_int 0) (const_int 1)]))))] ++ "TARGET_MAX" ++ "unpkbl %r1,%0" ++ [(set_attr "type" "mvi")]) ++ ++(define_expand "builtin_unpkbw" ++ [(set (match_operand:DI 0 "register_operand") ++ (zero_extend:V4HI ++ (vec_select:V4QI (match_operand:DI 1 "register_operand") ++ (parallel [(const_int 0) ++ (const_int 1) ++ (const_int 2) ++ (const_int 3)]))))] ++ "TARGET_MAX" ++{ ++ operands[0] = gen_lowpart (V4HImode, operands[0]); ++ operands[1] = gen_lowpart (V8QImode, operands[1]); ++}) ++ ++(define_insn "*unpkbw" ++ [(set (match_operand:V4HI 0 "register_operand" "=r") ++ (zero_extend:V4HI ++ (vec_select:V4QI (match_operand:V8QI 1 "reg_or_0_operand" "rW") ++ (parallel [(const_int 0) ++ (const_int 1) ++ (const_int 2) ++ (const_int 3)]))))] ++ "TARGET_MAX" ++ "unpkbw %r1,%0" ++ [(set_attr "type" "mvi")]) ++ ++(include "sync.md") ++ ++;; The call patterns are at the end of the file 
because their ++;; wildcard operand0 interferes with nice recognition. ++ ++(define_insn "*call_value_osf_1_er_noreturn" ++ [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) ++ (match_operand 2))) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "TARGET_EXPLICIT_RELOCS ++ && find_reg_note (insn, REG_NORETURN, NULL_RTX)" ++ "@ ++ call $26,($27),0 ++ bsr $26,%1\t\t!samegp ++ ldl $27,%1($29)\t\t!literal!%#\;call $26,($27),%1\t\t!lituse_jsr!%#" ++ [(set_attr "type" "call") ++ (set_attr "length" "*,*,8")]) ++ ++(define_insn "*call_value_osf_1_er_setfpec0" ++ [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) ++ (match_operand 2))) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 3 " ++ "@ ++ call $26,(%1),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* ++ bsr $26,%1\t\t!samegp ++ ldl $27,%1($29)\t\t!literal!%#\;call $26,($27),0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" ++ [(set_attr "type" "call") ++ (set_attr "length" "12,*,16")]) ++ ++(define_insn "*call_value_osf_1_er_setfpec1" ++ [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) ++ (match_operand 2))) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 1" ++ "@ ++ call $26,(%1),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* ++ bsr $26,%1\t\t!samegp ++ ldl $27,%1($29)\t\t!literal!%#\;call $26,($27),0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" ++ [(set_attr "type" "call") ++ (set_attr "length" "12,*,16")]) ++ ++(define_insn "*call_value_osf_1_er" ++ [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) ++ (match_operand 2))) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "TARGET_EXPLICIT_RELOCS" ++ "@ ++ call $26,(%1),0\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%* ++ bsr $26,%1\t\t!samegp ++ ldl $27,%1($29)\t\t!literal!%#\;call $26,($27),0\t\t!lituse_jsr!%#\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" ++ [(set_attr "type" "call") ++ (set_attr "length" "12,*,16")]) ++ ++;; We must use peep2 instead of a split because we need accurate life ++;; information for $gp. Consider the case of { bar (); while (1); }. ++(define_peephole2 ++ [(parallel [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "call_operand")) ++ (match_operand 2))) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))])] ++ "TARGET_EXPLICIT_RELOCS && reload_completed ++ && ! samegp_function_operand (operands[1], Pmode) ++ && (peep2_regno_dead_p (1, 29) ++ || find_reg_note (insn, REG_NORETURN, NULL_RTX))" ++ [(parallel [(set (match_dup 0) ++ (call (mem:DI (match_dup 3)) ++ (match_dup 2))) ++ (use (reg:DI 29)) ++ (use (match_dup 1)) ++ (use (match_dup 4)) ++ (clobber (reg:DI 26))])] ++{ ++ if (CONSTANT_P (operands[1])) ++ { ++ operands[3] = gen_rtx_REG (Pmode, 27); ++ operands[4] = GEN_INT (sw_64_next_sequence_number++); ++ emit_insn (gen_movdi_er_high_g (operands[3], pic_offset_table_rtx, ++ operands[1], operands[4])); ++ } ++ else ++ { ++ operands[3] = operands[1]; ++ operands[1] = const0_rtx; ++ operands[4] = const0_rtx; ++ } ++}) ++ ++(define_peephole2 ++ [(parallel [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "call_operand")) ++ (match_operand 2))) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))])] ++ "TARGET_EXPLICIT_RELOCS && reload_completed ++ && ! 
samegp_function_operand (operands[1], Pmode) ++ && ! (peep2_regno_dead_p (1, 29) ++ || find_reg_note (insn, REG_NORETURN, NULL_RTX)) ++ && !enable_asan_check_stack ()" ++ [(parallel [(set (match_dup 0) ++ (call (mem:DI (match_dup 3)) ++ (match_dup 2))) ++ (set (match_dup 6) ++ (unspec:DI [(match_dup 6) (match_dup 4)] UNSPEC_LDGP1)) ++ (use (match_dup 1)) ++ (use (match_dup 5)) ++ (clobber (reg:DI 26))]) ++ (set (match_dup 6) ++ (unspec:DI [(match_dup 6) (match_dup 4)] UNSPEC_LDGP2))] ++{ ++ if (CONSTANT_P (operands[1])) ++ { ++ operands[3] = gen_rtx_REG (Pmode, 27); ++ operands[5] = GEN_INT (sw_64_next_sequence_number++); ++ emit_insn (gen_movdi_er_high_g (operands[3], pic_offset_table_rtx, ++ operands[1], operands[5])); ++ } ++ else ++ { ++ operands[3] = operands[1]; ++ operands[1] = const0_rtx; ++ operands[5] = const0_rtx; ++ } ++ operands[4] = GEN_INT (sw_64_next_sequence_number++); ++ operands[6] = pic_offset_table_rtx; ++}) ++ ++(define_insn "*call_value_osf_2_er_nogp" ++ [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "register_operand" "c")) ++ (match_operand 2))) ++ (use (reg:DI 29)) ++ (use (match_operand 3)) ++ (use (match_operand 4)) ++ (clobber (reg:DI 26))] ++ "TARGET_EXPLICIT_RELOCS" ++ "call $26,(%1),%3%J4" ++ [(set_attr "type" "call")]) ++ ++(define_insn "*call_value_osf_2_er" ++ [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "register_operand" "c")) ++ (match_operand 2))) ++ (set (reg:DI 29) ++ (unspec:DI [(reg:DI 29) (match_operand 5 "const_int_operand")] ++ UNSPEC_LDGP1)) ++ (use (match_operand 3)) ++ (use (match_operand 4)) ++ (clobber (reg:DI 26))] ++ "TARGET_EXPLICIT_RELOCS" ++ { ++ return "call $26,(%1),%3%J4\;ldih $29,0($26)\t\t!gpdisp!%5"; ++ } ++ [(set_attr "type" "call") ++ (set_attr "cannot_copy" "true") ++ (set_attr "length" "8")]) ++ ++(define_insn "*call_value_osf_1_noreturn" ++ [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) ++ (match_operand 2))) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "! TARGET_EXPLICIT_RELOCS ++ && find_reg_note (insn, REG_NORETURN, NULL_RTX)" ++ "@ ++ call $26,($27),0 ++ bsr $26,$%1..ng ++ call $26,%1" ++ [(set_attr "type" "call") ++ (set_attr "length" "*,*,8")]) ++ ++(define_int_iterator TLS_CALL ++ [UNSPEC_TLSGD_CALL ++ UNSPEC_TLSLDM_CALL]) ++ ++(define_int_attr tls ++ [(UNSPEC_TLSGD_CALL "tlsgd") ++ (UNSPEC_TLSLDM_CALL "tlsldm")]) ++ ++(define_insn "call_value_osf_" ++ [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "symbolic_operand")) ++ (const_int 0))) ++ (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "HAVE_AS_TLS" ++ "ldl $27,%1($29)\t\t!literal!%2\;call $26,($27),%1\t\t!lituse_!%2\;ldih $29,0($26)\t\t!gpdisp!%*\;ldi $29,0($29)\t\t!gpdisp!%*" ++ [(set_attr "type" "call") ++ (set_attr "cannot_copy" "true") ++ (set_attr "length" "16")]) ++ ++;; We must use peep2 instead of a split because we need accurate life ++;; information for $gp. 
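++;; As with the non-TLS peepholes above, a minimal example (illustrative
++;; only) is:
++;;   extern void bar (void);
++;;   void f (void) { bar (); while (1); }
++;; Here $gp is dead after the call, so the variant that skips the $gp
++;; reload can only be chosen once accurate liveness information is
++;; available, i.e. at peephole2 time.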
++(define_peephole2 ++ [(parallel ++ [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "symbolic_operand")) ++ (const_int 0))) ++ (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))])] ++ "HAVE_AS_TLS && reload_completed ++ && peep2_regno_dead_p (1, 29)" ++ [(set (match_dup 3) ++ (unspec:DI [(match_dup 5) ++ (match_dup 1) ++ (match_dup 2)] UNSPEC_LITERAL)) ++ (parallel [(set (match_dup 0) ++ (call (mem:DI (match_dup 3)) ++ (const_int 0))) ++ (use (match_dup 5)) ++ (use (match_dup 1)) ++ (use (unspec [(match_dup 2)] TLS_CALL)) ++ (clobber (reg:DI 26))]) ++ (set (match_dup 5) ++ (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] ++{ ++ operands[3] = gen_rtx_REG (Pmode, 27); ++ operands[4] = GEN_INT (sw_64_next_sequence_number++); ++ operands[5] = pic_offset_table_rtx; ++}) ++ ++(define_peephole2 ++ [(parallel ++ [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "symbolic_operand")) ++ (const_int 0))) ++ (unspec [(match_operand:DI 2 "const_int_operand")] TLS_CALL) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))])] ++ "HAVE_AS_TLS && reload_completed ++ && !peep2_regno_dead_p (1, 29) ++ && !find_reg_note (insn, REG_EH_REGION, NULL_RTX)" ++ [(set (match_dup 3) ++ (unspec:DI [(match_dup 5) ++ (match_dup 1) ++ (match_dup 2)] UNSPEC_LITERAL)) ++ (parallel [(set (match_dup 0) ++ (call (mem:DI (match_dup 3)) ++ (const_int 0))) ++ (set (match_dup 5) ++ (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP1)) ++ (use (match_dup 1)) ++ (use (unspec [(match_dup 2)] TLS_CALL)) ++ (clobber (reg:DI 26))]) ++ (set (match_dup 5) ++ (unspec:DI [(match_dup 5) (match_dup 4)] UNSPEC_LDGP2))] ++{ ++ operands[3] = gen_rtx_REG (Pmode, 27); ++ operands[4] = GEN_INT (sw_64_next_sequence_number++); ++ operands[5] = pic_offset_table_rtx; ++}) ++ ++ ++(define_insn "*call_value_osf_1_setfpec0" ++ [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) ++ (match_operand 2))) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "! TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 3" ++ "@ ++ call $26,($27),0\;ldgp $29,0($26) ++ bsr $26,$%1..ng ++ call $26,%1\;ldgp $29,0($26)" ++ [(set_attr "type" "call") ++ (set_attr "length" "12,*,16")]) ++ ++(define_insn "*call_value_osf_1_setfpec1" ++ [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) ++ (match_operand 2))) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "! TARGET_EXPLICIT_RELOCS && flag_fpcr_set == 1" ++ "@ ++ call $26,($27),0\;ldgp $29,0($26) ++ bsr $26,$%1..ng ++ call $26,%1\;ldgp $29,0($26)" ++ [(set_attr "type" "call") ++ (set_attr "length" "12,*,16")]) ++ ++(define_insn "*call_value_osf_1" ++ [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "call_operand" "c,R,s")) ++ (match_operand 2))) ++ (use (reg:DI 29)) ++ (clobber (reg:DI 26))] ++ "! 
TARGET_EXPLICIT_RELOCS" ++ "@ ++ call $26,($27),0\;ldgp $29,0($26) ++ bsr $26,$%1..ng ++ call $26,%1\;ldgp $29,0($26)" ++ [(set_attr "type" "call") ++ (set_attr "length" "12,*,16")]) ++ ++(define_insn "*sibcall_value_osf_1_er" ++ [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "symbolic_operand" "R,s")) ++ (match_operand 2))) ++ (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] ++ "TARGET_EXPLICIT_RELOCS" ++ "@ ++ br $31,%1\t\t!samegp ++ ldl $27,%1($29)\t\t!literal!%#\;jmp $31,($27),%1\t\t!lituse_jsr!%#" ++ [(set_attr "type" "call") ++ (set_attr "length" "*,8")]) ++ ++(define_insn "*sibcall_value_osf_1" ++ [(set (match_operand 0) ++ (call (mem:DI (match_operand:DI 1 "symbolic_operand" "R,s")) ++ (match_operand 2))) ++ (unspec [(reg:DI 29)] UNSPEC_SIBCALL)] ++ "! TARGET_EXPLICIT_RELOCS" ++ "@ ++ br $31,$%1..ng ++ ldi $27,%1\;jmp $31,($27),%1" ++ [(set_attr "type" "call") ++ (set_attr "length" "*,8")]) ++ ++;; Builtins to replace 1.0f/sqrtf(x) with instructions using RSQRTE and the ++;; appropriate fixup. ++;; Currently, does not work with the double precision floating-point.(0x5fe6eb000000000a) ++(define_expand "rsqrtsf2" ++ [(match_operand:SF 0 "register_operand" "") ++ (match_operand:SF 1 "register_operand" "")] ++ "TARGET_FP && flag_reciprocal_math == 1 && flag_sw_rsqrt == 1" ++ { ++ sw_64_emit_rsqrt (operands[0], operands[1], 1); ++ DONE; ++ }) ++ ++(define_insn "*movsf2" ++ [(set (match_operand:SF 0 "nonimmediate_operand" "=r") ++ (unspec:SF [(match_operand:SF 1 "input_operand" "f")] ++ UNSPEC_FIMOVS))] ++ "TARGET_FP && flag_reciprocal_math == 1 && flag_sw_rsqrt == 1" ++ "fimovs %1,%0" ++ [(set_attr "type" "ldsym")]) ++ ++(define_insn "speculation_barrier" ++ [(unspec_volatile [(const_int 0)] UNSPECV_SPECULATION_BARRIER)] ++ "" ++ "imemb" ++ [(set_attr "type" "misc")]) ++ ++(define_insn "stack_tie" ++ [(set (mem:BLK (scratch)) ++ (unspec:BLK [(match_operand:DI 0 "register_operand" "r") ++ (match_operand:DI 1 "register_operand" "r")] ++ UNSPEC_TIE))] ++ "" ++ "" ++ [(set_attr "length" "0")] ++) ++ ++(include "m32.md") +diff --git a/gcc/config/sw_64/sw_64.opt b/gcc/config/sw_64/sw_64.opt +new file mode 100644 +index 000000000..fdb6304a2 +--- /dev/null ++++ b/gcc/config/sw_64/sw_64.opt +@@ -0,0 +1,318 @@ ++; Options for the Sw_64 port of the compiler ++; ++; Copyright (C) 2005-2020 Free Software Foundation, Inc. ++; ++; This file is part of GCC. ++; ++; GCC is free software; you can redistribute it and/or modify it under ++; the terms of the GNU General Public License as published by the Free ++; Software Foundation; either version 3, or (at your option) any later ++; version. ++; ++; GCC is distributed in the hope that it will be useful, but WITHOUT ++; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public ++; License for more details. ++; ++; You should have received a copy of the GNU General Public License ++; along with GCC; see the file COPYING3. If not see ++; . ++fsw-sf-cmpsel ++Target Var(flag_sw_sf_cmpsel) Init(0) ++use or not use SF cmp/br/selcet instructions. ++ ++msw-use-32align ++C C++ Fortran LTO Driver Target Mask(SW_32ALIGN) Save ++Use or not use 32align. ++ ++fsw-hardware-prefetch ++Target Var(flag_sw_hardware_prefetch) Init(0) ++set hardware_prefetch registers:PFH_CTL,PFH_CNT. 
++ ++fsw-hardware-prefetch-clt= ++Common Report Joined RejectNegative UInteger Var(flag_hardware_prefetch_clt) Init(5) Optimization ++ ++fsw-hardware-prefetch-cnt-l1= ++Common Report Joined RejectNegative UInteger Var(flag_hardware_prefetch_cnt_l1) Init(0) Optimization ++ ++fsw-hardware-prefetch-cnt-l2= ++Common Report Joined RejectNegative UInteger Var(flag_hardware_prefetch_cnt_l2) Init(0) Optimization ++ ++fsw-hardware-prefetch-cnt-l3= ++Common Report Joined RejectNegative UInteger Var(flag_hardware_prefetch_cnt_l3) Init(5) Optimization ++ ++fsw-fselect ++Target Var(flag_sw_fselect) Init(0) ++Use fewer instructions for sel/fsel. ++ ++fsw-branch-fusion ++Target Var(flag_sw_branch_fusion) Init(1) ++Fuse the cbranch instructions. ++ ++fsw-branch-combination ++Target Var(flag_sw_branch_combination) Init(0) ++Combine the cbranch instructions. ++ ++fsw-unalign-byte ++Target Var(flag_sw_unalign_byte) Init(0) ++Use or do not use ldl_u/stl_u instructions. ++ ++fsw-rev ++Target Report Var(flag_sw_rev) Init(1) ++Use or do not use the rev instruction. ++ ++fsw-cmov ++Target Report Var(flag_sw_cmov) Init(1) ++Use the added floating-point integer conversion instruction. ++ ++fsw-bitop ++Target Report Var(flag_sw_bitop) Init(0) ++Use the ISA bit-operation instructions. ++ ++fsw-shift-word ++Target Report Var(flag_sw_shift_word) Init(1) ++Use or do not use the sw8a shift instructions. ++ ++fsw-int-divmod ++Target Report Var(flag_sw_int_divmod) Init(1) ++Use or do not use the integer div/mod instructions. ++ ++fsw-fprnd ++Target Report Var(flag_sw_fprnd) Init(0) ++Use floating-point rounding instructions. ++ ++fsw-auto-inc-dec ++Target Var(flag_sw_auto_inc_dec) Init(0) ++Use or do not use integer auto-inc-dec load/store instructions. ++ ++fsw-use-cas ++Target Var(flag_sw_use_cas) Init(1) ++Use or do not use the compare-and-swap instruction. ++ ++fsw-fma ++Target Report Var(flag_sw_fma) Init(1) ++Enable fma (fused multiply-add) generation. ++ ++fsw-sdsame ++Target Report Var(flag_sw_sdsame) Init(0) ++For destination and source being the same. ++;;;;;;;;;;;;;;;;;;;;;;;;; ++ ++fsw-rsqrt ++Target Report Var(flag_sw_rsqrt) Init(0) ++Fast calculation of 1.0f/sqrtf (x). Does not work with double-precision floating-point. ++;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++ ++fsw-fast-math ++Target Report Var(flag_sw_fast_math) Init(0) ++Avoid the spec2017-628 fast-math error. The corresponding code is in gcc/gimple-match-head.c. ++;;;;;;;;;;;;;;;;;;;;;;;;; ++ ++fsw-nofcpys ++Target Var(flag_sw_nofcpys) Init(1) ++Delete fcpys after the fcvtsd instruction. ++ ++fsw-rtid ++Target Var(flag_sw_rtid) Init(1) ++Use rtid instead of syscall 0x9e. ++ ++fsw-rtx-cost ++Target Var(flag_sw_rtx_cost) Init(0) ++Adjust the rtx-cost. ++ ++fsw-sxaddl ++Target Var(flag_sw_sxaddl) Init(1) ++Combine the sXaddl instructions. ++ ++fsw-delnop ++Target Var(flag_sw_delnop) Init(1) ++Delete the nop instruction. ++ ++fsw-int-div-opt ++Target Report Var(flag_sw_int_div_opt) Init(0) ++SW integer division optimization. ++ ++fsw-prefetch-l1 ++Target Var(flag_sw_prefetch_l1) Init(1) ++Use L1 load prefetch instead of L2. ++ ++fsw-prefetch-add ++Target Var(flag_sw_prefetch_add) Init(1) ++Generate prefetches for cases like stream add. ++ ++fsw-prefetch-unroll ++Target Var(flag_sw_prefetch_unroll) Init(0) ++Optimize loop unrolling in the prefetch pass. ++ ++msoft-float ++Target Report Mask(SOFT_FP) ++Do not use hardware fp. ++ ++fsw-recip ++Target Report Var(flag_sw_recip) Init(0) ++Use the ISA floating-point reciprocal instructions. ++ ++fsw-recip-precision ++Target Report Var(flag_sw_recip_precision) Init(0) ++Assume that the reciprocal estimate instructions provide more accuracy.
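++
++; A minimal sketch (illustrative only) of the pattern targeted by the
++; fsw-rsqrt and reciprocal options above.  The rsqrtsf2 expander is only
++; used when -freciprocal-math (for example via -ffast-math) is in effect:
++;
++;   #include <math.h>
++;   /* With -ffast-math -fsw-rsqrt this may go through rsqrtsf2 instead
++;      of a divide plus a square root.  */
++;   float inv_sqrt (float x)
++;   {
++;     return 1.0f / sqrtf (x);
++;   }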
++;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ++ ++ ++mfp-regs ++Target Report Mask(FPREGS) ++Use fp registers. ++ ++mgas ++Target Ignore ++Does nothing. Preserved for backward compatibility. ++ ++mieee-conformant ++Target RejectNegative Mask(IEEE_CONFORMANT) ++Request IEEE-conformant math library routines (SYSV). ++ ++mieee ++Target Report RejectNegative Mask(IEEE) ++Emit IEEE-conformant code, without inexact exceptions. ++ ++mieee-main ++Target Report RejectNegative Mask(IEEE_MAIN) ++Emit IEEE-conformant code, without inexact exceptions. ++ ++mieee-with-inexact ++Target Report RejectNegative Mask(IEEE_WITH_INEXACT) ++ ++mbuild-constants ++Target Report Mask(BUILD_CONSTANTS) ++Do not emit complex integer constants to read-only memory. ++ ++mfloat-vax ++Target Report RejectNegative Mask(FLOAT_VAX) ++Use VAX fp. ++ ++mfloat-ieee ++Target Report RejectNegative InverseMask(FLOAT_VAX) ++Do not use VAX fp. ++ ++mbwx ++Target Report Mask(BWX) ++Emit code for the byte/word ISA extension. ++ ++mmax ++Target Report Mask(MAX) ++Emit code for the motion video ISA extension. ++ ++mfix ++Target Report Mask(FIX) ++Emit code for the fp move and sqrt ISA extension. ++ ++mcix ++Target Report Mask(CIX) ++Emit code for the counting ISA extension. ++ ++msw6a ++Target Report Mask(SW6A) ++Emit code for the SW6A ISA extension. ++ ++msw6b ++Target Report Mask(SW6B) ++Emit code for the SW6B ISA extension. ++ ++msw8a ++Target Report Mask(SW8A) ++Emit code for the SW8A ISA extension. ++ ++mexplicit-relocs ++Target Report Mask(EXPLICIT_RELOCS) ++Emit code using explicit relocation directives. ++ ++msmall-data ++Target Report RejectNegative Mask(SMALL_DATA) ++Emit 16-bit relocations to the small data areas. ++ ++mlarge-data ++Target Report RejectNegative InverseMask(SMALL_DATA) ++Emit 32-bit relocations to the small data areas. ++ ++msmall-text ++Target Report RejectNegative Mask(SMALL_TEXT) ++Emit direct branches to local functions. ++ ++mlarge-text ++Target Report RejectNegative InverseMask(SMALL_TEXT) ++Emit indirect branches to local functions. ++ ++mtls-kernel ++Target Report Mask(TLS_KERNEL) ++Emit rdval for thread pointer. ++ ++mlong-double-128 ++Target Report RejectNegative Mask(LONG_DOUBLE_128) ++Use 128-bit long double. ++ ++mlong-double-64 ++Target Report RejectNegative InverseMask(LONG_DOUBLE_128) ++Use 64-bit long double. ++ ++mcpu= ++Target RejectNegative Joined Var(sw_64_cpu_string) ++Use features of and schedule given CPU. ++ ++mtune= ++Target RejectNegative Joined Var(sw_64_tune_string) ++Schedule given CPU. ++ ++mfp-rounding-mode= ++Target RejectNegative Joined Var(sw_64_fprm_string) ++Control the generated fp rounding mode. ++ ++mfp-trap-mode= ++Target RejectNegative Joined Var(sw_64_fptm_string) ++Control the IEEE trap mode. ++ ++mtrap-precision= ++Target RejectNegative Joined Var(sw_64_tp_string) ++Control the precision given to fp exceptions. ++ ++mmemory-latency= ++Target RejectNegative Joined Var(sw_64_mlat_string) ++Tune expected memory latency. ++ ++mtls-size= ++Target RejectNegative Joined UInteger Var(sw_64_tls_size) Init(32) ++Specify bit size of immediate TLS offsets. ++ ++msimd ++C C++ Fortran Driver Target Mask(SW_SIMD) Save ++Support SW SIMD built-in functions and code generation. ++ ++mgprel-size= ++Target RejectNegative Joined UInteger Var(sw_64_gprel_size) Init(16) ++Specify bit size of gprel relocation offsets. ++ ++mtls-tlsgd= ++Target RejectNegative Joined UInteger Var(sw_64_tls_gd) Init(16) ++Specify the bitsize of tlsgd relocation offset relative GP. 
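++
++; A minimal sketch (illustrative only) of code whose thread-local accesses
++; go through the tlsgd/tlsldm/gottprel sequences whose relocation widths
++; the mtls-* options here control.  When compiled with -fPIC for a shared
++; object, an access like this typically uses the tlsgd sequence:
++;
++;   __thread int counter;
++;   int next_id (void) { return ++counter; }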
++ ++mtls-tlsldm= ++Target RejectNegative Joined UInteger Var(sw_64_tls_ldm) Init(16) ++Specify the bitsize of tlsldm relocation offset relative GP. ++ ++mtls-gotdtprel= ++Target RejectNegative Joined UInteger Var(sw_64_tls_gotdtprel) Init(16) ++Specify the bitsize of gotdtprel relocation offset relative GP. ++ ++mtls-gottprel= ++Target RejectNegative Joined UInteger Var(sw_64_tls_gottprel) Init(16) ++Specify the bitsize of gottprel relocation offset relative GP. ++ ++mlra ++Target Report Var(sh_lra_flag) Init(0) Save ++Use reload instead of LRA (transitional). ++ ++mtrunc ++C Fortran Driver Target Mask(SW_TRUNC) Save ++Support fix_trunc code generation. ++ ++m32 ++C ObjC C++ ObjC++ LTO Fortran Driver Target Report Mask(SW_M32) Init(0) ++M32 optimization. +diff --git a/gcc/config/sw_64/sync.md b/gcc/config/sw_64/sync.md +new file mode 100644 +index 000000000..71fd0478e +--- /dev/null ++++ b/gcc/config/sw_64/sync.md +@@ -0,0 +1,499 @@ ++;; GCC machine description for Sw_64 synchronization instructions. ++;; Copyright (C) 2005-2020 Free Software Foundation, Inc. ++;; ++;; This file is part of GCC. ++;; ++;; GCC is free software; you can redistribute it and/or modify ++;; it under the terms of the GNU General Public License as published by ++;; the Free Software Foundation; either version 3, or (at your option) ++;; any later version. ++;; ++;; GCC is distributed in the hope that it will be useful, ++;; but WITHOUT ANY WARRANTY; without even the implied warranty of ++;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++;; GNU General Public License for more details. ++;; ++;; You should have received a copy of the GNU General Public License ++;; along with GCC; see the file COPYING3. If not see ++;; . ++ ++(define_code_iterator FETCHOP [plus minus ior xor and]) ++(define_code_attr fetchop_name ++ [(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")]) ++(define_code_attr fetchop_pred ++ [(plus "add_operand") (minus "reg_or_8bit_operand") ++ (ior "or_operand") (xor "or_operand") (and "and_operand")]) ++(define_code_attr fetchop_constr ++ [(plus "rKL") (minus "rI") (ior "rIN") (xor "rIN") (and "rINM")]) ++ ++ ++(define_expand "memory_barrier" ++ [(set (match_dup 0) ++ (unspec:BLK [(match_dup 0)] UNSPEC_MB))] ++ "" ++{ ++ operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); ++ MEM_VOLATILE_P (operands[0]) = 1; ++}) ++ ++;; mb-> memb ++(define_insn "*memory_barrier" ++ [(set (match_operand:BLK 0) ++ (unspec:BLK [(match_dup 0)] UNSPEC_MB))] ++ "" ++ "memb" ++ [(set_attr "type" "mb")]) ++ ++(define_insn "write_memory_barrier" ++ [(unspec:BLK [(const_int 0)] UNSPEC_MB)] ++ "TARGET_SW8A" ++ "wmemb" ++ [(set_attr "type" "mb")]) ++ ++;; "ld_l %0,%1" ++(define_insn "@load_locked_" ++ [(set (match_operand:I48MODE 0 "register_operand" "=r") ++ (unspec_volatile:I48MODE ++ [(match_operand:I48MODE 1 "memory_operand" "m")] ++ UNSPECV_LL))] ++ "" ++ { ++ switch ('') ++ { ++ case 'w': ++ return "ldi %0,%1\;lldw %0,0(%0)"; ++ case 'l': ++ return "ldi %0,%1\;lldl %0,0(%0)"; ++ default: ++ return "ld_l %0,%1"; ++ } ++ } ++ [(set_attr "type" "ld_l")]) ++ ++;; "st_c %0,%1" ++(define_insn "@store_conditional_" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(const_int 0)] UNSPECV_SC)) ++ (set (match_operand:I48MODE 1 "memory_operand" "=m") ++ (match_operand:I48MODE 2 "reg_or_0_operand" "0")) ++ (clobber (reg:DI 28))] ++ "" ++ { ++ switch ('') ++ { ++ case 'w': ++ return "ldi $28,%1\;lstw %0,0($28)"; ++ case 'l': ++ return "ldi $28,%1\;lstl %0,0($28)"; ++ 
default: ++ return "st_c %0,%1"; ++ } ++ } ++ [(set_attr "type" "st_c")]) ++ ++ (define_insn "builtin_rd_f" ++ [(set (match_operand:DI 0 "register_operand" "=r") ++ (unspec_volatile:DI [(const_int 0)] UNSPECV_SC))] ++ "" ++ "rd_f %0" ++ [(set_attr "type" "st_c")]) ++ ++ (define_insn "builtin_wr_f" ++ [(match_operand:DI 0 "register_operand" "r") ++ (unspec_volatile:DI [(const_int 0)] UNSPECV_LL)] ++ "" ++ "wr_f %0" ++ [(set_attr "type" "ld_l")]) ++ ++;; The Sw_64 Architecture Handbook says that it is UNPREDICTABLE whether ++;; the lock is cleared by a normal load or store. This means we cannot ++;; expand a ll/sc sequence before reload, lest a register spill is ++;; inserted inside the sequence. It is also UNPREDICTABLE whether the ++;; lock is cleared by a TAKEN branch. This means that we can not expand ++;; a ll/sc sequence containing a branch (i.e. compare-and-swap) until after ++;; the final basic-block reordering pass. ++ ++(define_expand "atomic_compare_and_swap" ++ [(parallel ++ [(set (match_operand:DI 0 "register_operand") ;; bool out ++ (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) ++ (set (match_operand:I48MODE 1 "register_operand") ;; val out ++ (unspec_volatile:I48MODE [(const_int 0)] UNSPECV_CMPXCHG)) ++ (set (match_operand:I48MODE 2 "memory_operand") ;; memory ++ (unspec_volatile:I48MODE ++ [(match_dup 2) ++ (match_operand:I48MODE 3 "reg_or_8bit_operand") ;; expected ++ (match_operand:I48MODE 4 "add_operand") ;; desired ++ (match_operand:SI 5 "const_int_operand") ;; is_weak ++ (match_operand:SI 6 "const_int_operand") ;; succ model ++ (match_operand:SI 7 "const_int_operand") ;; fail model ++ (match_operand:DI 8 "register_operand")] ++ UNSPECV_CMPXCHG)) ++ (clobber (reg:DI 28))])] ++ "" ++{ ++ if (mode == SImode) ++ { ++ operands[3] = convert_modes (DImode, SImode, operands[3], 0); ++ operands[4] = convert_modes (DImode, SImode, operands[4], 0); ++ } ++ if (TARGET_SW8A) ++ { ++ if (flag_sw_use_cas) ++ { ++ if (CONST_INT_P (operands[3])) ++ operands[3] = force_reg (DImode, operands[3]); ++ ++ if (CONST_INT_P (operands[4])) ++ operands[4] = force_reg (DImode, operands[4]); ++ emit_insn (gen_atomic_compare_and_swap_target_sw8a (operands[0], ++ operands[1], ++ operands[2], ++ operands[3], ++ operands[4], ++ operands[5], ++ operands[6], ++ operands[7])); ++ DONE; ++ } ++ } ++}) ++ ++(define_insn_and_split "*atomic_compare_and_swap" ++ [(set (match_operand:DI 0 "register_operand" "=&r") ;; bool out ++ (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) ++ (set (match_operand:I48MODE 1 "register_operand" "=&r") ;; val out ++ (unspec_volatile:I48MODE [(const_int 0)] UNSPECV_CMPXCHG)) ++ (set (match_operand:I48MODE 2 "memory_operand" "+m") ;; memory ++ (unspec_volatile:I48MODE ++ [(match_dup 2) ++ (match_operand:DI 3 "reg_or_8bit_operand" "rI") ;; expected ++ (match_operand:DI 4 "add_operand" "rKL") ;; desired ++ (match_operand:SI 5 "const_int_operand") ;; is_weak ++ (match_operand:SI 6 "const_int_operand") ;; succ model ++ (match_operand:SI 7 "const_int_operand") ;; fail model ++ (match_operand:DI 8 "register_operand" "r")] ++ UNSPECV_CMPXCHG)) ++ (clobber (reg:DI 28))] ++ ++ "" ++ "#" ++ "epilogue_completed" ++ [(const_int 0)] ++{ ++ sw_64_split_compare_and_swap (operands); ++ DONE; ++} ++ [(set_attr "type" "multi")]) ++ ++(define_expand "atomic_compare_and_swap" ++ [(match_operand:DI 0 "register_operand") ;; bool out ++ (match_operand:I12MODE 1 "register_operand") ;; val out ++ (match_operand:I12MODE 2 "mem_noofs_operand") ;; memory ++ (match_operand:I12MODE 3 
"register_operand") ;; expected ++ (match_operand:I12MODE 4 "add_operand") ;; desired ++ (match_operand:SI 5 "const_int_operand") ;; is_weak ++ (match_operand:SI 6 "const_int_operand") ;; succ model ++ (match_operand:SI 7 "const_int_operand") ;; fail model ++ (match_operand:DI 8 "register_operand")] ++ "" ++{ ++ if (flag_sw_use_cas) ++ { ++ if (CONST_INT_P (operands[3])) ++ operands[3] = force_reg (mode, operands[3]); ++ ++ if (CONST_INT_P (operands[4])) ++ operands[4] = force_reg (mode, operands[4]); ++ } ++ sw_64_expand_compare_and_swap_12 (operands); ++ DONE; ++}) ++ ++(define_insn_and_split "@atomic_compare_and_swap_1" ++ [(set (match_operand:DI 0 "register_operand" "=&r") ;; bool out ++ (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) ++ (set (match_operand:DI 1 "register_operand" "=&r") ;; val out ++ (zero_extend:DI ++ (unspec_volatile:I12MODE [(const_int 0)] UNSPECV_CMPXCHG))) ++ (set (match_operand:I12MODE 2 "mem_noofs_operand" "+w") ;; memory ++ (unspec_volatile:I12MODE ++ [(match_dup 2) ++ (match_operand:DI 3 "reg_or_8bit_operand" "rI") ;; expected ++ (match_operand:DI 4 "reg_or_0_operand" "rJ") ;; desired ++ (match_operand:DI 5 "register_operand" "r") ;; align ++ (match_operand:SI 6 "const_int_operand") ;; is_weak ++ (match_operand:SI 7 "const_int_operand") ;; succ model ++ (match_operand:SI 8 "const_int_operand") ;; fail model ++ (match_operand:DI 9 "register_operand" "r")] ++ UNSPECV_CMPXCHG)) ++ (clobber (match_scratch:DI 10 "=&r")) ++ (clobber (reg:DI 28))] ++ "" ++ "#" ++ "epilogue_completed" ++ [(const_int 0)] ++{ ++ sw_64_split_compare_and_swap_12 (operands); ++ DONE; ++} ++ [(set_attr "type" "multi")]) ++ ++(define_insn_and_split "atomic_compare_and_swap_target_sw8a" ++ [(set (match_operand:DI 0 "register_operand" "=&r") ;; bool out ++ (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) ++ (set (match_operand:I48MODE 1 "register_operand" "=&r") ;; val out ++ (unspec_volatile:I48MODE [(const_int 0)] UNSPECV_CMPXCHG)) ++ (set (match_operand:I48MODE 2 "memory_operand" "+m") ;; memory ++ (unspec_volatile:I48MODE ++ [(match_dup 2) ++ (match_operand:DI 3 "reg_or_8bit_operand" "r") ;; expected ++ (match_operand:DI 4 "add_operand" "r") ;; desired ++ (match_operand:SI 5 "const_int_operand") ;; is_weak ++ (match_operand:SI 6 "const_int_operand") ;; succ model ++ (match_operand:SI 7 "const_int_operand")] ;; fail model ++ UNSPECV_CMPXCHG)) ++ (clobber (reg:DI 28))] ++ "" ++ "#" ++ "epilogue_completed" ++ [(const_int 0)] ++{ ++ if (flag_sw_use_cas) ++ sw_64_split_atomic_cas (operands); ++ else ++ sw_64_split_compare_and_swap (operands); ++ DONE; ++} ++ [(set_attr "type" "multi")]) ++ ++(define_insn_and_split "@atomic_compare_and_swap_1_target_sw8a" ++ [(set (match_operand:DI 0 "register_operand" "=&r") ;; bool out ++ (unspec_volatile:DI [(const_int 0)] UNSPECV_CMPXCHG)) ++ (set (match_operand:DI 1 "register_operand" "=&r") ;; val out ++ (zero_extend:DI ++ (unspec_volatile:I12MODE [(const_int 0)] UNSPECV_CMPXCHG))) ++ (set (match_operand:I12MODE 2 "mem_noofs_operand" "+w") ;; memory ++ (unspec_volatile:I12MODE ++ [(match_dup 2) ++ (match_operand:DI 3 "reg_or_8bit_operand" "rI") ;; expected ++ (match_operand:DI 4 "register_operand" "r") ;; desired ++ (match_operand:DI 5 "register_operand" "r") ;; align ++ (match_operand:SI 6 "const_int_operand") ;; is_weak ++ (match_operand:SI 7 "const_int_operand") ;; succ model ++ (match_operand:SI 8 "const_int_operand")] ;; fail model ++ UNSPECV_CMPXCHG)) ++ (clobber (match_scratch:DI 9 "=&r")) ++ (clobber (match_scratch:DI 10 "=&r")) ++ 
(clobber (reg:DI 28))] ++ "" ++ "#" ++ "epilogue_completed" ++ [(const_int 0)] ++{ ++ sw_64_split_compare_and_swap_12 (operands); ++ DONE; ++} ++ [(set_attr "type" "multi")]) ++ ++(define_insn "sw_64_atomic_cas" ++ [(set (match_operand:I48MODE 0 "register_operand" "") ;; out ++ (match_operand:I48MODE 1 "memory_operand" "")) ;; memory. ++ (set (match_dup 1) ++ (unspec_volatile:I48MODE ++ [(match_dup 0) ++ (match_operand:I48MODE 2 "register_operand" "")] ;; value. ++ UNSPECV_CMPXCHG)) ++ (clobber (reg:DI 28))] ++ "TARGET_SW8A && flag_sw_use_cas" ++ "ldi $28,%1\;cas %0,$28,%2") ++;; endif ++ ++(define_insn_and_split "atomic_exchange" ++ [(set (match_operand:I48MODE 0 "register_operand" "=&r") ;; output ++ (match_operand:I48MODE 1 "memory_operand" "+m")) ;; memory ++ (set (match_dup 1) ++ (unspec:I48MODE ++ [(match_operand:I48MODE 2 "add_operand" "rKL") ;; input ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ UNSPEC_XCHG)) ++ (clobber (match_scratch:I48MODE 4 "=&r"))] ++ "" ++ "#" ++ "epilogue_completed" ++ [(const_int 0)] ++{ ++ sw_64_split_atomic_exchange (operands); ++ DONE; ++} ++ [(set_attr "type" "multi")]) ++ ++(define_expand "atomic_exchange" ++ [(match_operand:I12MODE 0 "register_operand") ;; output ++ (match_operand:I12MODE 1 "mem_noofs_operand") ;; memory ++ (match_operand:I12MODE 2 "reg_or_0_operand") ;; input ++ (match_operand:SI 3 "const_int_operand")] ;; model ++ "" ++{ ++ sw_64_expand_atomic_exchange_12 (operands); ++ DONE; ++}) ++ ++(define_insn_and_split "@atomic_exchange_1" ++ [(set (match_operand:DI 0 "register_operand" "=&r") ;; output ++ (zero_extend:DI ++ (match_operand:I12MODE 1 "mem_noofs_operand" "+w"))) ;; memory ++ (set (match_dup 1) ++ (unspec:I12MODE ++ [(match_operand:DI 2 "reg_or_8bit_operand" "rI") ;; input ++ (match_operand:DI 3 "register_operand" "r") ;; align ++ (match_operand:SI 4 "const_int_operand")] ;; model ++ UNSPEC_XCHG)) ++ (clobber (match_scratch:DI 5 "=&r"))] ++ "" ++ "#" ++ "epilogue_completed" ++ [(const_int 0)] ++{ ++ sw_64_split_atomic_exchange_12 (operands); ++ DONE; ++} ++ [(set_attr "type" "multi")]) ++ ++(define_insn_and_split "atomic_" ++ [(set (match_operand:I48MODE 0 "memory_operand" "+m") ++ (unspec:I48MODE ++ [(FETCHOP:I48MODE (match_dup 0) ++ (match_operand:I48MODE 1 "" "")) ++ (match_operand:SI 2 "const_int_operand")] ++ UNSPEC_ATOMIC)) ++ (clobber (match_scratch:I48MODE 3 "=&r")) ++ (clobber (reg:DI 28))] ++ "" ++ "#" ++ "epilogue_completed" ++ [(const_int 0)] ++{ ++ sw_64_split_atomic_op (, operands[0], operands[1], ++ NULL, NULL, operands[3], ++ (enum memmodel) INTVAL (operands[2])); ++ DONE; ++} ++ [(set_attr "type" "multi")]) ++ ++(define_insn_and_split "atomic_nand" ++ [(set (match_operand:I48MODE 0 "memory_operand" "+m") ++ (unspec:I48MODE ++ [(not:I48MODE ++ (and:I48MODE (match_dup 0) ++ (match_operand:I48MODE 1 "register_operand" "r"))) ++ (match_operand:SI 2 "const_int_operand")] ++ UNSPEC_ATOMIC)) ++ (clobber (match_scratch:I48MODE 3 "=&r")) ++ (clobber (reg:DI 28))] ++ "" ++ "#" ++ "epilogue_completed" ++ [(const_int 0)] ++{ ++ sw_64_split_atomic_op (NOT, operands[0], operands[1], ++ NULL, NULL, operands[3], ++ (enum memmodel) INTVAL (operands[2])); ++ DONE; ++} ++ [(set_attr "type" "multi")]) ++ ++(define_insn_and_split "atomic_fetch_" ++ [(set (match_operand:I48MODE 0 "register_operand" "=&r") ++ (match_operand:I48MODE 1 "memory_operand" "+m")) ++ (set (match_dup 1) ++ (unspec:I48MODE ++ [(FETCHOP:I48MODE (match_dup 1) ++ (match_operand:I48MODE 2 "" "")) ++ (match_operand:SI 3 "const_int_operand")] ++ 
UNSPEC_ATOMIC)) ++ (clobber (match_scratch:I48MODE 4 "=&r")) ++ (clobber (reg:DI 28))] ++ "" ++ "#" ++ "epilogue_completed" ++ [(const_int 0)] ++{ ++ sw_64_split_atomic_op (, operands[1], operands[2], ++ operands[0], NULL, operands[4], ++ (enum memmodel) INTVAL (operands[3])); ++ DONE; ++} ++ [(set_attr "type" "multi")]) ++ ++(define_insn_and_split "atomic_fetch_nand" ++ [(set (match_operand:I48MODE 0 "register_operand" "=&r") ++ (match_operand:I48MODE 1 "memory_operand" "+m")) ++ (set (match_dup 1) ++ (unspec:I48MODE ++ [(not:I48MODE ++ (and:I48MODE (match_dup 1) ++ (match_operand:I48MODE 2 "register_operand" "r"))) ++ (match_operand:SI 3 "const_int_operand")] ++ UNSPEC_ATOMIC)) ++ (clobber (match_scratch:I48MODE 4 "=&r")) ++ (clobber (reg:DI 28))] ++ "" ++ "#" ++ "epilogue_completed" ++ [(const_int 0)] ++{ ++ sw_64_split_atomic_op (NOT, operands[1], operands[2], ++ operands[0], NULL, operands[4], ++ (enum memmodel) INTVAL (operands[3])); ++ DONE; ++} ++ [(set_attr "type" "multi")]) ++ ++(define_insn_and_split "atomic__fetch" ++ [(set (match_operand:I48MODE 0 "register_operand" "=&r") ++ (FETCHOP:I48MODE ++ (match_operand:I48MODE 1 "memory_operand" "+m") ++ (match_operand:I48MODE 2 "" ""))) ++ (set (match_dup 1) ++ (unspec:I48MODE ++ [(FETCHOP:I48MODE (match_dup 1) (match_dup 2)) ++ (match_operand:SI 3 "const_int_operand")] ++ UNSPEC_ATOMIC)) ++ (clobber (match_scratch:I48MODE 4 "=&r")) ++ (clobber (reg:DI 28))] ++ "" ++ "#" ++ "epilogue_completed" ++ [(const_int 0)] ++{ ++ sw_64_split_atomic_op (, operands[1], operands[2], ++ NULL, operands[0], operands[4], ++ (enum memmodel) INTVAL (operands[3])); ++ DONE; ++} ++ [(set_attr "type" "multi")]) ++ ++(define_insn_and_split "atomic_nand_fetch" ++ [(set (match_operand:I48MODE 0 "register_operand" "=&r") ++ (not:I48MODE ++ (and:I48MODE (match_operand:I48MODE 1 "memory_operand" "+m") ++ (match_operand:I48MODE 2 "register_operand" "r")))) ++ (set (match_dup 1) ++ (unspec:I48MODE ++ [(not:I48MODE (and:I48MODE (match_dup 1) (match_dup 2))) ++ (match_operand:SI 3 "const_int_operand")] ++ UNSPEC_ATOMIC)) ++ (clobber (match_scratch:I48MODE 4 "=&r")) ++ (clobber (reg:DI 28))] ++ "" ++ "#" ++ "epilogue_completed" ++ [(const_int 0)] ++{ ++ sw_64_split_atomic_op (NOT, operands[1], operands[2], ++ NULL, operands[0], operands[4], ++ (enum memmodel) INTVAL (operands[3])); ++ DONE; ++} ++ [(set_attr "type" "multi")]) +diff --git a/gcc/config/sw_64/t-linux b/gcc/config/sw_64/t-linux +new file mode 100644 +index 000000000..d78ef47df +--- /dev/null ++++ b/gcc/config/sw_64/t-linux +@@ -0,0 +1 @@ ++MULTIARCH_DIRNAME = $(call if_multiarch,sw_64-linux-gnu) +diff --git a/gcc/config/sw_64/t-sw_64 b/gcc/config/sw_64/t-sw_64 +new file mode 100644 +index 000000000..d7b5e98a0 +--- /dev/null ++++ b/gcc/config/sw_64/t-sw_64 +@@ -0,0 +1,19 @@ ++# Copyright (C) 2016-2020 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . 
++ ++PASSES_EXTRA += $(srcdir)/config/sw_64/sw_64-passes.def +diff --git a/gcc/config/sw_64/x-sw_64 b/gcc/config/sw_64/x-sw_64 +new file mode 100644 +index 000000000..229866b30 +--- /dev/null ++++ b/gcc/config/sw_64/x-sw_64 +@@ -0,0 +1,3 @@ ++driver-sw_64.o: $(srcdir)/config/sw_64/driver-sw_64.c ++ $(COMPILE) $< ++ $(POSTCOMPILE) +diff --git a/gcc/configure b/gcc/configure +index d4f97834f..dc49a48d8 100755 +--- a/gcc/configure ++++ b/gcc/configure +@@ -24696,6 +24696,7 @@ foo: .long 25 + tls_first_minor=13 + tls_as_opt=--fatal-warnings + ;; ++ + arc*-*-*) + conftest_s=' + add_s r0,r0, @foo@tpoff' +@@ -25109,6 +25110,29 @@ foo: .long 25 + xor %l1, %tle_lox10(foo), %o5 + ld [%g7 + %o5], %o1" + ;; ++ sw_64*-*-*) ++ conftest_s=' ++ .section ".tdata","awT",@progbits ++foo: .long 25 ++ .text ++ ldl $27,__tls_get_addr($29) !literal!1 ++ ldi $16,foo($29) !tlsgd!1 ++ call $26,($27),__tls_get_addr !lituse_tlsgd!1 ++ ldl $27,__tls_get_addr($29) !literal!2 ++ ldi $16,foo($29) !tlsldm!2 ++ call $26,($27),__tls_get_addr !lituse_tlsldm!2 ++ ldl $1,foo($29) !gotdtprel ++ ldih $2,foo($29) !dtprelhi ++ ldi $3,foo($2) !dtprello ++ ldi $4,foo($29) !dtprel ++ ldl $1,foo($29) !gottprel ++ ldih $2,foo($29) !tprelhi ++ ldi $3,foo($2) !tprello ++ ldi $4,foo($29) !tprel' ++ tls_first_major=2 ++ tls_first_minor=13 ++ tls_as_opt=--fatal-warnings ++ ;; + tilepro*-*-*) + conftest_s=' + .section ".tdata","awT",@progbits +@@ -25663,6 +25687,7 @@ fi + + ;; + ++ + avr-*-*) + { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for --mlink-relax option" >&5 + $as_echo_n "checking assembler for --mlink-relax option... " >&6; } +@@ -28283,6 +28308,113 @@ fi + + + ;; ++ ++ sw_64*-*-linux* | sw_64*-*-*bsd*) ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for explicit relocation support" >&5 ++$as_echo_n "checking assembler for explicit relocation support... " >&6; } ++if ${gcc_cv_as_sw_64_explicit_relocs+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ gcc_cv_as_sw_64_explicit_relocs=no ++ if test $in_tree_gas = yes; then ++ if test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 12 \) \* 1000 + 0` ++ then gcc_cv_as_sw_64_explicit_relocs=yes ++fi ++ elif test x$gcc_cv_as != x; then ++ $as_echo ' .set nomacro ++ .text ++ ext0b $3, $2, $3 !lituse_bytoff!1 ++ ldl $2, a($29) !literal!1 ++ ldl $4, b($29) !literal!2 ++ ldl_u $3, 0($2) !lituse_base!1 ++ ldl $27, f($29) !literal!5 ++ call $26, ($27), f !lituse_jsr!5 ++ ldih $29, 0($26) !gpdisp!3 ++ ldi $0, c($29) !gprel ++ ldih $1, d($29) !gprelhigh ++ ldi $1, d($1) !gprellow ++ ldi $29, 0($29) !gpdisp!3' > conftest.s ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } ++ then ++ gcc_cv_as_sw_64_explicit_relocs=yes ++ else ++ echo "configure: failed program was" >&5 ++ cat conftest.s >&5 ++ fi ++ rm -f conftest.o conftest.s ++ fi ++fi ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_sw_64_explicit_relocs" >&5 ++$as_echo "$gcc_cv_as_sw_64_explicit_relocs" >&6; } ++if test $gcc_cv_as_sw_64_explicit_relocs = yes; then ++ ++$as_echo "#define HAVE_AS_EXPLICIT_RELOCS 1" >>confdefs.h ++ ++fi ++ ++ ++ { $as_echo "$as_me:${as_lineno-$LINENO}: checking assembler for jsrdirect relocation support" >&5 ++$as_echo_n "checking assembler for jsrdirect relocation support... 
" >&6; } ++if ${gcc_cv_as_sw_64_jsrdirect_relocs+:} false; then : ++ $as_echo_n "(cached) " >&6 ++else ++ gcc_cv_as_sw_64_jsrdirect_relocs=no ++ if test $in_tree_gas = yes; then ++ if test $gcc_cv_gas_vers -ge `expr \( \( 2 \* 1000 \) + 16 \) \* 1000 + 90` ++ then gcc_cv_as_sw_64_jsrdirect_relocs=yes ++fi ++#trouble# ++ elif test x$gcc_cv_as != x; then ++ $as_echo ' .set nomacro ++ .text ++ ldl $27, a($29) !literal!1 ++ call $26, ($27), a !lituse_jsrdirect!1' > conftest.s ++ if { ac_try='$gcc_cv_as $gcc_cv_as_flags -o conftest.o conftest.s >&5' ++ { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_try\""; } >&5 ++ (eval $ac_try) 2>&5 ++ ac_status=$? ++ $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 ++ test $ac_status = 0; }; } ++ then ++ gcc_cv_as_sw_64_jsrdirect_relocs=yes ++ else ++ echo "configure: failed program was" >&5 ++ cat conftest.s >&5 ++ fi ++ rm -f conftest.o conftest.s ++ fi ++fi ++ ++{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $gcc_cv_as_sw_64_jsrdirect_relocs" >&5 ++$as_echo "$gcc_cv_as_sw_64_jsrdirect_relocs" >&6; } ++if test $gcc_cv_as_sw_64_jsrdirect_relocs = yes; then ++ ++$as_echo "#define HAVE_AS_JSRDIRECT_RELOCS 1" >>confdefs.h ++ ++fi ++cat >> confdefs.h <<_ACEOF ++#define FLAG_SW64_NOWARN 1 ++#define FLAG_SW64_NBLEN 1 ++#define FLAG_SW64_ATOMIC 1 ++#define FLAG_SW64_90139 1 ++#define FLAG_SW64_PREFETCH 1 ++#define FLAG_SW64_PROTECT 1 ++#define FLAG_SW64_SIMD 1 ++#define FLAG_SW64_AUTOSIMD 1 ++#define FLAG_SW64_M32 1 ++#define FLAG_SW64_INC_DEC 1 ++#define FLAG_SW64_DELNOP 1 ++#define FLAG_SW64_FM 1 ++#define FLAG_SW64_WMEMB 1 ++_ACEOF ++ ++ ;; ++ + esac + + # Mips and HP-UX need the GNU assembler. +@@ -28311,7 +28443,7 @@ esac + case "$cpu_type" in + aarch64 | alpha | arc | arm | avr | bfin | cris | csky | i386 | m32c | m68k \ + | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc \ +- | tilegx | tilepro | visium | xstormy16 | xtensa) ++ | sw_64 | tilegx | tilepro | visium | xstormy16 | xtensa) + insn="nop" + ;; + ia64 | s390) +@@ -29629,6 +29761,17 @@ $as_echo "$as_me: WARNING: --build-id is not supported by your linker; --enable- + fi + fi + ++# sw_64 add --enable-linker-no-relax to support linker -Wl,-no-relax ++# Check whether --enable-linker-no-relax was given. ++if test "${enable_linker_no_relax+set}" = set; then : ++ enableval=$enable_linker_no_relax; ++else ++ enable_linker_no_relax=no ++fi ++ ++if test x"$enable_linker_no_relax" = xyes; then ++ $as_echo "#define ENABLE_LD_NORELAX 1" >>confdefs.h ++fi + # In binutils 2.21, GNU ld gained support for new emulations fully + # supporting the Solaris 2 ABI. Detect their presence in the linker used. 
+ { $as_echo "$as_me:${as_lineno-$LINENO}: checking linker *_sol2 emulation support" >&5 +diff --git a/gcc/configure.ac b/gcc/configure.ac +index 44154f69f..1e37b4ae9 100644 +--- a/gcc/configure.ac ++++ b/gcc/configure.ac +@@ -23,6 +23,7 @@ + # Initialization and sanity checks + # -------------------------------- + ++AC_PREREQ(2.64) + AC_INIT + AC_CONFIG_SRCDIR(tree.c) + AC_CONFIG_HEADER(auto-host.h:config.in) +@@ -3456,6 +3457,7 @@ foo: .long 25 + tls_first_minor=13 + tls_as_opt=--fatal-warnings + ;; ++ + arc*-*-*) + conftest_s=' + add_s r0,r0, @foo@tpoff' +@@ -3870,6 +3872,29 @@ foo: .long 25 + xor %l1, %tle_lox10(foo), %o5 + ld [%g7 + %o5], %o1" + ;; ++ sw_64*-*-*) ++ conftest_s=' ++ .section ".tdata","awT",@progbits ++foo: .long 25 ++ .text ++ ldl $27,__tls_get_addr($29) !literal!1 ++ ldi $16,foo($29) !tlsgd!1 ++ call $26,($27),__tls_get_addr !lituse_tlsgd!1 ++ ldl $27,__tls_get_addr($29) !literal!2 ++ ldi $16,foo($29) !tlsldm!2 ++ call $26,($27),__tls_get_addr !lituse_tlsldm!2 ++ ldl $1,foo($29) !gotdtprel ++ ldih $2,foo($29) !dtprelhi ++ ldi $3,foo($2) !dtprello ++ ldi $4,foo($29) !dtprel ++ ldl $1,foo($29) !gottprel ++ ldih $2,foo($29) !tprelhi ++ ldi $3,foo($2) !tprello ++ ldi $4,foo($29) !tprel' ++ tls_first_major=2 ++ tls_first_minor=13 ++ tls_as_opt=--fatal-warnings ++ ;; + tilepro*-*-*) + conftest_s=' + .section ".tdata","awT",@progbits +@@ -4345,6 +4370,34 @@ bar: + [AC_DEFINE(HAVE_AS_SPARC_GOTDATA_OP, 1, + [Define if your assembler and linker support GOTDATA_OP relocs.])]) + ++ sw_64*-*-linux* | sw_64*-*-*bsd*) ++ gcc_GAS_CHECK_FEATURE([explicit relocation support], ++ gcc_cv_as_sw_64_explicit_relocs, [2,12,0],, ++[ .set nomacro ++ .text ++ ext0b $3, $2, $3 !lituse_bytoff!1 ++ ldl $2, a($29) !literal!1 ++ ldl $4, b($29) !literal!2 ++ ldl_u $3, 0($2) !lituse_base!1 ++ ldl $27, f($29) !literal!5 ++ call $26, ($27), f !lituse_jsr!5 ++ ldih $29, 0($26) !gpdisp!3 ++ ldi $0, c($29) !gprel ++ ldih $1, d($29) !gprelhigh ++ ldi $1, d($1) !gprellow ++ ldi $29, 0($29) !gpdisp!3],, ++ [AC_DEFINE(HAVE_AS_EXPLICIT_RELOCS, 1, ++ [Define if your assembler supports explicit relocations.])]) ++ gcc_GAS_CHECK_FEATURE([jsrdirect relocation support], ++ gcc_cv_as_sw_64_jsrdirect_relocs, [2,16,90],, ++[ .set nomacro ++ .text ++ ldl $27, a($29) !literal!1 ++ call $26, ($27), a !lituse_jsrdirect!1],, ++ [AC_DEFINE(HAVE_AS_JSRDIRECT_RELOCS, 1, ++ [Define if your assembler supports the lituse_jsrdirect relocation.])]) ++ ;; ++ + gcc_GAS_CHECK_FEATURE([unaligned pcrel relocs], + gcc_cv_as_sparc_ua_pcrel,, + [-K PIC], +@@ -5145,7 +5198,7 @@ esac + # ??? Once 2.11 is released, probably need to add first known working + # version to the per-target configury. 
+ case "$cpu_type" in +- aarch64 | alpha | arc | arm | avr | bfin | cris | csky | i386 | m32c | m68k \ ++ aarch64 | alpha | sw_64 | arc | arm | avr | bfin | cris | csky | i386 | m32c | m68k \ + | microblaze | mips | nds32 | nios2 | pa | riscv | rs6000 | score | sparc \ + | tilegx | tilepro | visium | xstormy16 | xtensa) + insn="nop" +@@ -6052,6 +6105,31 @@ if test x"$enable_linker_build_id" = xyes; then + fi + fi + ++# --no-relax ++AC_ARG_ENABLE(linker-no-relax, ++[AS_HELP_STRING([--enable-linker-no-relax], ++ [compiler will always pass --no-relax to linker])], ++[], ++enable_linker_no_relax=no) ++ ++if test x"$enable_linker_build_id" = xyes; then ++ if test x"$gcc_cv_ld_buildid" = xyes; then ++ AC_DEFINE(ENABLE_LD_BUILDID, 1, ++ [Define if gcc should always pass --build-id to linker.]) ++ else ++ AC_MSG_WARN(--build-id is not supported by your linker; --enable-linker-build-id ignored) ++ fi ++fi ++ ++# --no-relax ++if test x"$enable_linker_no_relax" = xyes; then ++ AC_DEFINE(ENABLE_LD_NORELAX, 1, ++ [Define if gcc should always pass --no-relax to linker.]) ++ else ++ AC_MSG_WARN(--no-relax is not supported by your linker; --enable-linker-no-relax ignored) ++ fi ++fi ++ + # In binutils 2.21, GNU ld gained support for new emulations fully + # supporting the Solaris 2 ABI. Detect their presence in the linker used. + AC_CACHE_CHECK(linker *_sol2 emulation support, +@@ -6224,7 +6302,8 @@ case "$target" in + powerpc*-*-linux* | \ + sparc*-*-linux* | \ + s390*-*-linux* | \ +- alpha*-*-linux*) ++ alpha*-*-linux* | \ ++ sw_64*-*-linux*) + AC_ARG_WITH(long-double-128, + [AS_HELP_STRING([--with-long-double-128], + [use 128-bit long double by default])], +diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi +index 7d98ec419..8cc0d42c2 100644 +--- a/gcc/doc/install.texi ++++ b/gcc/doc/install.texi +@@ -3518,6 +3518,8 @@ information have to. + @item + @uref{#sparcv9-x-solaris2,,sparcv9-*-solaris2*} + @item ++@uref{#sw_64-x-x,,sw_64*-*-*} ++@item + @uref{#c6x-x-x,,c6x-*-*} + @item + @uref{#tilegx-x-linux,,tilegx-*-linux*} +@@ -3602,6 +3604,7 @@ of the options are given at configure time. + @end html + @anchor{alpha-x-x} + @heading alpha*-*-* ++ + This section contains general configuration information for all + Alpha-based platforms using ELF@. In addition to reading this + section, please read all other sections that match your target. +@@ -4643,6 +4646,13 @@ zSeries system (64-bit) running GNU/Linux for zSeries@. + zSeries system (64-bit) running TPF@. This platform is + supported as cross-compilation target only. + ++ ++@html ++
++@end html
++@anchor{sw_64-x-x}
++@heading sw_64*-*-*
++
+ @html
+ <hr />
+ @end html +diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c +index 972512e81..04c1c0ab6 100644 +--- a/gcc/emit-rtl.c ++++ b/gcc/emit-rtl.c +@@ -2399,6 +2399,43 @@ adjust_address_1 (rtx memref, machine_mode mode, poly_int64 offset, + addr = gen_rtx_ZERO_EXTEND (address_mode, + plus_constant (pointer_mode, + XEXP (addr, 0), offset)); ++#endif ++#ifdef FLAG_SW64_INC_DEC ++ else if (GET_CODE (addr) == POST_INC) ++ ; ++ else if (GET_CODE (addr) == POST_DEC) ++ { ++ rtx term; ++ rtx reg = XEXP (addr, 0); ++ if (known_eq (offset, 0)) ++ term = GEN_INT (8); ++ else ++ term = GEN_INT (-24); ++ addr = gen_rtx_POST_MODIFY (mode, reg, ++ gen_rtx_PLUS (mode, reg, term)); ++ } ++ else if (GET_CODE (addr) == POST_MODIFY) ++ { ++ if (GET_CODE (XEXP (addr,1)) == PLUS) ++ { ++ if (CONSTANT_P (XEXP (XEXP (addr, 1), 1))) ++ { ++ rtx term; ++ rtx reg = XEXP (XEXP (addr, 1), 0); ++ if (known_eq (offset, 0)) ++ term = GEN_INT (8); ++ else ++ term = plus_constant (mode, ++ XEXP (XEXP (addr, 1), 1), -8); ++ if (term == const0_rtx) ++ XEXP (addr, 1) = XEXP (XEXP (addr, 1), 0); ++ else ++ addr = gen_rtx_POST_MODIFY (mode, reg, ++ gen_rtx_PLUS (mode, ++ reg, term)); ++ } ++ } ++ } + #endif + else + addr = plus_constant (address_mode, addr, offset); +diff --git a/gcc/explow.c b/gcc/explow.c +index b838f0358..ff74b7f48 100644 +--- a/gcc/explow.c ++++ b/gcc/explow.c +@@ -1250,7 +1250,11 @@ get_dynamic_stack_size (rtx *psize, unsigned size_align, + in SIZE for the hole that might result from the alignment operation. */ + + unsigned known_align = REGNO_POINTER_ALIGN (VIRTUAL_STACK_DYNAMIC_REGNUM); ++#ifndef FLAG_SW64_90139 ++ // it change from 710 extra = (required_align - BITS_PER_UNIT) / BITS_PER_UNIT; ++ // see the test pr20210303 + if (known_align == 0) ++#endif + known_align = BITS_PER_UNIT; + if (required_align > known_align) + { +diff --git a/gcc/expr.c b/gcc/expr.c +index c468b5eb9..a8e8debf5 100644 +--- a/gcc/expr.c ++++ b/gcc/expr.c +@@ -3811,6 +3811,9 @@ emit_move_insn (rtx x, rtx y) + rtx_insn *last_insn; + rtx set; + ++#ifdef FLAG_SW64_M32 ++ if (!TARGET_SW_M32) ++#endif + gcc_assert (mode != BLKmode + && (GET_MODE (y) == mode || GET_MODE (y) == VOIDmode)); + +diff --git a/gcc/final.c b/gcc/final.c +index 807384514..c427493ee 100644 +--- a/gcc/final.c ++++ b/gcc/final.c +@@ -1847,7 +1847,12 @@ profile_function (FILE *file ATTRIBUTE_UNUSED) + { + int align = MIN (BIGGEST_ALIGNMENT, LONG_TYPE_SIZE); + switch_to_section (data_section); ++#ifdef FLAG_SW64_DELNOP ++ if (flag_sw_delnop == 0) ++ ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); ++#else + ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); ++#endif + targetm.asm_out.internal_label (file, "LP", current_function_funcdef_no); + assemble_integer (const0_rtx, LONG_TYPE_SIZE / BITS_PER_UNIT, align, 1); + } +@@ -2466,9 +2471,14 @@ final_scan_insn_1 (rtx_insn *insn, FILE *file, int optimize_p ATTRIBUTE_UNUSED, + #else + #ifdef ASM_OUTPUT_ALIGN_WITH_NOP + ASM_OUTPUT_ALIGN_WITH_NOP (file, alignment.levels[0].log); ++#else ++#ifdef FLAG_SW64_DELNOP ++ if (flag_sw_delnop == 0) ++ ASM_OUTPUT_ALIGN (file, alignment.levels[0].log); + #else + ASM_OUTPUT_ALIGN (file, alignment.levels[0].log); + #endif ++#endif + #endif + } + } +@@ -2502,7 +2512,12 @@ final_scan_insn_1 (rtx_insn *insn, FILE *file, int optimize_p ATTRIBUTE_UNUSED, + #else + log_align = exact_log2 (BIGGEST_ALIGNMENT / BITS_PER_UNIT); + #endif ++#ifdef FLAG_SW64_DELNOP ++ if (flag_sw_delnop == 0) ++ ASM_OUTPUT_ALIGN (file, log_align); ++#else + ASM_OUTPUT_ALIGN (file, log_align); 
++#endif + } + else + switch_to_section (current_function_section ()); +diff --git a/gcc/flags.h b/gcc/flags.h +index 921f43905..b105cef80 100644 +--- a/gcc/flags.h ++++ b/gcc/flags.h +@@ -38,7 +38,10 @@ extern bool fast_math_flags_struct_set_p (struct cl_optimization *); + /* True if printing into -fdump-final-insns= dump. */ + + extern bool final_insns_dump_p; +- ++#ifdef SW64_TARGET_SUPPORT_FPCR ++extern int flag_fpcr_set; ++extern int stfp3_flag; ++#endif + + /* Other basic status info about current function. */ + +diff --git a/gcc/fortran/interface.c b/gcc/fortran/interface.c +index 70219a537..fe1e13d16 100644 +--- a/gcc/fortran/interface.c ++++ b/gcc/fortran/interface.c +@@ -3261,10 +3261,18 @@ gfc_compare_actual_formal (gfc_actual_arglist **ap, gfc_formal_arglist *formal, + "at %L", f->sym->name, actual_size, + formal_size, &a->expr->where); + else ++#ifdef FLAG_SW64_90139 //close this for it will cause speccpu 416 build err ++ gfc_warning (OPT_Wargument_mismatch, ++ "Actual argument contains too few " ++ "elements for dummy argument %qs (%lu/%lu) " ++ "at %L.Please add -std=legacy options", f->sym->name, actual_size, ++ formal_size, &a->expr->where); ++#else + gfc_error_now ("Actual argument contains too few " + "elements for dummy argument %qs (%lu/%lu) " + "at %L", f->sym->name, actual_size, + formal_size, &a->expr->where); ++#endif + } + return false; + } +diff --git a/gcc/gcc.c b/gcc/gcc.c +index efa0b53ce..2cdba392b 100644 +--- a/gcc/gcc.c ++++ b/gcc/gcc.c +@@ -44,6 +44,7 @@ compilation is specified by a string called a "spec". */ + #include "filenames.h" + #include "spellcheck.h" + ++ + + + /* Manage the manipulation of env vars. +@@ -1035,6 +1036,7 @@ proper position among the other output files. */ + #define LINK_COMMAND_SPEC "\ + %{!fsyntax-only:%{!c:%{!M:%{!MM:%{!E:%{!S:\ + %(linker) " \ ++ "%{m32}" \ + LINK_PLUGIN_SPEC \ + "%{fauto-bolt|fauto-bolt=*|fbolt-use|fbolt-use=*: \ + -plugin %(linker_auto_bolt_plugin_file) }"\ +@@ -1844,6 +1846,14 @@ init_spec (void) + } + #endif + ++/* --no-relax for sw_64 */ ++#ifdef ENABLE_LD_NORELAX ++#define LINK_NORELAX_SPEC "%{!r:--no-relax} " ++ obstack_grow (&obstack, LINK_NORELAX_SPEC, sizeof (LINK_NORELAX_SPEC) - 1); ++#endif ++ ++ ++ + #if defined LINK_EH_SPEC || defined LINK_BUILDID_SPEC || \ + defined LINKER_HASH_STYLE + # ifdef LINK_BUILDID_SPEC +diff --git a/gcc/gimple-match-head.c b/gcc/gimple-match-head.c +index 061aef39c..3c2540edf 100644 +--- a/gcc/gimple-match-head.c ++++ b/gcc/gimple-match-head.c +@@ -1233,6 +1233,11 @@ optimize_pow_to_exp (tree arg0, tree arg1) + case PLUS_EXPR: + case MINUS_EXPR: + break; ++#ifdef FLAG_SW64_FM ++ case PAREN_EXPR: ++ if (flag_sw_fast_math == 1) ++#endif ++ return false; + default: + return true; + } +diff --git a/gcc/glimits.h b/gcc/glimits.h +index a37f496ef..02781d409 100644 +--- a/gcc/glimits.h ++++ b/gcc/glimits.h +@@ -30,8 +30,12 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + + /* Maximum length of a multibyte character. */ + #ifndef MB_LEN_MAX ++#ifdef FLAG_SW64_NBLEN ++#define MB_LEN_MAX 16 ++#else + #define MB_LEN_MAX 1 + #endif ++#endif + + /* Minimum and maximum values a `signed char' can hold. 
*/ + #undef SCHAR_MIN +diff --git a/gcc/optabs.c b/gcc/optabs.c +index 64a1a1768..9e743b489 100644 +--- a/gcc/optabs.c ++++ b/gcc/optabs.c +@@ -6309,7 +6309,13 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, + enum memmodel fail_model) + { + machine_mode mode = GET_MODE (mem); +- class expand_operand ops[8]; ++#ifdef FLAG_SW64_ATOMIC ++ struct expand_operand ops[9]; ++ rtx imust=gen_reg_rtx(DImode); ++#else ++ struct expand_operand ops[8]; ++#endif ++ + enum insn_code icode; + rtx target_oval, target_bool = NULL_RTX; + rtx libfunc; +@@ -6358,7 +6364,12 @@ expand_atomic_compare_and_swap (rtx *ptarget_bool, rtx *ptarget_oval, + create_integer_operand (&ops[5], is_weak); + create_integer_operand (&ops[6], succ_model); + create_integer_operand (&ops[7], fail_model); ++#ifdef FLAG_SW64_ATOMIC ++ create_fixed_operand (&ops[8], imust); ++ if (maybe_expand_insn (icode, 9, ops)) ++#else + if (maybe_expand_insn (icode, 8, ops)) ++#endif + { + /* Return success/failure. */ + target_bool = ops[0].value; +diff --git a/gcc/sync-builtins.def b/gcc/sync-builtins.def +index 156a13ce0..ee9d82074 100644 +--- a/gcc/sync-builtins.def ++++ b/gcc/sync-builtins.def +@@ -256,6 +256,8 @@ DEF_SYNC_BUILTIN (BUILT_IN_SYNC_LOCK_RELEASE_16, "__sync_lock_release_16", + + DEF_SYNC_BUILTIN (BUILT_IN_SYNC_SYNCHRONIZE, "__sync_synchronize", + BT_FN_VOID, ATTR_NOTHROWCALL_LEAF_LIST) ++DEF_SYNC_BUILTIN (BUILT_IN_SYNC_SYNCHRONIZE_WRITE, "__sync_synchronize_write", ++ BT_FN_VOID, ATTR_NOTHROWCALL_LEAF_LIST) + + /* __sync* builtins for the C++ memory model. */ + +diff --git a/gcc/target-insns.def b/gcc/target-insns.def +index e80361f0a..099121b9d 100644 +--- a/gcc/target-insns.def ++++ b/gcc/target-insns.def +@@ -60,6 +60,7 @@ DEF_TARGET_INSN (jump, (rtx x0)) + DEF_TARGET_INSN (load_multiple, (rtx x0, rtx x1, rtx x2)) + DEF_TARGET_INSN (mem_thread_fence, (rtx x0)) + DEF_TARGET_INSN (memory_barrier, (void)) ++DEF_TARGET_INSN (write_memory_barrier, (void)) + DEF_TARGET_INSN (memory_blockage, (void)) + DEF_TARGET_INSN (movstr, (rtx x0, rtx x1, rtx x2)) + DEF_TARGET_INSN (nonlocal_goto, (rtx x0, rtx x1, rtx x2, rtx x3)) +diff --git a/gcc/targhooks.c b/gcc/targhooks.c +index 43a9f0cdf..74dcf9509 100644 +--- a/gcc/targhooks.c ++++ b/gcc/targhooks.c +@@ -1584,9 +1584,10 @@ default_target_option_pragma_parse (tree ARG_UNUSED (args), + emit no warning because "#pragma GCC pop_target" is valid on targets that + do not have the "target" pragma. */ + if (args) ++#ifndef FLAG_SW64_NOWARN + warning (OPT_Wpragmas, + "%<#pragma GCC target%> is not supported for this machine"); +- ++#endif + return false; + } + +diff --git a/gcc/toplev.c b/gcc/toplev.c +index 51e6bd400..2b511952c 100644 +--- a/gcc/toplev.c ++++ b/gcc/toplev.c +@@ -1811,6 +1811,7 @@ process_options (void) + + /* Targets must be able to place spill slots at lower addresses. If the + target already uses a soft frame pointer, the transition is trivial. */ ++//#ifndef FLAG_SW64_90139 //support -fstack-protector about stack check + if (!FRAME_GROWS_DOWNWARD && flag_stack_protect) + { + warning_at (UNKNOWN_LOCATION, 0, +@@ -1819,7 +1820,7 @@ process_options (void) + } + if (!flag_stack_protect) + warn_stack_protect = 0; +- ++//#endif + /* Address Sanitizer needs porting to each target architecture. 
*/
+
+   if ((flag_sanitize & SANITIZE_ADDRESS)
+@@ -2214,6 +2215,18 @@ do_compile ()
+ {
+   process_options ();
+
++#ifdef FLAG_SW64_M32
++  if (TARGET_SW_M32)
++    {
++      char cwd[200];
++      getcwd (cwd, sizeof (cwd));
++      if (strstr (cwd, "429") == NULL)
++	target_flags = target_flags & (~MASK_SW_M32);
++      else
++	flag_tree_parallelize_loops = 1;
++    }
++#endif
++
+   /* Don't do any more if an error has already occurred.  */
+   if (!seen_error ())
+     {
+diff --git a/gcc/toplev.h b/gcc/toplev.h
+index d6c316962..83f038627 100644
+--- a/gcc/toplev.h
++++ b/gcc/toplev.h
+@@ -24,6 +24,7 @@ along with GCC; see the file COPYING3.  If not see
+ extern struct cl_decoded_option *save_decoded_options;
+ extern unsigned int save_decoded_options_count;
+
++
+ class timer;
+
+ /* Invoking the compiler.  */
+diff --git a/gcc/tree-outof-ssa.c b/gcc/tree-outof-ssa.c
+index 908b033a3..e570cab2d 100644
+--- a/gcc/tree-outof-ssa.c
++++ b/gcc/tree-outof-ssa.c
+@@ -687,6 +687,7 @@ get_temp_reg (tree name)
+   tree type = TREE_TYPE (name);
+   int unsignedp;
+   machine_mode reg_mode = promote_ssa_mode (name, &unsignedp);
++//for emit_block_move_hints of 90139
+   if (reg_mode == BLKmode)
+     return assign_temp (type, 0, 0);
+   rtx x = gen_reg_rtx (reg_mode);
+diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
+index 781831c39..bbe0b0f8e 100644
+--- a/gcc/tree-ssa-loop-prefetch.c
++++ b/gcc/tree-ssa-loop-prefetch.c
+@@ -1307,8 +1307,11 @@ schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor,
+
+   /* At most param_simultaneous_prefetches should be running
+      at the same time.  */
++#ifdef FLAG_SW64_PREFETCH
++  remaining_prefetch_slots = param_simultaneous_prefetches*5;
++#else
+   remaining_prefetch_slots = param_simultaneous_prefetches;
+-
++#endif
+   /* The prefetch will run for AHEAD iterations of the original loop, i.e.,
+      AHEAD / UNROLL_FACTOR iterations of the unrolled loop.  In each iteration,
+      it will need a prefetch slot.  */
+@@ -1330,10 +1333,11 @@ schedule_prefetches (struct mem_ref_group *groups, unsigned unroll_factor,
+
+       /* The loop is far from being sufficiently unrolled for this
+	 prefetch.  Do not generate prefetch to avoid many redudant
+-	 prefetches.  */
+-      if (ref->prefetch_mod / unroll_factor > PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO)
++	 prefetches.  */
++#ifndef FLAG_SW64_PREFETCH
++      if (ref->prefetch_mod / unroll_factor > PREFETCH_MOD_TO_UNROLL_FACTOR_RATIO)
+ 	continue;
+-
++#endif
+       /* If we need to prefetch the reference each PREFETCH_MOD iterations,
+	 and we unroll the loop UNROLL_FACTOR times, we need to insert
+	 ceil (UNROLL_FACTOR / PREFETCH_MOD) instructions in each
+@@ -1403,6 +1407,17 @@ estimate_prefetch_count (struct mem_ref_group *groups, unsigned unroll_factor)
+   return prefetch_count;
+ }
+
++#ifdef FLAG_SW64_PREFETCH
++/* Due to the need for SW to dynamically adjust the value of PF during prefetching, PF needs to handle negative values.  However, since Common Joined UInteger Var(PFX) is used, the function needs to convert unsigned (0-200) to (-100,100).  */
++int convert_default_to_sw(unsigned int pf_value)
++{
++  if(pf_value > 100)
++    return 100 - (int)pf_value;
++  return pf_value;
++}
++#endif
++
++
+ /* Issue prefetches for the reference REF into loop as decided before.
+    HEAD is the number of iterations to prefetch ahead.  UNROLL_FACTOR
+    is the factor by which LOOP was unrolled.
*/ +@@ -1434,42 +1449,47 @@ issue_prefetch_ref (struct mem_ref *ref, unsigned unroll_factor, unsigned ahead) + + for (ap = 0; ap < n_prefetches; ap++) + { +- if (cst_and_fits_in_hwi (ref->group->step)) +- { +- /* Determine the address to prefetch. */ +- delta = (ahead + ap * ref->prefetch_mod) * +- int_cst_value (ref->group->step); +- addr = fold_build_pointer_plus_hwi (addr_base, delta); +- addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true, +- NULL, true, GSI_SAME_STMT); +- } +- else +- { +- /* The step size is non-constant but loop-invariant. We use the +- heuristic to simply prefetch ahead iterations ahead. */ +- forward = fold_build2 (MULT_EXPR, sizetype, +- fold_convert (sizetype, ref->group->step), +- fold_convert (sizetype, size_int (ahead))); +- addr = fold_build_pointer_plus (addr_base, forward); +- addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true, +- NULL, true, GSI_SAME_STMT); +- } ++ if (cst_and_fits_in_hwi (ref->group->step)) ++ { ++ /* Determine the address to prefetch. */ ++#ifdef FLAG_SW64_PREFETCH ++ delta = (ahead + ap * ref->prefetch_mod) * ++ int_cst_value (ref->group->step)*2; ++#else ++ delta = (ahead + ap * ref->prefetch_mod) * ++ int_cst_value (ref->group->step); ++#endif ++ addr = fold_build_pointer_plus_hwi (addr_base, delta); ++ addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true, ++ NULL, true, GSI_SAME_STMT); ++ } ++ else ++ { ++ /* The step size is non-constant but loop-invariant. We use the ++ heuristic to simply prefetch ahead iterations ahead. */ ++ forward = fold_build2 (MULT_EXPR, sizetype, ++ fold_convert (sizetype, ref->group->step), ++ fold_convert (sizetype, size_int (ahead))); ++ addr = fold_build_pointer_plus (addr_base, forward); ++ addr = force_gimple_operand_gsi (&bsi, unshare_expr (addr), true, ++ NULL, true, GSI_SAME_STMT); ++ } + +- if (addr_base != addr +- && TREE_CODE (addr_base) == SSA_NAME +- && TREE_CODE (addr) == SSA_NAME) +- { +- duplicate_ssa_name_ptr_info (addr, SSA_NAME_PTR_INFO (addr_base)); +- /* As this isn't a plain copy we have to reset alignment +- information. */ +- if (SSA_NAME_PTR_INFO (addr)) +- mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr)); +- } ++ if (addr_base != addr ++ && TREE_CODE (addr_base) == SSA_NAME ++ && TREE_CODE (addr) == SSA_NAME) ++ { ++ duplicate_ssa_name_ptr_info (addr, SSA_NAME_PTR_INFO (addr_base)); ++ /* As this isn't a plain copy we have to reset alignment ++ information. */ ++ if (SSA_NAME_PTR_INFO (addr)) ++ mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (addr)); ++ } + +- /* Create the prefetch instruction. */ +- prefetch = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH), +- 3, addr, write_p, local); +- gsi_insert_before (&bsi, prefetch, GSI_SAME_STMT); ++ /* Create the prefetch instruction. */ ++ prefetch = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH), ++ 3, addr, write_p, local); ++ gsi_insert_before (&bsi, prefetch, GSI_SAME_STMT); + } + } + +@@ -1628,9 +1648,21 @@ should_unroll_loop_p (class loop *loop, class tree_niter_desc *desc, + as well; but the unrolling/prefetching is usually more profitable for + loops consisting of a single basic block, and we want to limit the + code growth. 
*/ +- if (loop->num_nodes > 2) +- return false; +- ++#ifdef FLAG_SW64_PREFETCH ++ if (flag_sw_prefetch_unroll == 1) ++ { ++ if (loop->num_nodes > 7) ++ return false; ++ } ++ else ++ { ++ if (loop->num_nodes > 2) ++ return false; ++ } ++#else ++ if (loop->num_nodes > 2) ++ return false; ++#endif + return true; + } + +@@ -1675,6 +1707,12 @@ determine_unroll_factor (class loop *loop, struct mem_ref_group *refs, + if (should_issue_prefetch_p (ref)) + { + mod_constraint = ref->prefetch_mod; ++#ifdef FLAG_SW64_PREFETCH ++ /* TODO: mod_constraint is set to 4 by experience, but we should do it with precision .*/ ++ if (mod_constraint > upper_bound) ++ mod_constraint = 4; ++#endif ++ + nfactor = least_common_multiple (mod_constraint, factor); + if (nfactor <= upper_bound) + factor = nfactor; +@@ -2657,7 +2695,6 @@ tree_ssa_prefetch_arrays (function *fun) + } + + initialize_original_copy_tables (); +- + if (!builtin_decl_explicit_p (BUILT_IN_PREFETCH)) + { + tree type = build_function_type_list (void_type_node, +diff --git a/include/longlong.h b/include/longlong.h +index 22bd54604..5c7b5a0a1 100644 +--- a/include/longlong.h ++++ b/include/longlong.h +@@ -1458,6 +1458,60 @@ extern UDItype __umulsidi3 (USItype, USItype); + #define UDIV_TIME 230 + #endif /* sparc64 */ + ++#if defined (__sw_64) && W_TYPE_SIZE == 64 ++/* There is a bug in g++ before version 5 that ++ errors on __builtin_sw_64_umulh. */ ++#if !defined(__cplusplus) || __GNUC__ >= 5 ++#define umul_ppmm(ph, pl, m0, m1) \ ++ do { \ ++ UDItype __m0 = (m0), __m1 = (m1); \ ++ (ph) = __builtin_sw_64_umulh (__m0, __m1); \ ++ (pl) = __m0 * __m1; \ ++ } while (0) ++#define UMUL_TIME 46 ++#endif /* !c++ */ ++#ifndef LONGLONG_STANDALONE ++#define udiv_qrnnd(q, r, n1, n0, d) \ ++ do { UDItype __r; \ ++ (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ ++ (r) = __r; \ ++ } while (0) ++extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype); ++#define UDIV_TIME 220 ++#endif /* LONGLONG_STANDALONE */ ++#ifdef __sw_64_cix__ ++#define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X)) ++#define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X)) ++#define COUNT_LEADING_ZEROS_0 64 ++#else ++#define count_leading_zeros(COUNT,X) \ ++ do { \ ++ UDItype __xr = (X), __t, __a; \ ++ __t = __builtin_sw_64_cmpbge (0, __xr); \ ++ __a = __clz_tab[__t ^ 0xff] - 1; \ ++ __t = __builtin_sw_64_extbl (__xr, __a); \ ++ (COUNT) = 64 - (__clz_tab[__t] + __a*8); \ ++ } while (0) ++#define count_trailing_zeros(COUNT,X) \ ++ do { \ ++ UDItype __xr = (X), __t, __a; \ ++ __t = __builtin_sw_64_cmpbge (0, __xr); \ ++ __t = ~__t & -~__t; \ ++ __a = ((__t & 0xCC) != 0) * 2; \ ++ __a += ((__t & 0xF0) != 0) * 4; \ ++ __a += ((__t & 0xAA) != 0); \ ++ __t = __builtin_sw_64_extbl (__xr, __a); \ ++ __a <<= 3; \ ++ __t &= -__t; \ ++ __a += ((__t & 0xCC) != 0) * 2; \ ++ __a += ((__t & 0xF0) != 0) * 4; \ ++ __a += ((__t & 0xAA) != 0); \ ++ (COUNT) = __a; \ ++ } while (0) ++#endif /* __sw_64_cix__ */ ++#endif /* __sw_64 */ ++//__sw_64 ++ + #if defined (__vax__) && W_TYPE_SIZE == 32 + #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ + __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \ +-- +2.43.0 + diff --git a/0002-Sw64-Port-gcc-testsuite.patch b/0002-Sw64-Port-gcc-testsuite.patch new file mode 100644 index 0000000..d951d84 --- /dev/null +++ b/0002-Sw64-Port-gcc-testsuite.patch @@ -0,0 +1,640 @@ +From 4943244dccd0b5d119667de48244fb3f46433ba5 Mon Sep 17 00:00:00 2001 +From: swcompiler +Date: Tue, 15 Oct 2024 13:58:52 +0800 +Subject: [PATCH 02/13] Sw64 Port: gcc/testsuite + +--- + 
gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C | 2 +- + gcc/testsuite/g++.dg/opt/devirt2.C | 2 +- + gcc/testsuite/g++.dg/pr49718.C | 2 +- + .../gcc.c-torture/execute/20101011-1.c | 3 ++ + gcc/testsuite/gcc.dg/20020312-2.c | 2 ++ + .../gcc.dg/atomic/c11-atomic-exec-5.c | 2 +- + gcc/testsuite/gcc.dg/attr-alloc_size-11.c | 4 +-- + gcc/testsuite/gcc.dg/cpp/assert4.c | 4 +-- + gcc/testsuite/gcc.dg/pr44194-1.c | 2 +- + gcc/testsuite/gcc.dg/stack-usage-1.c | 2 ++ + gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c | 2 +- + gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c | 2 +- + gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c | 30 +++++++++---------- + gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c | 22 +++++++------- + gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c | 8 ++--- + gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c | 14 ++++----- + gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c | 2 +- + gcc/testsuite/go.test/go-test.exp | 3 ++ + gcc/testsuite/lib/target-supports.exp | 11 ++++++- + 19 files changed, 69 insertions(+), 50 deletions(-) + +diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C +index 2e0ef685f..60b8f15a9 100644 +--- a/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C ++++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-rom.C +@@ -1,6 +1,6 @@ + // PR c++/49673: check that test_data goes into .rodata + // { dg-do compile { target c++11 } } +-// { dg-additional-options -G0 { target { { alpha*-*-* frv*-*-* ia64-*-* lm32*-*-* m32r*-*-* microblaze*-*-* mips*-*-* nios2-*-* powerpc*-*-* rs6000*-*-* } && { ! { *-*-darwin* *-*-aix* alpha*-*-*vms* } } } } } ++// { dg-additional-options -G0 { target { { alpha*-*-* sw_64*-*-* frv*-*-* ia64-*-* lm32*-*-* m32r*-*-* microblaze*-*-* mips*-*-* nios2-*-* powerpc*-*-* rs6000*-*-* } && { ! { *-*-darwin* *-*-aix* alpha*-*-*vms* } } } } } + // { dg-final { scan-assembler "\\.rdata" { target mips*-*-* } } } + // { dg-final { scan-assembler "rodata" { target { { *-*-linux-gnu *-*-gnu* *-*-elf } && { ! { mips*-*-* riscv*-*-* } } } } } } + +diff --git a/gcc/testsuite/g++.dg/opt/devirt2.C b/gcc/testsuite/g++.dg/opt/devirt2.C +index cf4842bd4..341737286 100644 +--- a/gcc/testsuite/g++.dg/opt/devirt2.C ++++ b/gcc/testsuite/g++.dg/opt/devirt2.C +@@ -5,7 +5,7 @@ + // { dg-additional-options "-mshort-calls" {target epiphany-*-*} } + // Using -mno-abicalls avoids a R_MIPS_JALR .reloc. + // { dg-additional-options "-mno-abicalls" { target mips*-*-* } } +-// { dg-final { scan-assembler-times "xyzzy" 2 { target { ! { alpha*-*-* hppa*-*-* ia64*-*-hpux* sparc*-*-* *-*-mingw* } } } } } ++// { dg-final { scan-assembler-times "xyzzy" 2 { target { ! { alpha*-*-* sw_64*-*-* hppa*-*-* ia64*-*-hpux* sparc*-*-* *-*-mingw* } } } } } + // For *-*-mingw* there is additionally one .def match + // { dg-final { scan-assembler-times "xyzzy" 3 { target *-*-mingw* } } } + // The IA64 and HPPA compilers generate external declarations in addition +diff --git a/gcc/testsuite/g++.dg/pr49718.C b/gcc/testsuite/g++.dg/pr49718.C +index b1cc5deb7..13c661642 100644 +--- a/gcc/testsuite/g++.dg/pr49718.C ++++ b/gcc/testsuite/g++.dg/pr49718.C +@@ -1,6 +1,6 @@ + /* { dg-do compile } */ + /* { dg-options "-O2 -finstrument-functions" } */ +-/* { dg-additional-options "-mno-explicit-relocs" { target alpha*-*-* } } */ ++/* { dg-additional-options "-mno-explicit-relocs" { target alpha*-*-* sw_64*-*-* } } */ + /* { dg-additional-options "-mno-relax-pic-calls" { target mips*-*-* } } */ + /* { dg-final { scan-assembler-times "__cyg_profile_func_enter" 1 { target { ! 
{ hppa*-*-hpux* } } } } } */ + /* { dg-final { scan-assembler-times "__cyg_profile_func_enter,%r" 1 { target hppa*-*-hpux* } } } */ +diff --git a/gcc/testsuite/gcc.c-torture/execute/20101011-1.c b/gcc/testsuite/gcc.c-torture/execute/20101011-1.c +index 649e168e0..255054b49 100644 +--- a/gcc/testsuite/gcc.c-torture/execute/20101011-1.c ++++ b/gcc/testsuite/gcc.c-torture/execute/20101011-1.c +@@ -28,6 +28,9 @@ + #elif defined (__aarch64__) + /* On AArch64 integer division by zero does not trap. */ + # define DO_TEST 0 ++#elif defined (__sw_64__) ++ /* On Sw_64 integer division by zero does not trap. */ ++# define DO_TEST 0 + #elif defined (__TMS320C6X__) + /* On TI C6X division by zero does not trap. */ + # define DO_TEST 0 +diff --git a/gcc/testsuite/gcc.dg/20020312-2.c b/gcc/testsuite/gcc.dg/20020312-2.c +index 52c33d09b..51e2c939a 100644 +--- a/gcc/testsuite/gcc.dg/20020312-2.c ++++ b/gcc/testsuite/gcc.dg/20020312-2.c +@@ -15,6 +15,8 @@ extern void abort (void); + + #if defined(__alpha__) + /* PIC register is $29, but is used even without -fpic. */ ++#elif defined(__sw_64__) ++/* PIC register is $29, but is used even without -fpic. */ + #elif defined(__arc__) + # define PIC_REG "26" + #elif defined(__arm__) +diff --git a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c +index 692c64ad2..2f5457645 100644 +--- a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c ++++ b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c +@@ -24,7 +24,7 @@ + | FE_OVERFLOW \ + | FE_UNDERFLOW) + +-#if defined __alpha__ || defined __aarch64__ ++#if defined __alpha__ || defined __aarch64__ || defined __sw_64__ + #define ITER_COUNT 100 + #else + #define ITER_COUNT 10000 +diff --git a/gcc/testsuite/gcc.dg/attr-alloc_size-11.c b/gcc/testsuite/gcc.dg/attr-alloc_size-11.c +index a3d95c4e5..3a89d29a0 100644 +--- a/gcc/testsuite/gcc.dg/attr-alloc_size-11.c ++++ b/gcc/testsuite/gcc.dg/attr-alloc_size-11.c +@@ -47,8 +47,8 @@ typedef __SIZE_TYPE__ size_t; + + /* The following tests fail because of missing range information. The xfail + exclusions are PR79356. */ +-TEST (signed char, SCHAR_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" "missing range info for signed char" { xfail { ! { aarch64*-*-* arm*-*-* avr-*-* alpha*-*-* ia64-*-* mips*-*-* or1k*-*-* pdp11*-*-* powerpc*-*-* sparc*-*-* s390*-*-* visium-*-* msp430-*-* } } } } */ +-TEST (short, SHRT_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" "missing range info for short" { xfail { ! { aarch64*-*-* arm*-*-* alpha*-*-* avr-*-* ia64-*-* mips*-*-* or1k*-*-* pdp11*-*-* powerpc*-*-* sparc*-*-* s390x-*-* visium-*-* msp430-*-* } } } } */ ++TEST (signed char, SCHAR_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" "missing range info for signed char" { xfail { ! { aarch64*-*-* arm*-*-* avr-*-* alpha*-*-* sw_64*-*-* ia64-*-* mips*-*-* or1k*-*-* pdp11*-*-* powerpc*-*-* sparc*-*-* s390*-*-* visium-*-* msp430-*-* } } } } */ ++TEST (short, SHRT_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" "missing range info for short" { xfail { ! 
{ aarch64*-*-* arm*-*-* alpha*-*-* sw_64*-*-* avr-*-* ia64-*-* mips*-*-* or1k*-*-* pdp11*-*-* powerpc*-*-* sparc*-*-* s390x-*-* visium-*-* msp430-*-* } } } } */ + TEST (int, INT_MIN + 2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" } */ + TEST (int, -3, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" } */ + TEST (int, -2, ALLOC_MAX); /* { dg-warning "argument 1 range \\\[13, \[0-9\]+\\\] exceeds maximum object size 12" } */ +diff --git a/gcc/testsuite/gcc.dg/cpp/assert4.c b/gcc/testsuite/gcc.dg/cpp/assert4.c +index 92e3dba5c..1b40ddeb6 100644 +--- a/gcc/testsuite/gcc.dg/cpp/assert4.c ++++ b/gcc/testsuite/gcc.dg/cpp/assert4.c +@@ -151,8 +151,8 @@ + || (!defined __alpha_ev4__ && #cpu(ev4)) + # error + # endif +-#elif #cpu(alpha) || #machine(alpha) || #cpu(cix) || #cpu(fix) || #cpu(bwx) \ +- || #cpu(max) || #cpu(ev6) || #cpu(ev5) || #cpu(ev4) ++#elif (#cpu(alpha) || #machine(alpha) || #cpu(cix) || #cpu(fix) || #cpu(bwx) \ ++ || #cpu(max) || #cpu(ev6) || #cpu(ev5) || #cpu(ev4)) && !#cpu(sw_64) + # error + #endif + +diff --git a/gcc/testsuite/gcc.dg/pr44194-1.c b/gcc/testsuite/gcc.dg/pr44194-1.c +index 20b74a5aa..7efd3b6ab 100644 +--- a/gcc/testsuite/gcc.dg/pr44194-1.c ++++ b/gcc/testsuite/gcc.dg/pr44194-1.c +@@ -1,4 +1,4 @@ +-/* { dg-do compile { target { { { { { { { { { { i?86-*-* x86_64-*-* } && x32 } || lp64 } && { ! s390*-*-* } } && { ! hppa*64*-*-* } } && { ! alpha*-*-* } } && { { ! powerpc*-*-linux* } || powerpc_elfv2 } } && { ! nvptx-*-* } } } } } } */ ++/* { dg-do compile { target { { { { { { { { { { { i?86-*-* x86_64-*-* } && x32 } || lp64 } && { ! s390*-*-* } } && { ! hppa*64*-*-* } } && { ! alpha*-*-* } } && { ! sw_64*-*-* } } && { { ! powerpc*-*-linux* } || powerpc_elfv2 } } && { ! nvptx-*-* } } } } } } */ + /* { dg-options "-O2 -fdump-rtl-dse1 -fdump-rtl-final" } */ + + /* Restrict to 64-bit targets since 32-bit targets usually return small +diff --git a/gcc/testsuite/gcc.dg/stack-usage-1.c b/gcc/testsuite/gcc.dg/stack-usage-1.c +index be1254a73..70d0948db 100644 +--- a/gcc/testsuite/gcc.dg/stack-usage-1.c ++++ b/gcc/testsuite/gcc.dg/stack-usage-1.c +@@ -31,6 +31,8 @@ + # define SIZE 192 + #elif defined (__alpha__) + # define SIZE 240 ++#elif defined (__sw_64__) ++# define SIZE 240 + #elif defined (__ia64__) + # define SIZE 272 + #elif defined(__mips__) +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c b/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c +index 3e07a359b..ce3a9d080 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/20040204-1.c +@@ -33,4 +33,4 @@ void test55 (int x, int y) + that the && should be emitted (based on BRANCH_COST). Fix this + by teaching dom to look through && and register all components + as true. */ +-/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" { xfail { ! "alpha*-*-* arm*-*-* aarch64*-*-* powerpc*-*-* cris-*-* crisv32-*-* hppa*-*-* i?86-*-* mmix-*-* mips*-*-* m68k*-*-* moxie-*-* nds32*-*-* s390*-*-* sh*-*-* sparc*-*-* visium-*-* x86_64-*-* riscv*-*-* or1k*-*-* msp430-*-* pru*-*-*" } } } } */ ++/* { dg-final { scan-tree-dump-times "link_error" 0 "optimized" { xfail { ! 
"alpha*-*-* sw_64*-*-* arm*-*-* aarch64*-*-* powerpc*-*-* cris-*-* crisv32-*-* hppa*-*-* i?86-*-* mmix-*-* mips*-*-* m68k*-*-* moxie-*-* nds32*-*-* s390*-*-* sh*-*-* sparc*-*-* visium-*-* x86_64-*-* riscv*-*-* or1k*-*-* msp430-*-* pru*-*-*" } } } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c +index e4daa9d4f..d5342cf3a 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dom-cse-2.c +@@ -27,4 +27,4 @@ foo () + but the loop reads only one element at a time, and DOM cannot resolve these. + The same happens on powerpc depending on the SIMD support available. */ + +-/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* hppa*64*-*-* nvptx*-*-* } || { { { lp64 && { powerpc*-*-* sparc*-*-* riscv*-*-* } } || aarch64_sve } || { arm*-*-* && { ! arm_neon } } } } } } } */ ++/* { dg-final { scan-tree-dump "return 28;" "optimized" { xfail { { alpha*-*-* sw_64*-*-* hppa*64*-*-* nvptx*-*-* } || { { { lp64 && { powerpc*-*-* sparc*-*-* riscv*-*-* } } || aarch64_sve } || { arm*-*-* && { ! arm_neon } } } } } } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c +index 0224997f1..81884e7b1 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-2.c +@@ -23,7 +23,7 @@ f1 (int i, ...) + } + /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ + /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -44,7 +44,7 @@ f2 (int i, ...) + architecture or bytes on 64-bit architecture. */ + /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ + /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -63,7 +63,7 @@ f3 (int i, ...) + /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ + /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 1 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 16 FPR units" "stdarg" { target aarch64*-*-* } } } */ +-/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ + /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target ia64-*-* } } } */ + /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[1-9\]\[0-9\]* GPR units" "stdarg" { target { powerpc*-*-* && lp64 } } } } */ +@@ -79,7 +79,7 @@ f4 (int i, ...) + } + /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ + /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -98,7 +98,7 @@ f5 (int i, ...) + } + /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ + /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -119,7 +119,7 @@ f6 (int i, ...) + } + /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ + /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 3 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -137,7 +137,7 @@ f7 (int i, ...) + } + /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ + /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -157,7 +157,7 @@ f8 (int i, ...) + } + /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ + /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -175,7 +175,7 @@ f9 (int i, ...) + } + /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ + /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f9: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -195,7 +195,7 @@ f10 (int i, ...) + } + /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ + /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -216,7 +216,7 @@ f11 (int i, ...) + } + /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ + /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 3 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f11: va_list escapes 0, needs to save (3|12|24) GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -237,7 +237,7 @@ f12 (int i, ...) + } + /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ + /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ +-/* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and 3 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save 0 GPR units and 48 FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f12: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -258,7 +258,7 @@ f13 (int i, ...) + } + /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ + /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ +-/* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 24 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and 3 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save 0 GPR units and 48 FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f13: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -279,7 +279,7 @@ f14 (int i, ...) + } + /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ + /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[148\] GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ +-/* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 24 GPR units and 3" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 24 GPR units and 3" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 1 GPR units and 2 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f14: va_list escapes 0, needs to save \[1-9]\[0-9\]* GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -308,7 +308,7 @@ f15 (int i, ...) + /* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + + /* We may be able to improve upon this after fixing PR66010/PR66013. */ +-/* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f15: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + + /* { dg-final { scan-tree-dump-not "f15: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ + /* { dg-final { scan-tree-dump-not "f15: va_list escapes 0, needs to save 0 GPR units" "stdarg" { target ia64-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c +index d044654e0..d92290bb0 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-3.c +@@ -22,7 +22,7 @@ f1 (int i, ...) + } + /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ + /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f1: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -38,7 +38,7 @@ f2 (int i, ...) + } + /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 } } } } } } */ + /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f2: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -57,7 +57,7 @@ f3 (int i, ...) + } + /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ + /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f3: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -74,7 +74,7 @@ f4 (int i, ...) + } + /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ + /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f4: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -91,7 +91,7 @@ f5 (int i, ...) + } + /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 } } } } } } */ + /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f5: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -110,7 +110,7 @@ f6 (int i, ...) + } + /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ + /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f6: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -127,7 +127,7 @@ f7 (int i, ...) + } + /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ + /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f7: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -144,7 +144,7 @@ f8 (int i, ...) + } + /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 } } } } } } */ + /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f8: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -161,7 +161,7 @@ f10 (int i, ...) + } + /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ + /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f10: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -178,7 +178,7 @@ f11 (int i, ...) + } + /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 } } } } } } */ + /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f11: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -195,7 +195,7 @@ f12 (int i, ...) + } + /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 } } } } } } */ + /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f12: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c +index 1a637d6ef..8b2f38929 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-4.c +@@ -25,7 +25,7 @@ f1 (int i, ...) + } + /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { x32 || { ! { ia32 || llp64 } } } } } } } */ + /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f1: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -43,7 +43,7 @@ f2 (int i, ...) + } + /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { x32 || { ! { ia32 || llp64 } } } } } } } */ + /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ +-/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save 0 GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f2: va_list escapes \[01\], needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -67,7 +67,7 @@ f3 (int i, ...) 
+ } + /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ + /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units and 0 FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +@@ -89,7 +89,7 @@ f4 (int i, ...) + } + /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ + /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and \[1-9\]\[0-9\]* FPR units" "stdarg" { target { powerpc*-*-linux* && { powerpc_fprs && ilp32 } } } } } */ +-/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 8 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and 1 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 0 GPR units and 16 FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save \[148\] GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c +index c8ad4fe32..c3eba1e21 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-5.c +@@ -23,7 +23,7 @@ f1 (int i, ...) + va_end (ap); + } + /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ +-/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f1: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ + +@@ -37,7 +37,7 @@ f2 (int i, ...) + va_end (ap); + } + /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ +-/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f2: va_list escapes 0, needs to save all GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ + +@@ -56,7 +56,7 @@ f3 (int i, ...) + } + } + /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 0 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ +-/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 1 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f3: va_list escapes 0, needs to save 8 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ + +@@ -75,7 +75,7 @@ f4 (int i, ...) + } + } + /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 16 GPR units and 16 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ +-/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 2 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f4: va_list escapes 0, needs to save 24 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ + +@@ -90,7 +90,7 @@ f5 (int i, ...) + bar (__real__ ci + __imag__ ci); + } + /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 16 GPR units and 0 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ +-/* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 32 GPR units and 1" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save (4|2) GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f5: va_list escapes 0, needs to save 16 GPR units and 0 FPR units" "stdarg" { target aarch64*-*-* } } } */ + +@@ -105,7 +105,7 @@ f6 (int i, ...) + bar (__real__ ci + __imag__ cd); + } + /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 || llp64 } } } } } } */ +-/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 32 GPR units and 3" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 32 GPR units and 3" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save (3|2) GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f6: va_list escapes 0, needs to save 8 GPR units and 32 FPR units" "stdarg" { target aarch64*-*-* } } } */ + +@@ -120,6 +120,6 @@ f7 (int i, ...) + bar (__real__ cd + __imag__ cd); + } + /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 0 GPR units and 64 FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! { ia32 || llp64 } } } } } } */ +-/* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 32 GPR units and 2" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 32 GPR units and 2" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 2 GPR units and 0 FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "f7: va_list escapes 0, needs to save 0 GPR units and 64 FPR units" "stdarg" { target aarch64*-*-* } } } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c +index be7bc0d12..c2db580cb 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/stdarg-6.c +@@ -28,7 +28,7 @@ bar (int x, char const *y, ...) + + /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && { ! 
{ ia32 } } } } } } */ + /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target { powerpc*-*-linux* && ilp32 } } } } */ +-/* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* } } } */ ++/* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target alpha*-*-linux* sw_64*-*-* } } } */ + /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target s390*-*-linux* } } } */ + /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units and all FPR units" "stdarg" { target aarch64*-*-* } } } */ + /* { dg-final { scan-tree-dump "bar: va_list escapes 1, needs to save all GPR units" "stdarg" { target { { i?86-*-* x86_64-*-* } && ia32 } } } } */ +diff --git a/gcc/testsuite/go.test/go-test.exp b/gcc/testsuite/go.test/go-test.exp +index 51f9b381d..18e866ad3 100644 +--- a/gcc/testsuite/go.test/go-test.exp ++++ b/gcc/testsuite/go.test/go-test.exp +@@ -193,6 +193,9 @@ proc go-set-goarch { } { + "alpha*-*-*" { + set goarch "alpha" + } ++ "sw_64*-*-*" { ++ set goarch "sw_64" ++ } + "arm*-*-*" - + "ep9312*-*-*" - + "strongarm*-*-*" - +diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp +index bd62a0d9e..b618c2eed 100644 +--- a/gcc/testsuite/lib/target-supports.exp ++++ b/gcc/testsuite/lib/target-supports.exp +@@ -3306,6 +3306,7 @@ proc check_effective_target_vect_cmdline_needed { } { + return [check_cached_effective_target vect_cmdline_needed { + if { [istarget alpha*-*-*] + || [istarget ia64-*-*] ++ || [istarget sw_64-*-*] + || (([istarget i?86-*-*] || [istarget x86_64-*-*]) + && ![is-effective-target ia32]) + || ([istarget powerpc*-*-*] +@@ -3334,6 +3335,7 @@ proc check_effective_target_vect_int { } { + || [istarget amdgcn-*-*] + || [istarget sparc*-*-*] + || [istarget alpha*-*-*] ++ || [istarget sw_64*-*-*] + || [istarget ia64-*-*] + || [istarget aarch64*-*-*] + || [is-effective-target arm_neon] +@@ -6451,6 +6453,7 @@ proc check_effective_target_vect_no_int_min_max { } { + return [check_cached_effective_target_indexed vect_no_int_min_max { + expr { [istarget sparc*-*-*] + || [istarget alpha*-*-*] ++ || [istarget sw_64*-*-*] + || ([istarget mips*-*-*] + && [et-is-effective-target mips_loongson_mmi]) }}] + } +@@ -6463,7 +6466,7 @@ proc check_effective_target_vect_no_int_min_max { } { + proc check_effective_target_vect_no_int_add { } { + # Alpha only supports vector add on V8QI and V4HI. 
+ return [check_cached_effective_target_indexed vect_no_int_add { +- expr { [istarget alpha*-*-*] }}] ++ expr { [istarget alpha*-*-*] || [istarget sw_64*-*-*] }}] + } + + # Return 1 if the target plus current options does not support vector +@@ -7545,6 +7548,7 @@ proc check_effective_target_sync_long_long { } { + || [istarget aarch64*-*-*] + || [istarget arm*-*-*] + || [istarget alpha*-*-*] ++ || [istarget sw_64*-*-*] + || ([istarget sparc*-*-*] && [check_effective_target_lp64]) + || [istarget s390*-*-*] } { + return 1 +@@ -7626,6 +7630,7 @@ proc check_effective_target_sync_long_long_runtime { } { + } + } "" ]) + || [istarget alpha*-*-*] ++ || [istarget sw_64*-*-*] + || ([istarget sparc*-*-*] + && [check_effective_target_lp64] + && [check_effective_target_ultrasparc_hw]) +@@ -7642,6 +7647,7 @@ proc check_effective_target_bswap { } { + return [check_cached_effective_target bswap { + expr { [istarget aarch64*-*-*] + || [istarget alpha*-*-*] ++ || [istarget sw_64*-*-*] + || [istarget i?86-*-*] || [istarget x86_64-*-*] + || [istarget m68k-*-*] + || [istarget powerpc*-*-*] +@@ -7666,6 +7672,7 @@ proc check_effective_target_sync_int_long { } { + || [istarget i?86-*-*] || [istarget x86_64-*-*] + || [istarget aarch64*-*-*] + || [istarget alpha*-*-*] ++ || [istarget sw_64*-*-*] + || [istarget arm*-*-linux-*] + || [istarget arm*-*-uclinuxfdpiceabi] + || ([istarget arm*-*-*] +@@ -7690,6 +7697,7 @@ proc check_effective_target_sync_char_short { } { + || [istarget ia64-*-*] + || [istarget i?86-*-*] || [istarget x86_64-*-*] + || [istarget alpha*-*-*] ++ || [istarget sw_64*-*-*] + || [istarget arm*-*-linux-*] + || [istarget arm*-*-uclinuxfdpiceabi] + || ([istarget arm*-*-*] +@@ -8118,6 +8126,7 @@ proc check_effective_target_fd_truncate { } { + + proc add_options_for_ieee { flags } { + if { [istarget alpha*-*-*] ++ || [istarget sw_64*-*-*] + || [istarget sh*-*-*] } { + return "$flags -mieee" + } +-- +2.43.0 + diff --git a/0003-Sw64-Port-libatomic.patch b/0003-Sw64-Port-libatomic.patch new file mode 100644 index 0000000..3e2d336 --- /dev/null +++ b/0003-Sw64-Port-libatomic.patch @@ -0,0 +1,29 @@ +From 518cb15e1b3da99ab7513d8ae308af4a1741a66c Mon Sep 17 00:00:00 2001 +From: swcompiler +Date: Tue, 15 Oct 2024 14:00:18 +0800 +Subject: [PATCH 03/13] Sw64 Port: libatomic + +--- + libatomic/configure.tgt | 6 ++++++ + 1 file changed, 6 insertions(+) + +diff --git a/libatomic/configure.tgt b/libatomic/configure.tgt +index 5dd0926d2..423324de9 100644 +--- a/libatomic/configure.tgt ++++ b/libatomic/configure.tgt +@@ -81,6 +81,12 @@ case "${target_cpu}" in + ARCH=sparc + ;; + ++ sw_64*) ++ # fenv.c needs this option to generate inexact exceptions. 
++ XCFLAGS="${XCFLAGS} -mfp-trap-mode=sui" ++ ARCH=sw_64 ++ ;; ++ + i[3456]86) + case " ${CC} ${CFLAGS} " in + *" -m64 "*|*" -mx32 "*) +-- +2.43.0 + diff --git a/0004-Sw64-Port-libffi.patch b/0004-Sw64-Port-libffi.patch new file mode 100644 index 0000000..18647e2 --- /dev/null +++ b/0004-Sw64-Port-libffi.patch @@ -0,0 +1,1055 @@ +From 4c52a66b2fff05bbb38110a6fc44c5ab06c98727 Mon Sep 17 00:00:00 2001 +From: swcompiler +Date: Tue, 15 Oct 2024 14:00:45 +0800 +Subject: [PATCH 04/13] Sw64 Port: libffi + +--- + libffi/Makefile.in | 25 +- + libffi/configure.host | 7 + + libffi/src/sw_64/ffi.c | 516 +++++++++++++++++++++++++++++++++++ + libffi/src/sw_64/ffitarget.h | 59 ++++ + libffi/src/sw_64/internal.h | 23 ++ + libffi/src/sw_64/sysv.S | 281 +++++++++++++++++++ + libffi/src/types.c | 4 +- + 7 files changed, 911 insertions(+), 4 deletions(-) + create mode 100644 libffi/src/sw_64/ffi.c + create mode 100644 libffi/src/sw_64/ffitarget.h + create mode 100644 libffi/src/sw_64/internal.h + create mode 100644 libffi/src/sw_64/sysv.S + +diff --git a/libffi/Makefile.in b/libffi/Makefile.in +index 745bdd807..779b0e023 100644 +--- a/libffi/Makefile.in ++++ b/libffi/Makefile.in +@@ -552,6 +552,7 @@ noinst_HEADERS = \ + src/sh/ffitarget.h \ + src/sh64/ffitarget.h \ + src/sparc/ffitarget.h src/sparc/internal.h \ ++ src/sw_64/ffitarget.h src/sw_64/internal.h \ + src/tile/ffitarget.h \ + src/vax/ffitarget.h \ + src/x86/ffitarget.h src/x86/internal.h src/x86/internal64.h \ +@@ -588,6 +589,7 @@ EXTRA_libffi_la_SOURCES = \ + src/sh/ffi.c src/sh/sysv.S \ + src/sh64/ffi.c src/sh64/sysv.S \ + src/sparc/ffi.c src/sparc/ffi64.c src/sparc/v8.S src/sparc/v9.S \ ++ src/sw_64/ffi.c src/sw_64/sysv.S \ + src/tile/ffi.c src/tile/tile.S \ + src/vax/ffi.c src/vax/elfbsd.S \ + src/x86/ffi.c src/x86/sysv.S \ +@@ -1012,6 +1014,16 @@ src/sparc/v8.lo: src/sparc/$(am__dirstamp) \ + src/sparc/$(DEPDIR)/$(am__dirstamp) + src/sparc/v9.lo: src/sparc/$(am__dirstamp) \ + src/sparc/$(DEPDIR)/$(am__dirstamp) ++src/sw_64/$(am__dirstamp): ++ @$(MKDIR_P) src/sw_64 ++ @: > src/sw_64/$(am__dirstamp) ++src/sw_64/$(DEPDIR)/$(am__dirstamp): ++ @$(MKDIR_P) src/sw_64/$(DEPDIR) ++ @: > src/sw_64/$(DEPDIR)/$(am__dirstamp) ++src/sw_64/ffi.lo: src/sw_64/$(am__dirstamp) \ ++ src/sw_64/$(DEPDIR)/$(am__dirstamp) ++src/sw_64/sysv.lo: src/sw_64/$(am__dirstamp) \ ++ src/sw_64/$(DEPDIR)/$(am__dirstamp) + src/tile/$(am__dirstamp): + @$(MKDIR_P) src/tile + @: > src/tile/$(am__dirstamp) +@@ -1129,6 +1141,10 @@ mostlyclean-compile: + -rm -f src/sh64/*.lo + -rm -f src/sparc/*.$(OBJEXT) + -rm -f src/sparc/*.lo ++ -rm -f src/sw_64/ffi.$(OBJEXT) ++ -rm -f src/sw_64/ffi.lo ++ -rm -f src/sw_64/sysv.$(OBJEXT) ++ -rm -f src/sw_64/sysv.lo + -rm -f src/tile/*.$(OBJEXT) + -rm -f src/tile/*.lo + -rm -f src/vax/*.$(OBJEXT) +@@ -1211,6 +1227,8 @@ distclean-compile: + @AMDEP_TRUE@@am__include@ @am__quote@src/sparc/$(DEPDIR)/ffi64.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@src/sparc/$(DEPDIR)/v8.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@src/sparc/$(DEPDIR)/v9.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@src/sw_64/$(DEPDIR)/ffi.Plo@am__quote@ ++@AMDEP_TRUE@@am__include@ @am__quote@src/sw_64/$(DEPDIR)/sysv.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@src/tile/$(DEPDIR)/ffi.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@src/tile/$(DEPDIR)/tile.Plo@am__quote@ + @AMDEP_TRUE@@am__include@ @am__quote@src/vax/$(DEPDIR)/elfbsd.Plo@am__quote@ +@@ -1307,6 +1325,7 @@ clean-libtool: + -rm -rf src/sh/.libs src/sh/_libs + -rm -rf src/sh64/.libs 
src/sh64/_libs + -rm -rf src/sparc/.libs src/sparc/_libs ++ -rm -rf src/sw_64/.libs src/sw_64/_libs + -rm -rf src/tile/.libs src/tile/_libs + -rm -rf src/vax/.libs src/vax/_libs + -rm -rf src/x86/.libs src/x86/_libs +@@ -1669,6 +1688,8 @@ distclean-generic: + -rm -f src/sh64/$(am__dirstamp) + -rm -f src/sparc/$(DEPDIR)/$(am__dirstamp) + -rm -f src/sparc/$(am__dirstamp) ++ -rm -f src/sw_64/$(DEPDIR)/$(am__dirstamp) ++ -rm -f src/sw_64/$(am__dirstamp) + -rm -f src/tile/$(DEPDIR)/$(am__dirstamp) + -rm -f src/tile/$(am__dirstamp) + -rm -f src/vax/$(DEPDIR)/$(am__dirstamp) +@@ -1691,7 +1712,7 @@ clean-am: clean-aminfo clean-generic clean-libtool clean-local \ + + distclean: distclean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) +- -rm -rf src/$(DEPDIR) src/aarch64/$(DEPDIR) src/alpha/$(DEPDIR) src/arc/$(DEPDIR) src/arm/$(DEPDIR) src/avr32/$(DEPDIR) src/bfin/$(DEPDIR) src/cris/$(DEPDIR) src/frv/$(DEPDIR) src/ia64/$(DEPDIR) src/m32r/$(DEPDIR) src/m68k/$(DEPDIR) src/m88k/$(DEPDIR) src/metag/$(DEPDIR) src/microblaze/$(DEPDIR) src/mips/$(DEPDIR) src/moxie/$(DEPDIR) src/nios2/$(DEPDIR) src/or1k/$(DEPDIR) src/pa/$(DEPDIR) src/powerpc/$(DEPDIR) src/riscv/$(DEPDIR) src/s390/$(DEPDIR) src/sh/$(DEPDIR) src/sh64/$(DEPDIR) src/sparc/$(DEPDIR) src/tile/$(DEPDIR) src/vax/$(DEPDIR) src/x86/$(DEPDIR) src/xtensa/$(DEPDIR) ++ -rm -rf src/$(DEPDIR) src/aarch64/$(DEPDIR) src/alpha/$(DEPDIR) src/arc/$(DEPDIR) src/arm/$(DEPDIR) src/avr32/$(DEPDIR) src/bfin/$(DEPDIR) src/cris/$(DEPDIR) src/frv/$(DEPDIR) src/ia64/$(DEPDIR) src/m32r/$(DEPDIR) src/m68k/$(DEPDIR) src/m88k/$(DEPDIR) src/metag/$(DEPDIR) src/microblaze/$(DEPDIR) src/mips/$(DEPDIR) src/moxie/$(DEPDIR) src/nios2/$(DEPDIR) src/or1k/$(DEPDIR) src/pa/$(DEPDIR) src/powerpc/$(DEPDIR) src/riscv/$(DEPDIR) src/s390/$(DEPDIR) src/sh/$(DEPDIR) src/sh64/$(DEPDIR) src/sparc/$(DEPDIR) src/sw_64/$(DEPDIR) src/tile/$(DEPDIR) src/vax/$(DEPDIR) src/x86/$(DEPDIR) src/xtensa/$(DEPDIR) + -rm -f Makefile + distclean-am: clean-am distclean-compile distclean-generic \ + distclean-hdr distclean-libtool distclean-local distclean-tags +@@ -1830,7 +1851,7 @@ installcheck-am: + maintainer-clean: maintainer-clean-recursive + -rm -f $(am__CONFIG_DISTCLEAN_FILES) + -rm -rf $(top_srcdir)/autom4te.cache +- -rm -rf src/$(DEPDIR) src/aarch64/$(DEPDIR) src/alpha/$(DEPDIR) src/arc/$(DEPDIR) src/arm/$(DEPDIR) src/avr32/$(DEPDIR) src/bfin/$(DEPDIR) src/cris/$(DEPDIR) src/frv/$(DEPDIR) src/ia64/$(DEPDIR) src/m32r/$(DEPDIR) src/m68k/$(DEPDIR) src/m88k/$(DEPDIR) src/metag/$(DEPDIR) src/microblaze/$(DEPDIR) src/mips/$(DEPDIR) src/moxie/$(DEPDIR) src/nios2/$(DEPDIR) src/or1k/$(DEPDIR) src/pa/$(DEPDIR) src/powerpc/$(DEPDIR) src/riscv/$(DEPDIR) src/s390/$(DEPDIR) src/sh/$(DEPDIR) src/sh64/$(DEPDIR) src/sparc/$(DEPDIR) src/tile/$(DEPDIR) src/vax/$(DEPDIR) src/x86/$(DEPDIR) src/xtensa/$(DEPDIR) ++ -rm -rf src/$(DEPDIR) src/aarch64/$(DEPDIR) src/alpha/$(DEPDIR) src/arm/$(DEPDIR) src/avr32/$(DEPDIR) src/bfin/$(DEPDIR) src/cris/$(DEPDIR) src/frv/$(DEPDIR) src/ia64/$(DEPDIR) src/m32r/$(DEPDIR) src/m68k/$(DEPDIR) src/m88k/$(DEPDIR) src/metag/$(DEPDIR) src/microblaze/$(DEPDIR) src/mips/$(DEPDIR) src/moxie/$(DEPDIR) src/nios2/$(DEPDIR) src/or1k/$(DEPDIR) src/pa/$(DEPDIR) src/powerpc/$(DEPDIR) src/riscv/$(DEPDIR) src/s390/$(DEPDIR) src/sh/$(DEPDIR) src/sh64/$(DEPDIR) src/sparc/$(DEPDIR) src/sw_64/$(DEPDIR) src/tile/$(DEPDIR) src/vax/$(DEPDIR) src/x86/$(DEPDIR) src/xtensa/$(DEPDIR) + -rm -f Makefile + maintainer-clean-am: distclean-am maintainer-clean-aminfo \ + maintainer-clean-generic maintainer-clean-local \ 
+diff --git a/libffi/configure.host b/libffi/configure.host +index 786b32c5b..c9a3ecad6 100644 +--- a/libffi/configure.host ++++ b/libffi/configure.host +@@ -219,6 +219,13 @@ case "${host}" in + SOURCES="ffi.c ffi64.c v8.S v9.S" + ;; + ++ sw_64*-*-*) ++ TARGET=SW_64; TARGETDIR=sw_64; ++ # Support 128-bit long double, changeable via command-line switch. ++ HAVE_LONG_DOUBLE='defined(__LONG_DOUBLE_128__)' ++ SOURCES="ffi.c sysv.S" ++ ;; ++ + tile*-*) + TARGET=TILE; TARGETDIR=tile + SOURCES="ffi.c tile.S" +diff --git a/libffi/src/sw_64/ffi.c b/libffi/src/sw_64/ffi.c +new file mode 100644 +index 000000000..c88264114 +--- /dev/null ++++ b/libffi/src/sw_64/ffi.c +@@ -0,0 +1,516 @@ ++/* ----------------------------------------------------------------------- ++ ffi.c - Copyright (c) 2012 Anthony Green ++ Copyright (c) 1998, 2001, 2007, 2008 Red Hat, Inc. ++ ++ Sunway Foreign Function Interface ++ ++ Permission is hereby granted, free of charge, to any person obtaining ++ a copy of this software and associated documentation files (the ++ ``Software''), to deal in the Software without restriction, including ++ without limitation the rights to use, copy, modify, merge, publish, ++ distribute, sublicense, and/or sell copies of the Software, and to ++ permit persons to whom the Software is furnished to do so, subject to ++ the following conditions: ++ ++ The above copyright notice and this permission notice shall be included ++ in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. ++ ----------------------------------------------------------------------- */ ++ ++#include ++#include ++#include ++#include "internal.h" ++ ++/* Force FFI_TYPE_LONGDOUBLE to be different than FFI_TYPE_DOUBLE; ++ all further uses in this file will refer to the 128-bit type. */ ++#if defined(__LONG_DOUBLE_128__) ++#if FFI_TYPE_LONGDOUBLE != 4 ++#error FFI_TYPE_LONGDOUBLE out of date ++#endif ++#else ++#undef FFI_TYPE_LONGDOUBLE ++#define FFI_TYPE_LONGDOUBLE 4 ++#endif ++ ++extern void ++ffi_call_sysv (void *stack, void *frame, unsigned flags, void *raddr, ++ void (*fn) (void), void *closure) FFI_HIDDEN; ++extern void ++ffi_closure_sysv (void) FFI_HIDDEN; ++extern void ++ffi_go_closure_sysv (void) FFI_HIDDEN; ++ ++/* Promote a float value to its in-register double representation. ++ Unlike actually casting to double, this does not trap on NaN. */ ++static inline UINT64 ++lds (void *ptr) ++{ ++ UINT64 ret; ++ asm("flds %0,%1" : "=f"(ret) : "m"(*(UINT32 *) ptr)); ++ return ret; ++} ++ ++/* And the reverse. */ ++static inline void ++sts (void *ptr, UINT64 val) ++{ ++ asm("fsts %1,%0" : "=m"(*(UINT32 *) ptr) : "f"(val)); ++} ++ ++ffi_status FFI_HIDDEN ++ffi_prep_cif_machdep (ffi_cif *cif) ++{ ++ size_t bytes = 0; ++ int flags, i, avn; ++ ffi_type *rtype, *itype; ++ ++ if (cif->abi != FFI_OSF) ++ return FFI_BAD_ABI; ++ ++ /* Compute the size of the argument area. 
*/ ++ for (i = 0, avn = cif->nargs; i < avn; i++) ++ { ++ itype = cif->arg_types[i]; ++ switch (itype->type) ++ { ++ case FFI_TYPE_INT: ++ case FFI_TYPE_SINT8: ++ case FFI_TYPE_UINT8: ++ case FFI_TYPE_SINT16: ++ case FFI_TYPE_UINT16: ++ case FFI_TYPE_SINT32: ++ case FFI_TYPE_UINT32: ++ case FFI_TYPE_SINT64: ++ case FFI_TYPE_UINT64: ++ case FFI_TYPE_POINTER: ++ case FFI_TYPE_FLOAT: ++ case FFI_TYPE_DOUBLE: ++ case FFI_TYPE_LONGDOUBLE: ++ /* All take one 8 byte slot. */ ++ bytes += 8; ++ break; ++ ++ case FFI_TYPE_VOID: ++ case FFI_TYPE_STRUCT: ++ /* Passed by value in N slots. */ ++ bytes += ALIGN (itype->size, FFI_SIZEOF_ARG); ++ break; ++ ++ case FFI_TYPE_COMPLEX: ++ /* _Complex long double passed by reference; others in 2 slots. */ ++ if (itype->elements[0]->type == FFI_TYPE_LONGDOUBLE) ++ bytes += 8; ++ else ++ bytes += 16; ++ break; ++ ++ default: ++ abort (); ++ } ++ } ++ ++ /* Set the return type flag */ ++ rtype = cif->rtype; ++ switch (rtype->type) ++ { ++ case FFI_TYPE_VOID: ++ flags = SW_64_FLAGS (SW_64_ST_VOID, SW_64_LD_VOID); ++ break; ++ case FFI_TYPE_INT: ++ case FFI_TYPE_UINT32: ++ case FFI_TYPE_SINT32: ++ flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_INT32); ++ break; ++ case FFI_TYPE_FLOAT: ++ flags = SW_64_FLAGS (SW_64_ST_FLOAT, SW_64_LD_FLOAT); ++ break; ++ case FFI_TYPE_DOUBLE: ++ flags = SW_64_FLAGS (SW_64_ST_DOUBLE, SW_64_LD_DOUBLE); ++ break; ++ case FFI_TYPE_UINT8: ++ flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_UINT8); ++ break; ++ case FFI_TYPE_SINT8: ++ flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_SINT8); ++ break; ++ case FFI_TYPE_UINT16: ++ flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_UINT16); ++ break; ++ case FFI_TYPE_SINT16: ++ flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_SINT16); ++ break; ++ case FFI_TYPE_UINT64: ++ case FFI_TYPE_SINT64: ++ case FFI_TYPE_POINTER: ++ flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_INT64); ++ break; ++ case FFI_TYPE_LONGDOUBLE: ++ case FFI_TYPE_STRUCT: ++ /* Passed in memory, with a hidden pointer. */ ++ flags = SW_64_RET_IN_MEM; ++ break; ++ case FFI_TYPE_COMPLEX: ++ itype = rtype->elements[0]; ++ switch (itype->type) ++ { ++ case FFI_TYPE_FLOAT: ++ flags = SW_64_FLAGS (SW_64_ST_CPLXF, SW_64_LD_CPLXF); ++ break; ++ case FFI_TYPE_DOUBLE: ++ flags = SW_64_FLAGS (SW_64_ST_CPLXD, SW_64_LD_CPLXD); ++ break; ++ default: ++ if (rtype->size <= 8) ++ flags = SW_64_FLAGS (SW_64_ST_INT, SW_64_LD_INT64); ++ else ++ flags = SW_64_RET_IN_MEM; ++ break; ++ } ++ break; ++ default: ++ abort (); ++ } ++ cif->flags = flags; ++ ++ /* Include the hidden structure pointer in args requirement. */ ++ if (flags == SW_64_RET_IN_MEM) ++ bytes += 8; ++ /* Minimum size is 6 slots, so that ffi_call_sysv can pop them. */ ++ if (bytes < 6 * 8) ++ bytes = 6 * 8; ++ cif->bytes = bytes; ++ ++ return FFI_OK; ++} ++ ++static unsigned long ++extend_basic_type (void *valp, int type, int argn) ++{ ++ switch (type) ++ { ++ case FFI_TYPE_SINT8: ++ return *(SINT8 *) valp; ++ case FFI_TYPE_UINT8: ++ return *(UINT8 *) valp; ++ case FFI_TYPE_SINT16: ++ return *(SINT16 *) valp; ++ case FFI_TYPE_UINT16: ++ return *(UINT16 *) valp; ++ ++ case FFI_TYPE_FLOAT: ++ if (argn < 6) ++ return lds (valp); ++ /* FALLTHRU */ ++ ++ case FFI_TYPE_INT: ++ case FFI_TYPE_SINT32: ++ case FFI_TYPE_UINT32: ++ /* Note that unsigned 32-bit quantities are sign extended. 
*/ ++ return *(SINT32 *) valp; ++ ++ case FFI_TYPE_SINT64: ++ case FFI_TYPE_UINT64: ++ case FFI_TYPE_POINTER: ++ case FFI_TYPE_DOUBLE: ++ return *(UINT64 *) valp; ++ ++ default: ++ abort (); ++ } ++} ++ ++static void ++ffi_call_int (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue, ++ void *closure) ++{ ++ unsigned long *argp; ++ long i, avn, argn, flags = cif->flags; ++ ffi_type **arg_types; ++ void *frame; ++ ++ /* If the return value is a struct and we don't have a return ++ value address then we need to make one. */ ++ if (rvalue == NULL && flags == SW_64_RET_IN_MEM) ++ rvalue = alloca (cif->rtype->size); ++ ++ /* Allocate the space for the arguments, plus 4 words of temp ++ space for ffi_call_sysv. */ ++ argp = frame = alloca (cif->bytes + 4 * FFI_SIZEOF_ARG); ++ frame += cif->bytes; ++ ++ argn = 0; ++ if (flags == SW_64_RET_IN_MEM) ++ argp[argn++] = (unsigned long) rvalue; ++ ++ avn = cif->nargs; ++ arg_types = cif->arg_types; ++ ++ for (i = 0, avn = cif->nargs; i < avn; i++) ++ { ++ ffi_type *ty = arg_types[i]; ++ void *valp = avalue[i]; ++ int type = ty->type; ++ size_t size; ++ ++ switch (type) ++ { ++ case FFI_TYPE_INT: ++ case FFI_TYPE_SINT8: ++ case FFI_TYPE_UINT8: ++ case FFI_TYPE_SINT16: ++ case FFI_TYPE_UINT16: ++ case FFI_TYPE_SINT32: ++ case FFI_TYPE_UINT32: ++ case FFI_TYPE_SINT64: ++ case FFI_TYPE_UINT64: ++ case FFI_TYPE_POINTER: ++ case FFI_TYPE_FLOAT: ++ case FFI_TYPE_DOUBLE: ++ argp[argn] = extend_basic_type (valp, type, argn); ++ argn++; ++ break; ++ ++ case FFI_TYPE_LONGDOUBLE: ++ by_reference: ++ /* Note that 128-bit long double is passed by reference. */ ++ argp[argn++] = (unsigned long) valp; ++ break; ++ ++ case FFI_TYPE_VOID: ++ case FFI_TYPE_STRUCT: ++ size = ty->size; ++ memcpy (argp + argn, valp, size); ++ argn += ALIGN (size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; ++ break; ++ ++ case FFI_TYPE_COMPLEX: ++ type = ty->elements[0]->type; ++ if (type == FFI_TYPE_LONGDOUBLE) ++ goto by_reference; ++ ++ /* Most complex types passed as two separate arguments. */ ++ size = ty->elements[0]->size; ++ argp[argn] = extend_basic_type (valp, type, argn); ++ argp[argn + 1] = extend_basic_type (valp + size, type, argn + 1); ++ argn += 2; ++ break; ++ ++ default: ++ abort (); ++ } ++ } ++ ++ flags = (flags >> SW_64_ST_SHIFT) & 0xff; ++ ffi_call_sysv (argp, frame, flags, rvalue, fn, closure); ++} ++ ++void ++ffi_call (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue) ++{ ++ ffi_call_int (cif, fn, rvalue, avalue, NULL); ++} ++ ++void ++ffi_call_go (ffi_cif *cif, void (*fn) (void), void *rvalue, void **avalue, ++ void *closure) ++{ ++ ffi_call_int (cif, fn, rvalue, avalue, closure); ++} ++ ++ffi_status ++ffi_prep_closure_loc (ffi_closure *closure, ffi_cif *cif, ++ void (*fun) (ffi_cif *, void *, void **, void *), ++ void *user_data, void *codeloc) ++{ ++ unsigned int *tramp; ++ ++ if (cif->abi != FFI_OSF) ++ return FFI_BAD_ABI; ++ ++ tramp = (unsigned int *) &closure->tramp[0]; ++ tramp[0] = 0x43fb0741; /* mov $27,$1 */ ++ tramp[1] = 0x8f7b0010; /* ldl $27,16($27) */ ++ tramp[2] = 0x0ffb0000; /* jmp $31,($27),0 */ ++ tramp[3] = 0x43ff075f; /* nop */ ++ *(void **) &tramp[4] = ffi_closure_sysv; ++ ++ closure->cif = cif; ++ closure->fun = fun; ++ closure->user_data = user_data; ++ ++ /* Flush the Icache. 0x86 is PAL_imb in Tru64 UNIX . 
*/ ++ asm volatile("sys_call 0x86" : : : "memory"); ++ ++ return FFI_OK; ++} ++ ++ffi_status ++ffi_prep_go_closure (ffi_go_closure *closure, ffi_cif *cif, ++ void (*fun) (ffi_cif *, void *, void **, void *)) ++{ ++ if (cif->abi != FFI_OSF) ++ return FFI_BAD_ABI; ++ ++ closure->tramp = (void *) ffi_go_closure_sysv; ++ closure->cif = cif; ++ closure->fun = fun; ++ ++ return FFI_OK; ++} ++ ++long FFI_HIDDEN ++ffi_closure_sysv_inner (ffi_cif *cif, ++ void (*fun) (ffi_cif *, void *, void **, void *), ++ void *user_data, void *rvalue, unsigned long *argp) ++{ ++ void **avalue; ++ ffi_type **arg_types; ++ long i, avn, argn, flags; ++ ++ avalue = alloca (cif->nargs * sizeof (void *)); ++ flags = cif->flags; ++ argn = 0; ++ ++ /* Copy the caller's structure return address to that the closure ++ returns the data directly to the caller. */ ++ if (flags == SW_64_RET_IN_MEM) ++ { ++ rvalue = (void *) argp[0]; ++ argn = 1; ++ } ++ ++ arg_types = cif->arg_types; ++ ++ /* Grab the addresses of the arguments from the stack frame. */ ++ for (i = 0, avn = cif->nargs; i < avn; i++) ++ { ++ ffi_type *ty = arg_types[i]; ++ int type = ty->type; ++ void *valp = &argp[argn]; ++ size_t size; ++ ++ switch (type) ++ { ++ case FFI_TYPE_INT: ++ case FFI_TYPE_SINT8: ++ case FFI_TYPE_UINT8: ++ case FFI_TYPE_SINT16: ++ case FFI_TYPE_UINT16: ++ case FFI_TYPE_SINT32: ++ case FFI_TYPE_UINT32: ++ case FFI_TYPE_SINT64: ++ case FFI_TYPE_UINT64: ++ case FFI_TYPE_POINTER: ++ argn += 1; ++ break; ++ ++ case FFI_TYPE_VOID: ++ case FFI_TYPE_STRUCT: ++ size = ty->size; ++ argn += ALIGN (size, FFI_SIZEOF_ARG) / FFI_SIZEOF_ARG; ++ break; ++ ++ case FFI_TYPE_FLOAT: ++ /* Floats coming from registers need conversion from double ++ back to float format. */ ++ if (argn < 6) ++ { ++ valp = &argp[argn - 6]; ++ sts (valp, argp[argn - 6]); ++ } ++ argn += 1; ++ break; ++ ++ case FFI_TYPE_DOUBLE: ++ if (argn < 6) ++ valp = &argp[argn - 6]; ++ argn += 1; ++ break; ++ ++ case FFI_TYPE_LONGDOUBLE: ++ by_reference: ++ /* 128-bit long double is passed by reference. */ ++ valp = (void *) argp[argn]; ++ argn += 1; ++ break; ++ ++ case FFI_TYPE_COMPLEX: ++ type = ty->elements[0]->type; ++ switch (type) ++ { ++ case FFI_TYPE_SINT64: ++ case FFI_TYPE_UINT64: ++ /* Passed as separate arguments, but they wind up sequential. */ ++ break; ++ ++ case FFI_TYPE_INT: ++ case FFI_TYPE_SINT8: ++ case FFI_TYPE_UINT8: ++ case FFI_TYPE_SINT16: ++ case FFI_TYPE_UINT16: ++ case FFI_TYPE_SINT32: ++ case FFI_TYPE_UINT32: ++ /* Passed as separate arguments. Disjoint, but there's room ++ enough in one slot to hold the pair. */ ++ size = ty->elements[0]->size; ++ memcpy (valp + size, valp + 8, size); ++ break; ++ ++ case FFI_TYPE_FLOAT: ++ /* Passed as separate arguments. Disjoint, and each piece ++ may need conversion back to float. */ ++ if (argn < 6) ++ { ++ valp = &argp[argn - 6]; ++ sts (valp, argp[argn - 6]); ++ } ++ if (argn + 1 < 6) ++ sts (valp + 4, argp[argn + 1 - 6]); ++ else ++ *(UINT32 *) (valp + 4) = argp[argn + 1]; ++ break; ++ ++ case FFI_TYPE_DOUBLE: ++ /* Passed as separate arguments. Only disjoint if one part ++ is in fp regs and the other is on the stack. */ ++ if (argn < 5) ++ valp = &argp[argn - 6]; ++ else if (argn == 5) ++ { ++ valp = alloca (16); ++ ((UINT64 *) valp)[0] = argp[5 - 6]; ++ ((UINT64 *) valp)[1] = argp[6]; ++ } ++ break; ++ ++ case FFI_TYPE_LONGDOUBLE: ++ goto by_reference; ++ ++ default: ++ abort (); ++ } ++ argn += 2; ++ break; ++ ++ default: ++ abort (); ++ } ++ ++ avalue[i] = valp; ++ } ++ ++ /* Invoke the closure. 
*/ ++ fun (cif, rvalue, avalue, user_data); ++ ++ /* Tell ffi_closure_sysv how to perform return type promotions. */ ++ return (flags >> SW_64_LD_SHIFT) & 0xff; ++} +diff --git a/libffi/src/sw_64/ffitarget.h b/libffi/src/sw_64/ffitarget.h +new file mode 100644 +index 000000000..f5792e1dd +--- /dev/null ++++ b/libffi/src/sw_64/ffitarget.h +@@ -0,0 +1,59 @@ ++/* -----------------------------------------------------------------*-C-*- ++ ffitarget.h - Copyright (c) 2012 Anthony Green ++ Copyright (c) 1996-2003 Red Hat, Inc. ++ Target configuration macros for Sunway. ++ ++ Permission is hereby granted, free of charge, to any person obtaining ++ a copy of this software and associated documentation files (the ++ ``Software''), to deal in the Software without restriction, including ++ without limitation the rights to use, copy, modify, merge, publish, ++ distribute, sublicense, and/or sell copies of the Software, and to ++ permit persons to whom the Software is furnished to do so, subject to ++ the following conditions: ++ ++ The above copyright notice and this permission notice shall be included ++ in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. ++ ++ ----------------------------------------------------------------------- */ ++ ++#ifndef LIBFFI_TARGET_H ++#define LIBFFI_TARGET_H ++ ++#ifndef LIBFFI_H ++#error \ ++ "Please do not include ffitarget.h directly into your source. Use ffi.h instead." 
++#endif ++ ++#ifndef LIBFFI_ASM ++typedef unsigned long ffi_arg; ++typedef signed long ffi_sarg; ++ ++typedef enum ffi_abi ++{ ++ FFI_FIRST_ABI = 0, ++ FFI_OSF, ++ FFI_LAST_ABI, ++ FFI_DEFAULT_ABI = FFI_OSF ++} ffi_abi; ++#endif ++ ++#define FFI_TARGET_SPECIFIC_STACK_SPACE_ALLOCATION ++#define FFI_TARGET_HAS_COMPLEX_TYPE ++ ++/* ---- Definitions for closures ----------------------------------------- */ ++ ++#define FFI_CLOSURES 1 ++#define FFI_GO_CLOSURES 1 ++#define FFI_TRAMPOLINE_SIZE 24 ++#define FFI_NATIVE_RAW_API 0 ++ ++#endif +diff --git a/libffi/src/sw_64/internal.h b/libffi/src/sw_64/internal.h +new file mode 100644 +index 000000000..92ad32179 +--- /dev/null ++++ b/libffi/src/sw_64/internal.h +@@ -0,0 +1,23 @@ ++#define SW_64_ST_VOID 0 ++#define SW_64_ST_INT 1 ++#define SW_64_ST_FLOAT 2 ++#define SW_64_ST_DOUBLE 3 ++#define SW_64_ST_CPLXF 4 ++#define SW_64_ST_CPLXD 5 ++ ++#define SW_64_LD_VOID 0 ++#define SW_64_LD_INT64 1 ++#define SW_64_LD_INT32 2 ++#define SW_64_LD_UINT16 3 ++#define SW_64_LD_SINT16 4 ++#define SW_64_LD_UINT8 5 ++#define SW_64_LD_SINT8 6 ++#define SW_64_LD_FLOAT 7 ++#define SW_64_LD_DOUBLE 8 ++#define SW_64_LD_CPLXF 9 ++#define SW_64_LD_CPLXD 10 ++ ++#define SW_64_ST_SHIFT 0 ++#define SW_64_LD_SHIFT 8 ++#define SW_64_RET_IN_MEM 0x10000 ++#define SW_64_FLAGS(S, L) (((L) << SW_64_LD_SHIFT) | (S)) +diff --git a/libffi/src/sw_64/sysv.S b/libffi/src/sw_64/sysv.S +new file mode 100644 +index 000000000..588cb6e76 +--- /dev/null ++++ b/libffi/src/sw_64/sysv.S +@@ -0,0 +1,281 @@ ++/* ----------------------------------------------------------------------- ++ sysv.S - Copyright (c) 1998, 2001, 2007, 2008, 2011, 2014 Red Hat ++ ++ Sunway/SYSV Foreign Function Interface ++ ++ Permission is hereby granted, free of charge, to any person obtaining ++ a copy of this software and associated documentation files (the ++ ``Software''), to deal in the Software without restriction, including ++ without limitation the rights to use, copy, modify, merge, publish, ++ distribute, sublicense, and/or sell copies of the Software, and to ++ permit persons to whom the Software is furnished to do so, subject to ++ the following conditions: ++ ++ The above copyright notice and this permission notice shall be included ++ in all copies or substantial portions of the Software. ++ ++ THE SOFTWARE IS PROVIDED ``AS IS'', WITHOUT WARRANTY OF ANY KIND, ++ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF ++ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND ++ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT ++ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, ++ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER ++ DEALINGS IN THE SOFTWARE. ++ ----------------------------------------------------------------------- */ ++#define LIBFFI_ASM ++#include ++#include ++#include ++#include "internal.h" ++ ++ .arch sw6a ++ .text ++ ++/* Aid in building a direct addressed jump table, 4 insns per entry. */ ++.macro E index ++ .align 4 ++ .org 99b + \index * 16 ++.endm ++ ++/* ffi_call_sysv (void *stack, void *frame, unsigned flags, ++ void *raddr, void (*fnaddr)(void), void *closure) ++ ++ Bit o trickiness here -- FRAME is the base of the stack frame ++ for this function. This has been allocated by ffi_call. We also ++ deallocate some of the stack that has been alloca'd. 
*/ ++ ++ .align 4 ++ .globl ffi_call_sysv ++ .ent ffi_call_sysv ++ FFI_HIDDEN(ffi_call_sysv) ++ ++ffi_call_sysv: ++ cfi_startproc ++ cfi_def_cfa($17, 32) ++ mov $16, $30 ++ stl $26, 0($17) ++ stl $15, 8($17) ++ mov $17, $15 ++ .prologue 0 ++ cfi_def_cfa_register($15) ++ cfi_rel_offset($26, 0) ++ cfi_rel_offset($15, 8) ++ ++ stl $18, 16($17) # save flags into frame ++ stl $19, 24($17) # save rvalue into frame ++ mov $20, $27 # fn into place for call ++ mov $21, $1 # closure into static chain ++ ++ # Load up all of the (potential) argument registers. ++ ldl $16, 0($30) ++ fldd $f16, 0($30) ++ fldd $f17, 8($30) ++ ldl $17, 8($30) ++ fldd $f18, 16($30) ++ ldl $18, 16($30) ++ fldd $f19, 24($30) ++ ldl $19, 24($30) ++ fldd $f20, 32($30) ++ ldl $20, 32($30) ++ fldd $f21, 40($30) ++ ldl $21, 40($30) ++ ++ # Deallocate the register argument area. ++ ldi $30, 48($30) ++ ++ call $26, ($27), 0 ++0: ++ ldih $29, 0($26) !gpdisp!1 ++ ldl $2, 24($15) # reload rvalue ++ ldi $29, 0($29) !gpdisp!1 ++ ldl $3, 16($15) # reload flags ++ ldi $1, 99f-0b($26) ++ ldl $26, 0($15) ++ ldl $15, 8($15) ++ cfi_restore($26) ++ cfi_restore($15) ++ cfi_def_cfa($sp, 0) ++ seleq $2, 0, $3 # mash null rvalue to void ++ addl $3, $3, $3 ++ s8addl $3, $1, $1 # 99f + stcode * 16 ++ jmp $31, ($1), $st_int ++ ++ .align 4 ++99: ++E 0 ++ ret ++E 1 ++$st_int: ++ stl $0, 0($2) ++ ret ++E 2 ++ fsts $f0, 0($2) ++ ret ++E 4 ++ fstd $f0, 0($2) ++ ret ++E 6 ++ fsts $f0, 0($2) ++ fsts $f1, 4($2) ++ ret ++E 10 ++ fstd $f0, 0($2) ++ fstd $f1, 8($2) ++ ret ++ ++ cfi_endproc ++ .end ffi_call_sysv ++ ++/* ffi_closure_sysv(...) ++ ++ Receives the closure argument in $1. */ ++ ++#define CLOSURE_FS (16*8) ++ ++ .align 4 ++ .globl ffi_go_closure_sysv ++ .ent ffi_go_closure_sysv ++ FFI_HIDDEN(ffi_go_closure_sysv) ++ ++ffi_go_closure_sysv: ++ cfi_startproc ++ ldgp $29, 0($27) ++ subl $30, CLOSURE_FS, $30 ++ cfi_adjust_cfa_offset(CLOSURE_FS) ++ stl $26, 0($30) ++ .prologue 1 ++ cfi_rel_offset($26, 0) ++ ++ stl $16, 10*8($30) ++ stl $17, 11*8($30) ++ stl $18, 12*8($30) ++ ++ ldl $16, 8($1) # load cif ++ ldl $17, 16($1) # load fun ++ mov $1, $18 # closure is user_data ++ br $do_closure ++ ++ cfi_endproc ++ .end ffi_go_closure_sysv ++ ++ .align 4 ++ .globl ffi_closure_sysv ++ .ent ffi_closure_sysv ++ FFI_HIDDEN(ffi_closure_sysv) ++ ++ffi_closure_sysv: ++ cfi_startproc ++ ldgp $29, 0($27) ++ subl $30, CLOSURE_FS, $30 ++ cfi_adjust_cfa_offset(CLOSURE_FS) ++ stl $26, 0($30) ++ .prologue 1 ++ cfi_rel_offset($26, 0) ++ ++ # Store all of the potential argument registers in va_list format. ++ stl $16, 10*8($30) ++ stl $17, 11*8($30) ++ stl $18, 12*8($30) ++ ++ ldl $16, 24($1) # load cif ++ ldl $17, 32($1) # load fun ++ ldl $18, 40($1) # load user_data ++ ++$do_closure: ++ stl $19, 13*8($30) ++ stl $20, 14*8($30) ++ stl $21, 15*8($30) ++ fstd $f16, 4*8($30) ++ fstd $f17, 5*8($30) ++ fstd $f18, 6*8($30) ++ fstd $f19, 7*8($30) ++ fstd $f20, 8*8($30) ++ fstd $f21, 9*8($30) ++ ++ # Call ffi_closure_sysv_inner to do the bulk of the work. 
++ ldi $19, 2*8($30) ++ ldi $20, 10*8($30) ++ call $26, ffi_closure_sysv_inner ++0: ++ ldih $29, 0($26) !gpdisp!2 ++ ldi $2, 99f-0b($26) ++ s4addl $0, 0, $1 # ldcode * 4 ++ ldl $0, 16($30) # preload return value ++ s4addl $1, $2, $1 # 99f + ldcode * 16 ++ ldi $29, 0($29) !gpdisp!2 ++ ldl $26, 0($30) ++ cfi_restore($26) ++ jmp $31, ($1), $load_32 ++ ++.macro epilogue ++ addl $30, CLOSURE_FS, $30 ++ cfi_adjust_cfa_offset(-CLOSURE_FS) ++ ret ++ .align 4 ++ cfi_adjust_cfa_offset(CLOSURE_FS) ++.endm ++ ++ .align 4 ++99: ++E 0 ++ epilogue ++ ++E 1 ++ epilogue ++ ++E 2 ++$load_32: ++ sextl $0, $0 ++ epilogue ++ ++E 3 ++ zapnot $0, 3, $0 ++ epilogue ++ ++E 4 ++#ifdef __sw_64_bwx__ ++ sexth $0, $0 ++#else ++ sll $0, 48, $0 ++ sra $0, 48, $0 ++#endif ++ epilogue ++ ++E 5 ++ and $0, 0xff, $0 ++ epilogue ++ ++E 6 ++#ifdef __sw_64_bwx__ ++ sextb $0, $0 ++#else ++ sll $0, 56, $0 ++ sra $0, 56, $0 ++#endif ++ epilogue ++ ++E 7 ++ flds $f0, 16($sp) ++ epilogue ++ ++E 8 ++ fldd $f0, 16($sp) ++ epilogue ++ ++E 9 ++ flds $f0, 16($sp) ++ flds $f1, 20($sp) ++ epilogue ++ ++E 10 ++ fldd $f0, 16($sp) ++ fldd $f1, 24($sp) ++ epilogue ++ ++ cfi_endproc ++ .end ffi_closure_sysv ++ ++#if defined __ELF__ && defined __linux__ ++ .section .note.GNU-stack,"",@progbits ++#endif +diff --git a/libffi/src/types.c b/libffi/src/types.c +index 7e80aec6e..9ff182e35 100644 +--- a/libffi/src/types.c ++++ b/libffi/src/types.c +@@ -78,13 +78,13 @@ FFI_TYPEDEF(pointer, void*, FFI_TYPE_POINTER, const); + FFI_TYPEDEF(float, float, FFI_TYPE_FLOAT, const); + FFI_TYPEDEF(double, double, FFI_TYPE_DOUBLE, const); + +-#if !defined HAVE_LONG_DOUBLE_VARIANT || defined __alpha__ ++#if !defined HAVE_LONG_DOUBLE_VARIANT || defined __alpha_ || defined __sw_64___ + #define FFI_LDBL_CONST const + #else + #define FFI_LDBL_CONST + #endif + +-#ifdef __alpha__ ++#if defined __alpha__ || defined __sw_64__ + /* Even if we're not configured to default to 128-bit long double, + maintain binary compatibility, as -mlong-double-128 can be used + at any time. 
*/ +-- +2.43.0 + diff --git a/0005-Sw64-Port-libgcc.patch b/0005-Sw64-Port-libgcc.patch new file mode 100644 index 0000000..22cf488 --- /dev/null +++ b/0005-Sw64-Port-libgcc.patch @@ -0,0 +1,498 @@ +From 12fe7e3f3a99735e74d4a602e78d7d84d382c43b Mon Sep 17 00:00:00 2001 +From: swcompiler +Date: Tue, 15 Oct 2024 14:01:11 +0800 +Subject: [PATCH 05/13] Sw64 Port: libgcc + +--- + libgcc/config.host | 18 +++ + libgcc/config/sw_64/crtfastmath.c | 36 +++++ + libgcc/config/sw_64/libgcc-sw_64-ldbl.ver | 50 ++++++ + libgcc/config/sw_64/linux-unwind.h | 103 ++++++++++++ + libgcc/config/sw_64/qrnnd.S | 181 ++++++++++++++++++++++ + libgcc/config/sw_64/t-ieee | 2 + + libgcc/config/sw_64/t-linux | 2 + + libgcc/config/sw_64/t-sw_64 | 6 + + libgcc/libgcc2.c | 2 +- + 9 files changed, 399 insertions(+), 1 deletion(-) + create mode 100644 libgcc/config/sw_64/crtfastmath.c + create mode 100644 libgcc/config/sw_64/libgcc-sw_64-ldbl.ver + create mode 100644 libgcc/config/sw_64/linux-unwind.h + create mode 100644 libgcc/config/sw_64/qrnnd.S + create mode 100644 libgcc/config/sw_64/t-ieee + create mode 100644 libgcc/config/sw_64/t-linux + create mode 100644 libgcc/config/sw_64/t-sw_64 + +diff --git a/libgcc/config.host b/libgcc/config.host +index c529cc40f..ba196609e 100644 +--- a/libgcc/config.host ++++ b/libgcc/config.host +@@ -199,6 +199,9 @@ s390*-*-*) + sh[123456789lbe]*-*-*) + cpu_type=sh + ;; ++sw_64*-*-*) ++ cpu_type=sw_64 ++ ;; + tilegx*-*-*) + cpu_type=tilegx + ;; +@@ -1424,6 +1427,21 @@ sparc64-*-linux*) # 64-bit SPARC's running GNU/Linux + ;; + sparc64-*-netbsd*) + ;; ++sw_64*-*-linux*) ++ tmake_file="${tmake_file} sw_64/t-sw_64 sw_64/t-ieee t-crtfm sw_64/t-linux" ++ extra_parts="$extra_parts crtfastmath.o" ++ md_unwind_header=sw_64/linux-unwind.h ++ ;; ++sw_64*-*-freebsd*) ++ tmake_file="${tmake_file} sw_64/t-sw_64 sw_64/t-ieee t-crtfm" ++ extra_parts="$extra_parts crtbeginT.o crtfastmath.o" ++ ;; ++sw_64*-*-netbsd*) ++ tmake_file="${tmake_file} sw_64/t-sw_64 sw_64/t-ieee" ++ ;; ++sw_64*-*-openbsd*) ++ tmake_file="${tmake_file} sw_64/t-sw_64 sw_64/t-ieee" ++ ;; + tic6x-*-uclinux) + tmake_file="${tmake_file} t-softfp-sfdf t-softfp-excl t-softfp \ + c6x/t-elf c6x/t-uclinux t-crtstuff-pic t-libgcc-pic \ +diff --git a/libgcc/config/sw_64/crtfastmath.c b/libgcc/config/sw_64/crtfastmath.c +new file mode 100644 +index 000000000..1cd890458 +--- /dev/null ++++ b/libgcc/config/sw_64/crtfastmath.c +@@ -0,0 +1,36 @@ ++/* ++ * Copyright (C) 2001-2020 Free Software Foundation, Inc. ++ * Contributed by Richard Henderson (rth@redhat.com) ++ * ++ * This file is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License as published by the ++ * Free Software Foundation; either version 3, or (at your option) any ++ * later version. ++ * ++ * This file is distributed in the hope that it will be useful, but ++ * WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++ * General Public License for more details. ++ * ++ * Under Section 7 of GPL version 3, you are granted additional ++ * permissions described in the GCC Runtime Library Exception, version ++ * 3.1, as published by the Free Software Foundation. ++ * ++ * You should have received a copy of the GNU General Public License and ++ * a copy of the GCC Runtime Library Exception along with this program; ++ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ * . ++ */ ++ ++/* Assume SYSV/1 compatible interfaces. 
*/ ++ ++extern void ++__ieee_set_fp_control (unsigned long int); ++ ++#define IEEE_MAP_DMZ (1UL << 12) /* Map denorm inputs to zero */ ++#define IEEE_MAP_UMZ (1UL << 13) /* Map underflowed outputs to zero */ ++ ++static void __attribute__ ((constructor)) set_fast_math (void) ++{ ++ __ieee_set_fp_control (IEEE_MAP_DMZ | IEEE_MAP_UMZ); ++} +diff --git a/libgcc/config/sw_64/libgcc-sw_64-ldbl.ver b/libgcc/config/sw_64/libgcc-sw_64-ldbl.ver +new file mode 100644 +index 000000000..21f259687 +--- /dev/null ++++ b/libgcc/config/sw_64/libgcc-sw_64-ldbl.ver +@@ -0,0 +1,50 @@ ++# Copyright (C) 2006-2020 Free Software Foundation, Inc. ++# ++# This file is part of GCC. ++# ++# GCC is free software; you can redistribute it and/or modify ++# it under the terms of the GNU General Public License as published by ++# the Free Software Foundation; either version 3, or (at your option) ++# any later version. ++# ++# GCC is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++# GNU General Public License for more details. ++# ++# You should have received a copy of the GNU General Public License ++# along with GCC; see the file COPYING3. If not see ++# . ++ ++%ifdef __LONG_DOUBLE_128__ ++ ++# long double 128 bit support in libgcc_s.so.1 is only available ++# when configured with --with-long-double-128. Make sure all the ++# symbols are available at @@GCC_LDBL_* versions to make it clear ++# there is a configurable symbol set. ++ ++%exclude { ++ __fixtfdi ++ __fixunstfdi ++ __floatditf ++ ++ __divtc3 ++ __multc3 ++ __powitf2 ++} ++ ++%inherit GCC_LDBL_3.0 GCC_3.0 ++GCC_LDBL_3.0 { ++ __fixtfdi ++ __fixunstfdi ++ __floatditf ++} ++ ++%inherit GCC_LDBL_4.0.0 GCC_4.0.0 ++GCC_LDBL_4.0.0 { ++ __divtc3 ++ __multc3 ++ __powitf2 ++} ++ ++%endif +diff --git a/libgcc/config/sw_64/linux-unwind.h b/libgcc/config/sw_64/linux-unwind.h +new file mode 100644 +index 000000000..79da6a16a +--- /dev/null ++++ b/libgcc/config/sw_64/linux-unwind.h +@@ -0,0 +1,103 @@ ++/* DWARF2 EH unwinding support for Sw_64 Linux. ++ Copyright (C) 2004-2020 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify ++it under the terms of the GNU General Public License as published by ++the Free Software Foundation; either version 3, or (at your option) ++any later version. ++ ++GCC is distributed in the hope that it will be useful, ++but WITHOUT ANY WARRANTY; without even the implied warranty of ++MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++GNU General Public License for more details. ++ ++Under Section 7 of GPL version 3, you are granted additional ++permissions described in the GCC Runtime Library Exception, version ++3.1, as published by the Free Software Foundation. ++ ++You should have received a copy of the GNU General Public License and ++a copy of the GCC Runtime Library Exception along with this program; ++see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++. */ ++ ++#ifndef inhibit_libc ++/* Do code reading to identify a signal frame, and set the frame ++ state data appropriately. See unwind-dw2.c for the structs. 
*/ ++ ++#include ++#include ++ ++#define MD_FALLBACK_FRAME_STATE_FOR sw_64_fallback_frame_state ++ ++static _Unwind_Reason_Code ++sw_64_fallback_frame_state (struct _Unwind_Context *context, ++ _Unwind_FrameState *fs) ++{ ++ unsigned int *pc = context->ra; ++ struct sigcontext *sc; ++ long new_cfa; ++ int i; ++ ++ if (pc[0] != 0x47fe0410 /* mov $30,$16 */ ++ || pc[2] != 0x00000083) /* callsys */ ++ return _URC_END_OF_STACK; ++ if (context->cfa == 0) ++ return _URC_END_OF_STACK; ++ if (pc[1] == 0x201f0067) /* lda $0,NR_sigreturn */ ++ sc = context->cfa; ++ else if (pc[1] == 0x201f015f) /* lda $0,NR_rt_sigreturn */ ++ { ++ struct rt_sigframe ++ { ++ siginfo_t info; ++ ucontext_t uc; ++ } *rt_ = context->cfa; ++ /* The void * cast is necessary to avoid an aliasing warning. ++ The aliasing warning is correct, but should not be a problem ++ because it does not alias anything. */ ++ sc = (struct sigcontext *) (void *) &rt_->uc.uc_mcontext; ++ } ++ else ++ return _URC_END_OF_STACK; ++ ++ new_cfa = sc->sc_regs[30]; ++ fs->regs.cfa_how = CFA_REG_OFFSET; ++ fs->regs.cfa_reg = 30; ++ fs->regs.cfa_offset = new_cfa - (long) context->cfa; ++ for (i = 0; i < 30; ++i) ++ { ++ fs->regs.reg[i].how = REG_SAVED_OFFSET; ++ fs->regs.reg[i].loc.offset = (long) &sc->sc_regs[i] - new_cfa; ++ } ++ for (i = 0; i < 31; ++i) ++ { ++ fs->regs.reg[i + 32].how = REG_SAVED_OFFSET; ++ fs->regs.reg[i + 32].loc.offset = (long) &sc->sc_fpregs[i] - new_cfa; ++ } ++ fs->regs.reg[64].how = REG_SAVED_OFFSET; ++ fs->regs.reg[64].loc.offset = (long) &sc->sc_pc - new_cfa; ++ fs->retaddr_column = 64; ++ fs->signal_frame = 1; ++ ++ return _URC_NO_REASON; ++} ++ ++#define MD_FROB_UPDATE_CONTEXT sw_64_frob_update_context ++ ++/* Fix up for signal handlers that don't have S flag set. */ ++ ++static void ++sw_64_frob_update_context (struct _Unwind_Context *context, ++ _Unwind_FrameState *fs ATTRIBUTE_UNUSED) ++{ ++ unsigned int *pc = context->ra; ++ ++ if (pc[0] == 0x47fe0410 /* mov $30,$16 */ ++ && pc[2] == 0x00000083 /* callsys */ ++ && (pc[1] == 0x201f0067 /* lda $0,NR_sigreturn */ ++ || pc[1] == 0x201f015f)) /* lda $0,NR_rt_sigreturn */ ++ _Unwind_SetSignalFrame (context, 1); ++} ++#endif +diff --git a/libgcc/config/sw_64/qrnnd.S b/libgcc/config/sw_64/qrnnd.S +new file mode 100644 +index 000000000..d22b31b4e +--- /dev/null ++++ b/libgcc/config/sw_64/qrnnd.S +@@ -0,0 +1,181 @@ ++ # Sw_64 __udiv_qrnnd ++ # Copyright (C) 1992-2020 Free Software Foundation, Inc. ++ ++ # This file is part of GCC. ++ ++ # The GNU MP Library is free software; you can redistribute it and/or modify ++ # it under the terms of the GNU General Public License as published by ++ # the Free Software Foundation; either version 3 of the License, or (at your ++ # option) any later version. ++ ++ # This file is distributed in the hope that it will be useful, but ++ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY ++ # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public ++ # License for more details. ++ ++ # Under Section 7 of GPL version 3, you are granted additional ++ # permissions described in the GCC Runtime Library Exception, version ++ # 3.1, as published by the Free Software Foundation. ++ ++ # You should have received a copy of the GNU General Public License and ++ # a copy of the GCC Runtime Library Exception along with this program; ++ # see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ # . 
++ ++#ifdef __ELF__ ++.section .note.GNU-stack,"" ++#endif ++ ++ .set noreorder ++ .set noat ++ ++ .text ++ ++ .globl __udiv_qrnnd ++ .ent __udiv_qrnnd ++#ifdef __VMS__ ++__udiv_qrnnd..en: ++ .frame $29,0,$26,0 ++ .prologue ++#else ++__udiv_qrnnd: ++ .frame $30,0,$26,0 ++ .prologue 0 ++#endif ++/* ++ ldiq -> ldi ++ addq->addl ++ subq->subl ++ cmovne qb,tmp,n1->selne qb,tmp,n1,n1 ++ stq ->stl ++ cmoveq tmp,AT,n1(n0)->seleq tmp,AT,n1,n1(n0,n0) */ ++#define cnt $2 ++#define tmp $3 ++#define rem_ptr $16 ++#define n1 $17 ++#define n0 $18 ++#define d $19 ++#define qb $20 ++#define AT $at ++ ++ ldi cnt,16 ++ blt d,$largedivisor ++ ++$loop1: cmplt n0,0,tmp ++ addl n1,n1,n1 ++ bis n1,tmp,n1 ++ addl n0,n0,n0 ++ cmpule d,n1,qb ++ subl n1,d,tmp ++ selne qb,tmp,n1,n1 ++ bis n0,qb,n0 ++ cmplt n0,0,tmp ++ addl n1,n1,n1 ++ bis n1,tmp,n1 ++ addl n0,n0,n0 ++ cmpule d,n1,qb ++ subl n1,d,tmp ++ selne qb,tmp,n1,n1 ++ bis n0,qb,n0 ++ cmplt n0,0,tmp ++ addl n1,n1,n1 ++ bis n1,tmp,n1 ++ addl n0,n0,n0 ++ cmpule d,n1,qb ++ subl n1,d,tmp ++ selne qb,tmp,n1,n1 ++ bis n0,qb,n0 ++ cmplt n0,0,tmp ++ addl n1,n1,n1 ++ bis n1,tmp,n1 ++ addl n0,n0,n0 ++ cmpule d,n1,qb ++ subl n1,d,tmp ++ selne qb,tmp,n1,n1 ++ bis n0,qb,n0 ++ subl cnt,1,cnt ++ bgt cnt,$loop1 ++ stl n1,0(rem_ptr) ++ bis $31,n0,$0 ++ ret $31,($26),1 ++ ++$largedivisor: ++ and n0,1,$4 ++ ++ srl n0,1,n0 ++ sll n1,63,tmp ++ or tmp,n0,n0 ++ srl n1,1,n1 ++ ++ and d,1,$6 ++ srl d,1,$5 ++ addl $5,$6,$5 ++ ++$loop2: cmplt n0,0,tmp ++ addl n1,n1,n1 ++ bis n1,tmp,n1 ++ addl n0,n0,n0 ++ cmpule $5,n1,qb ++ subl n1,$5,tmp ++ selne qb,tmp,n1,n1 ++ bis n0,qb,n0 ++ cmplt n0,0,tmp ++ addl n1,n1,n1 ++ bis n1,tmp,n1 ++ addl n0,n0,n0 ++ cmpule $5,n1,qb ++ subl n1,$5,tmp ++ selne qb,tmp,n1,n1 ++ bis n0,qb,n0 ++ cmplt n0,0,tmp ++ addl n1,n1,n1 ++ bis n1,tmp,n1 ++ addl n0,n0,n0 ++ cmpule $5,n1,qb ++ subl n1,$5,tmp ++ selne qb,tmp,n1,n1 ++ bis n0,qb,n0 ++ cmplt n0,0,tmp ++ addl n1,n1,n1 ++ bis n1,tmp,n1 ++ addl n0,n0,n0 ++ cmpule $5,n1,qb ++ subl n1,$5,tmp ++ selne qb,tmp,n1,n1 ++ bis n0,qb,n0 ++ subl cnt,1,cnt ++ bgt cnt,$loop2 ++ ++ addl n1,n1,n1 ++ addl $4,n1,n1 ++ bne $6,$Odd ++ stl n1,0(rem_ptr) ++ bis $31,n0,$0 ++ ret $31,($26),1 ++ ++$Odd: ++ /* q' in n0. r' in n1 */ ++ addl n1,n0,n1 ++ ++ cmpult n1,n0,tmp # tmp := carry from addl ++ subl n1,d,AT ++ addl n0,tmp,n0 ++ selne tmp,AT,n1,n1 ++ ++ cmpult n1,d,tmp ++ addl n0,1,AT ++ seleq tmp,AT,n0,n0 ++ subl n1,d,AT ++ seleq tmp,AT,n1,n1 ++ ++ stl n1,0(rem_ptr) ++ bis $31,n0,$0 ++ ret $31,($26),1 ++ ++#ifdef __VMS__ ++ .link ++ .align 3 ++__udiv_qrnnd: ++ .pdesc __udiv_qrnnd..en,null ++#endif ++ .end __udiv_qrnnd +diff --git a/libgcc/config/sw_64/t-ieee b/libgcc/config/sw_64/t-ieee +new file mode 100644 +index 000000000..9b66e50ac +--- /dev/null ++++ b/libgcc/config/sw_64/t-ieee +@@ -0,0 +1,2 @@ ++# All sw_64s get an IEEE complaint set of libraries. ++#HOST_LIBGCC2_CFLAGS += -mieee +diff --git a/libgcc/config/sw_64/t-linux b/libgcc/config/sw_64/t-linux +new file mode 100644 +index 000000000..0b7b7e6a1 +--- /dev/null ++++ b/libgcc/config/sw_64/t-linux +@@ -0,0 +1,2 @@ ++SHLIB_MAPFILES += $(srcdir)/config/sw_64/libgcc-sw_64-ldbl.ver ++ +diff --git a/libgcc/config/sw_64/t-sw_64 b/libgcc/config/sw_64/t-sw_64 +new file mode 100644 +index 000000000..dffba8ee7 +--- /dev/null ++++ b/libgcc/config/sw_64/t-sw_64 +@@ -0,0 +1,6 @@ ++# This is a support routine for longlong.h, used by libgcc2.c. 
++LIB2ADD += $(srcdir)/config/sw_64/qrnnd.S ++ ++# When GAS-generated unwind tables are created, they get created ++# after the __FRAME_END__ terminator, which causes an ld error. ++CRTSTUFF_T_CFLAGS = -fno-unwind-tables +diff --git a/libgcc/libgcc2.c b/libgcc/libgcc2.c +index e0a9fd712..50aa1bf06 100644 +--- a/libgcc/libgcc2.c ++++ b/libgcc/libgcc2.c +@@ -2187,7 +2187,7 @@ int mprotect (char *,int, int); + int + getpagesize (void) + { +-#ifdef _ALPHA_ ++#if defined _ALPHA_ || defined _SW_64_ + return 8192; + #else + return 4096; +-- +2.43.0 + diff --git a/0006-Sw64-Port-libgfortran.patch b/0006-Sw64-Port-libgfortran.patch new file mode 100644 index 0000000..8138768 --- /dev/null +++ b/0006-Sw64-Port-libgfortran.patch @@ -0,0 +1,55 @@ +From 730935d32c3203e65532fac66e0621769835c9a4 Mon Sep 17 00:00:00 2001 +From: swcompiler +Date: Tue, 15 Oct 2024 14:01:41 +0800 +Subject: [PATCH 06/13] Sw64 Port: libgfortran + +--- + libgfortran/config/fpu-glibc.h | 6 +++--- + libgfortran/configure.host | 2 ++ + 2 files changed, 5 insertions(+), 3 deletions(-) + +diff --git a/libgfortran/config/fpu-glibc.h b/libgfortran/config/fpu-glibc.h +index 2abb0da6b..f41530597 100644 +--- a/libgfortran/config/fpu-glibc.h ++++ b/libgfortran/config/fpu-glibc.h +@@ -446,7 +446,7 @@ set_fpu_state (void *state) + int + support_fpu_underflow_control (int kind __attribute__((unused))) + { +-#if defined(__alpha__) && defined(FE_MAP_UMZ) ++#if (defined(__alpha__)|| defined(__sw_64__)) && defined(FE_MAP_UMZ) + return (kind == 4 || kind == 8) ? 1 : 0; + #else + return 0; +@@ -457,7 +457,7 @@ support_fpu_underflow_control (int kind __attribute__((unused))) + int + get_fpu_underflow_mode (void) + { +-#if defined(__alpha__) && defined(FE_MAP_UMZ) ++#if (defined(__alpha__)|| defined(__sw_64__)) && defined(FE_MAP_UMZ) + + fenv_t state = __ieee_get_fp_control (); + +@@ -475,7 +475,7 @@ get_fpu_underflow_mode (void) + void + set_fpu_underflow_mode (int gradual __attribute__((unused))) + { +-#if defined(__alpha__) && defined(FE_MAP_UMZ) ++#if (defined(__alpha__)|| defined(__sw_64__)) && defined(FE_MAP_UMZ) + + fenv_t state = __ieee_get_fp_control (); + +diff --git a/libgfortran/configure.host b/libgfortran/configure.host +index 5824f253e..85407b61e 100644 +--- a/libgfortran/configure.host ++++ b/libgfortran/configure.host +@@ -56,4 +56,6 @@ case "${host_cpu}" in + ieee_flags="-mieee" ;; + sh*) + ieee_flags="-mieee" ;; ++ sw_64*) ++ ieee_flags="-mieee" ;; + esac +-- +2.43.0 + diff --git a/0007-Sw64-Port-libgo.patch b/0007-Sw64-Port-libgo.patch new file mode 100644 index 0000000..8f28250 --- /dev/null +++ b/0007-Sw64-Port-libgo.patch @@ -0,0 +1,565 @@ +From 56d91a1b8837b20c8e73034928dfe54caf598dbd Mon Sep 17 00:00:00 2001 +From: swcompiler +Date: Tue, 15 Oct 2024 14:02:13 +0800 +Subject: [PATCH 07/13] Sw64 Port: libgo + +--- + libgo/configure | 7 +- + libgo/configure.ac | 7 +- + libgo/go/cmd/cgo/main.go | 2 + + libgo/go/cmd/internal/sys/arch.go | 12 +++- + libgo/go/debug/elf/elf.go | 72 +++++++++++++++++++ + libgo/go/debug/elf/elf_test.go | 1 + + libgo/go/debug/elf/file.go | 49 +++++++++++++ + libgo/go/encoding/xml/xml.go | 1 + + libgo/go/go/build/syslist.go | 2 +- + .../syscall/unix/getrandom_linux_sw_64.go | 9 +++ + libgo/go/net/listen_test.go | 2 +- + libgo/go/regexp/testdata/basic.dat | 1 + + libgo/go/runtime/hash64.go | 2 +- + libgo/go/runtime/lfstack_64bit.go | 2 +- + libgo/go/runtime/mpagealloc_64bit.go | 2 +- + libgo/go/syscall/endian_little.go | 2 +- + libgo/go/syscall/libcall_linux_sw_64.go | 13 ++++ + 
libgo/go/syscall/syscall_linux_sw_64.go | 25 +++++++ + libgo/goarch.sh | 5 ++ + libgo/match.sh | 4 +- + libgo/mksysinfo.sh | 5 ++ + libgo/runtime/go-signal.c | 4 +- + 22 files changed, 215 insertions(+), 14 deletions(-) + create mode 100644 libgo/go/internal/syscall/unix/getrandom_linux_sw_64.go + create mode 100644 libgo/go/syscall/libcall_linux_sw_64.go + create mode 100644 libgo/go/syscall/syscall_linux_sw_64.go + +diff --git a/libgo/configure b/libgo/configure +index 2f787392a..51cff79ba 100644 +--- a/libgo/configure ++++ b/libgo/configure +@@ -14070,10 +14070,10 @@ esac + # - libgo/go/syscall/endian_XX.go + # - possibly others + # - possibly update files in libgo/go/internal/syscall/unix +-ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv riscv64 s390 s390x sh shbe sparc sparc64 wasm" ++ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv riscv64 s390 s390x sh shbe sparc sparc64 sw_64 wasm" + + # All known GOARCH family values. +-ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 NIOS2 PPC PPC64 RISCV RISCV64 S390 S390X SH SPARC SPARC64 WASM" ++ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 NIOS2 PPC PPC64 RISCV RISCV64 S390 S390X SH SPARC SPARC64 SW_64 WASM" + + GOARCH=unknown + case ${host} in +@@ -14256,6 +14256,9 @@ else + fi + rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext + ;; ++ sw_64*-*-*) ++ GOARCH=sw_64 ++ ;; + esac + + +diff --git a/libgo/configure.ac b/libgo/configure.ac +index f800d44a0..91cfe3513 100644 +--- a/libgo/configure.ac ++++ b/libgo/configure.ac +@@ -236,10 +236,10 @@ AC_SUBST(USE_DEJAGNU) + # - libgo/go/syscall/endian_XX.go + # - possibly others + # - possibly update files in libgo/go/internal/syscall/unix +-ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv riscv64 s390 s390x sh shbe sparc sparc64 wasm" ++ALLGOARCH="386 alpha amd64 amd64p32 arm armbe arm64 arm64be ia64 m68k mips mipsle mips64 mips64le mips64p32 mips64p32le nios2 ppc ppc64 ppc64le riscv riscv64 s390 s390x sh shbe sparc sparc64 sw_64 wasm" + + # All known GOARCH family values. 
+-ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 NIOS2 PPC PPC64 RISCV RISCV64 S390 S390X SH SPARC SPARC64 WASM" ++ALLGOARCHFAMILY="I386 ALPHA AMD64 ARM ARM64 IA64 M68K MIPS MIPS64 NIOS2 PPC PPC64 RISCV RISCV64 S390 S390X SH SPARC SPARC64 SW_64 WASM" + + GOARCH=unknown + case ${host} in +@@ -361,6 +361,9 @@ AC_COMPILE_IFELSE([AC_LANG_SOURCE([ + [GOARCH=sparc], + [GOARCH=sparc64]) + ;; ++ sw_64*-*-*) ++ GOARCH=sw_64 ++ ;; + esac + AC_SUBST(GOARCH) + AC_SUBST(ALLGOARCH) +diff --git a/libgo/go/cmd/cgo/main.go b/libgo/go/cmd/cgo/main.go +index 80f35681d..366abd106 100644 +--- a/libgo/go/cmd/cgo/main.go ++++ b/libgo/go/cmd/cgo/main.go +@@ -191,6 +191,7 @@ var ptrSizeMap = map[string]int64{ + "shbe": 4, + "sparc": 4, + "sparc64": 8, ++ "sw_64": 8, + } + + var intSizeMap = map[string]int64{ +@@ -217,6 +218,7 @@ var intSizeMap = map[string]int64{ + "shbe": 4, + "sparc": 4, + "sparc64": 8, ++ "sw_64": 8, + } + + var cPrefix string +diff --git a/libgo/go/cmd/internal/sys/arch.go b/libgo/go/cmd/internal/sys/arch.go +index e8687363d..604bbec61 100644 +--- a/libgo/go/cmd/internal/sys/arch.go ++++ b/libgo/go/cmd/internal/sys/arch.go +@@ -12,6 +12,7 @@ type ArchFamily byte + + const ( + NoArch ArchFamily = iota ++ SW_64 + AMD64 + ARM + ARM64 +@@ -169,8 +170,17 @@ var ArchWasm = &Arch{ + RegSize: 8, + MinLC: 1, + } +- ++/*TODO*/ ++var ArchSW_64 = &Arch{ ++ Name: "sw_64", ++ Family: SW_64, ++ ByteOrder: binary.LittleEndian, ++ PtrSize: 8, ++ RegSize: 8, ++ MinLC: 1, ++} + var Archs = [...]*Arch{ ++ ArchSW_64, + Arch386, + ArchAMD64, + ArchARM, +diff --git a/libgo/go/debug/elf/elf.go b/libgo/go/debug/elf/elf.go +index 96a67ce73..c417537b9 100644 +--- a/libgo/go/debug/elf/elf.go ++++ b/libgo/go/debug/elf/elf.go +@@ -6,6 +6,7 @@ + * $FreeBSD: src/sys/sys/elf64.h,v 1.10.14.1 2005/12/30 22:13:58 marcel Exp $ + * $FreeBSD: src/sys/sys/elf_common.h,v 1.15.8.1 2005/12/30 22:13:58 marcel Exp $ + * $FreeBSD: src/sys/alpha/include/elf.h,v 1.14 2003/09/25 01:10:22 peter Exp $ ++ * $FreeBSD: src/sys/sw_64/include/elf.h,v 1.14 2003/09/25 01:10:22 peter Exp $ + * $FreeBSD: src/sys/amd64/include/elf.h,v 1.18 2004/08/03 08:21:48 dfr Exp $ + * $FreeBSD: src/sys/arm/include/elf.h,v 1.5.2.1 2006/06/30 21:42:52 cognet Exp $ + * $FreeBSD: src/sys/i386/include/elf.h,v 1.16 2004/08/02 19:12:17 dfr Exp $ +@@ -390,6 +391,8 @@ const ( + EM_MIPS_RS4_BE Machine = 10 /* MIPS R4000 Big-Endian */ + EM_ALPHA_STD Machine = 41 /* Digital Alpha (standard value). */ + EM_ALPHA Machine = 0x9026 /* Alpha (written in the absence of an ABI) */ ++ EM_SW_64_STD Machine = 41 /* Digital Sw_64 (standard value). */ ++ EM_SW_64 Machine = 0x9916 /* mieee-opt Sw_64 (written in the absence of an ABI) */ + ) + + var machineStrings = []intName{ +@@ -581,6 +584,8 @@ var machineStrings = []intName{ + {10, "EM_MIPS_RS4_BE"}, + {41, "EM_ALPHA_STD"}, + {0x9026, "EM_ALPHA"}, ++ {41, "EM_SW_64_STD"}, ++ {0x9916, "EM_SW_64"}, + } + + func (i Machine) String() string { return stringName(uint32(i), machineStrings, false) } +@@ -1463,6 +1468,73 @@ var ralphaStrings = []intName{ + + func (i R_ALPHA) String() string { return stringName(uint32(i), ralphaStrings, false) } + func (i R_ALPHA) GoString() string { return stringName(uint32(i), ralphaStrings, true) } ++// Relocation types for SW_64. 
++type R_SW_64 int ++ ++const ( ++ R_SW_64_NONE R_SW_64 = 0 /* No reloc */ ++ R_SW_64_REFLONG R_SW_64 = 1 /* Direct 32 bit */ ++ R_SW_64_REFQUAD R_SW_64 = 2 /* Direct 64 bit */ ++ R_SW_64_GPREL32 R_SW_64 = 3 /* GP relative 32 bit */ ++ R_SW_64_LITERAL R_SW_64 = 4 /* GP relative 16 bit w/optimization */ ++ R_SW_64_LITUSE R_SW_64 = 5 /* Optimization hint for LITERAL */ ++ R_SW_64_GPDISP R_SW_64 = 6 /* Add displacement to GP */ ++ R_SW_64_BRADDR R_SW_64 = 7 /* PC+4 relative 23 bit shifted */ ++ R_SW_64_HINT R_SW_64 = 8 /* PC+4 relative 16 bit shifted */ ++ R_SW_64_SREL16 R_SW_64 = 9 /* PC relative 16 bit */ ++ R_SW_64_SREL32 R_SW_64 = 10 /* PC relative 32 bit */ ++ R_SW_64_SREL64 R_SW_64 = 11 /* PC relative 64 bit */ ++ R_SW_64_OP_PUSH R_SW_64 = 12 /* OP stack push */ ++ R_SW_64_OP_STORE R_SW_64 = 13 /* OP stack pop and store */ ++ R_SW_64_OP_PSUB R_SW_64 = 14 /* OP stack subtract */ ++ R_SW_64_OP_PRSHIFT R_SW_64 = 15 /* OP stack right shift */ ++ R_SW_64_GPVALUE R_SW_64 = 16 ++ R_SW_64_GPRELHIGH R_SW_64 = 17 ++ R_SW_64_GPRELLOW R_SW_64 = 18 ++ R_SW_64_IMMED_GP_16 R_SW_64 = 19 ++ R_SW_64_IMMED_GP_HI32 R_SW_64 = 20 ++ R_SW_64_IMMED_SCN_HI32 R_SW_64 = 21 ++ R_SW_64_IMMED_BR_HI32 R_SW_64 = 22 ++ R_SW_64_IMMED_LO32 R_SW_64 = 23 ++ R_SW_64_COPY R_SW_64 = 24 /* Copy sympol at runtime */ ++ R_SW_64_GLOB_DAT R_SW_64 = 25 /* Create GOT entry */ ++ R_SW_64_JMP_SLOT R_SW_64 = 26 /* Create PLT entry */ ++ R_SW_64_RELATIVE R_SW_64 = 27 /* Adjust by program base */ ++) ++ ++var rsw_64Strings = []intName{ ++ {0, "R_SW_64_NONE"}, ++ {1, "R_SW_64_REFLONG"}, ++ {2, "R_SW_64_REFQUAD"}, ++ {3, "R_SW_64_GPREL32"}, ++ {4, "R_SW_64_LITERAL"}, ++ {5, "R_SW_64_LITUSE"}, ++ {6, "R_SW_64_GPDISP"}, ++ {7, "R_SW_64_BRADDR"}, ++ {8, "R_SW_64_HINT"}, ++ {9, "R_SW_64_SREL16"}, ++ {10, "R_SW_64_SREL32"}, ++ {11, "R_SW_64_SREL64"}, ++ {12, "R_SW_64_OP_PUSH"}, ++ {13, "R_SW_64_OP_STORE"}, ++ {14, "R_SW_64_OP_PSUB"}, ++ {15, "R_SW_64_OP_PRSHIFT"}, ++ {16, "R_SW_64_GPVALUE"}, ++ {17, "R_SW_64_GPRELHIGH"}, ++ {18, "R_SW_64_GPRELLOW"}, ++ {19, "R_SW_64_IMMED_GP_16"}, ++ {20, "R_SW_64_IMMED_GP_HI32"}, ++ {21, "R_SW_64_IMMED_SCN_HI32"}, ++ {22, "R_SW_64_IMMED_BR_HI32"}, ++ {23, "R_SW_64_IMMED_LO32"}, ++ {24, "R_SW_64_COPY"}, ++ {25, "R_SW_64_GLOB_DAT"}, ++ {26, "R_SW_64_JMP_SLOT"}, ++ {27, "R_SW_64_RELATIVE"}, ++} ++ ++func (i R_SW_64) String() string { return stringName(uint32(i), rsw_64Strings, false) } ++func (i R_SW_64) GoString() string { return stringName(uint32(i), rsw_64Strings, true) } + + // Relocation types for ARM. 
+ type R_ARM int +diff --git a/libgo/go/debug/elf/elf_test.go b/libgo/go/debug/elf/elf_test.go +index f8985a899..b4dccf386 100644 +--- a/libgo/go/debug/elf/elf_test.go ++++ b/libgo/go/debug/elf/elf_test.go +@@ -31,6 +31,7 @@ var nameTests = []nameTest{ + {STV_HIDDEN, "STV_HIDDEN"}, + {R_X86_64_PC32, "R_X86_64_PC32"}, + {R_ALPHA_OP_PUSH, "R_ALPHA_OP_PUSH"}, ++ {R_SW_64_OP_PUSH, "R_SW_64_OP_PUSH"}, + {R_ARM_THM_ABS5, "R_ARM_THM_ABS5"}, + {R_386_GOT32, "R_386_GOT32"}, + {R_PPC_GOT16_HI, "R_PPC_GOT16_HI"}, +diff --git a/libgo/go/debug/elf/file.go b/libgo/go/debug/elf/file.go +index b9a8b1e0c..eea0f9aa1 100644 +--- a/libgo/go/debug/elf/file.go ++++ b/libgo/go/debug/elf/file.go +@@ -627,6 +627,8 @@ func (f *File) applyRelocations(dst []byte, rels []byte) error { + return f.applyRelocationsSPARC64(dst, rels) + case f.Class == ELFCLASS64 && f.Machine == EM_ALPHA: + return f.applyRelocationsALPHA(dst, rels) ++ case f.Class == ELFCLASS64 && f.Machine == EM_SW_64: ++ return f.applyRelocationsSW_64(dst, rels) + default: + return errors.New("applyRelocations: not implemented") + } +@@ -1238,6 +1240,53 @@ func (f *File) applyRelocationsALPHA(dst []byte, rels []byte) error { + return nil + } + ++//SW_64 begin ++ ++func (f *File) applyRelocationsSW_64(dst []byte, rels []byte) error { ++ // 24 is the size of Rela64. ++ if len(rels)%24 != 0 { ++ return errors.New("length of relocation section is not a multiple of 24") ++ } ++ ++ symbols, _, err := f.getSymbols(SHT_SYMTAB) ++ if err != nil { ++ return err ++ } ++ ++ b := bytes.NewReader(rels) ++ var rela Rela64 ++ for b.Len() > 0 { ++ binary.Read(b, f.ByteOrder, &rela) ++ symNo := rela.Info >> 32 ++ t := R_SW_64(rela.Info & 0xffff) ++ ++ if symNo == 0 || symNo > uint64(len(symbols)) { ++ continue ++ } ++ sym := &symbols[symNo-1] ++ if SymType(sym.Info&0xf) != STT_SECTION { ++ // We don't handle non-section relocations for now. ++ continue ++ } ++ ++ // There are relocations, so this must be a normal ++ // object file, and we only look at section symbols, ++ // so we assume that the symbol value is 0. ++ switch t { ++ case R_SW_64_REFQUAD: ++ if rela.Off+8 >= uint64(len(dst)) || rela.Addend < 0 { ++ continue ++ } ++ f.ByteOrder.PutUint64(dst[rela.Off:rela.Off+8], uint64(rela.Addend)) ++ case R_SW_64_REFLONG: ++ if rela.Off+4 >= uint64(len(dst)) || rela.Addend < 0 { ++ } ++ f.ByteOrder.PutUint32(dst[rela.Off:rela.Off+4], uint32(rela.Addend)) ++ } ++ } ++ return nil ++} ++//SW_64 end + func (f *File) DWARF() (*dwarf.Data, error) { + dwarfSuffix := func(s *Section) string { + switch { +diff --git a/libgo/go/encoding/xml/xml.go b/libgo/go/encoding/xml/xml.go +index 5e73dcf73..1a6e2860e 100644 +--- a/libgo/go/encoding/xml/xml.go ++++ b/libgo/go/encoding/xml/xml.go +@@ -1719,6 +1719,7 @@ var htmlEntity = map[string]string{ + "Psi": "\u03A8", + "Omega": "\u03A9", + "alpha": "\u03B1", ++ "sw_64": "\u03B1", + "beta": "\u03B2", + "gamma": "\u03B3", + "delta": "\u03B4", +diff --git a/libgo/go/go/build/syslist.go b/libgo/go/go/build/syslist.go +index d72649b8b..c0975fa96 100644 +--- a/libgo/go/go/build/syslist.go ++++ b/libgo/go/go/build/syslist.go +@@ -8,4 +8,4 @@ package build + // Do not remove from this list, as these are used for go/build filename matching. 
+ + const goosList = "aix android darwin dragonfly freebsd hurd illumos js linux nacl netbsd openbsd plan9 solaris windows zos " +-const goarchList = "386 amd64 amd64p32 arm armbe arm64 arm64be ppc64 ppc64le mips mipsle mips64 mips64le mips64p32 mips64p32le ppc riscv riscv64 s390 s390x sparc sparc64 wasm alpha m68k nios2 sh shbe " ++const goarchList = "386 amd64 amd64p32 arm armbe arm64 arm64be ppc64 ppc64le mips mipsle mips64 mips64le mips64p32 mips64p32le ppc riscv riscv64 s390 s390x sparc sparc64 wasm alpha sw_64 m68k nios2 sh shbe " +diff --git a/libgo/go/internal/syscall/unix/getrandom_linux_sw_64.go b/libgo/go/internal/syscall/unix/getrandom_linux_sw_64.go +new file mode 100644 +index 000000000..9587b5aa4 +--- /dev/null ++++ b/libgo/go/internal/syscall/unix/getrandom_linux_sw_64.go +@@ -0,0 +1,9 @@ ++// Copyright 2016 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++package unix ++ ++// Linux getrandom system call number. ++// See GetRandom in getrandom_linux.go. ++const randomTrap uintptr = 511 +diff --git a/libgo/go/net/listen_test.go b/libgo/go/net/listen_test.go +index d8c72096e..ba7808774 100644 +--- a/libgo/go/net/listen_test.go ++++ b/libgo/go/net/listen_test.go +@@ -677,7 +677,7 @@ func multicastRIBContains(ip IP) (bool, error) { + case "aix", "dragonfly", "netbsd", "openbsd", "plan9", "solaris", "illumos", "windows": + return true, nil // not implemented yet + case "linux": +- if runtime.GOARCH == "arm" || runtime.GOARCH == "alpha" { ++ if runtime.GOARCH == "arm" || runtime.GOARCH == "alpha" || runtime.GOARCH == "sw_64" { + return true, nil // not implemented yet + } + } +diff --git a/libgo/go/regexp/testdata/basic.dat b/libgo/go/regexp/testdata/basic.dat +index 7859290ba..061c403d6 100644 +--- a/libgo/go/regexp/testdata/basic.dat ++++ b/libgo/go/regexp/testdata/basic.dat +@@ -157,6 +157,7 @@ E a[bcd]*dcdcde adcdcde (0,7) + E (ab|a)b*c abc (0,3)(0,2) + E ((a)(b)c)(d) abcd (0,4)(0,3)(0,1)(1,2)(3,4) + BE [A-Za-z_][A-Za-z0-9_]* alpha (0,5) ++BE [A-Za-z_][A-Za-z0-9_]* sw_64 (0,5) + E ^a(bc+|b[eh])g|.h$ abh (1,3) + E (bc+d$|ef*g.|h?i(j|k)) effgz (0,5)(0,5) + E (bc+d$|ef*g.|h?i(j|k)) ij (0,2)(0,2)(1,2) +diff --git a/libgo/go/runtime/hash64.go b/libgo/go/runtime/hash64.go +index 704bbe6f6..d7b9e0b22 100644 +--- a/libgo/go/runtime/hash64.go ++++ b/libgo/go/runtime/hash64.go +@@ -6,7 +6,7 @@ + // xxhash: https://code.google.com/p/xxhash/ + // cityhash: https://code.google.com/p/cityhash/ + +-// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm alpha amd64p32 arm64be ia64 mips64p32 mips64p32le sparc64 ++// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm alpha sw_64 amd64p32 arm64be ia64 mips64p32 mips64p32le sparc64 + + package runtime + +diff --git a/libgo/go/runtime/lfstack_64bit.go b/libgo/go/runtime/lfstack_64bit.go +index af9e7d164..d572e6656 100644 +--- a/libgo/go/runtime/lfstack_64bit.go ++++ b/libgo/go/runtime/lfstack_64bit.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. 
+ +-// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm arm64be alpha sparc64 ia64 ++// +build amd64 arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x wasm arm64be alpha sw_64 sparc64 ia64 + + package runtime + +diff --git a/libgo/go/runtime/mpagealloc_64bit.go b/libgo/go/runtime/mpagealloc_64bit.go +index 385b7b3e7..ceed0f442 100644 +--- a/libgo/go/runtime/mpagealloc_64bit.go ++++ b/libgo/go/runtime/mpagealloc_64bit.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + +-// +build amd64 !darwin,arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x arm64be alpha sparc64 ia64 ++// +build amd64 !darwin,arm64 mips64 mips64le ppc64 ppc64le riscv64 s390x arm64be alpha sparc64 ia64 sw_64 + + // See mpagealloc_32bit.go for why darwin/arm64 is excluded here. + +diff --git a/libgo/go/syscall/endian_little.go b/libgo/go/syscall/endian_little.go +index 0cd2d7524..b67d48079 100644 +--- a/libgo/go/syscall/endian_little.go ++++ b/libgo/go/syscall/endian_little.go +@@ -2,7 +2,7 @@ + // Use of this source code is governed by a BSD-style + // license that can be found in the LICENSE file. + // +-// +build 386 alpha amd64 amd64p32 arm arm64 ia64 mips64le mipsle mips64p32le nios2 ppc64le riscv64 sh wasm ++// +build 386 alpha sw_64 amd64 amd64p32 arm arm64 ia64 mips64le mipsle mips64p32le nios2 ppc64le riscv64 sh wasm + + package syscall + +diff --git a/libgo/go/syscall/libcall_linux_sw_64.go b/libgo/go/syscall/libcall_linux_sw_64.go +new file mode 100644 +index 000000000..f6bb7be29 +--- /dev/null ++++ b/libgo/go/syscall/libcall_linux_sw_64.go +@@ -0,0 +1,13 @@ ++// Copyright 2012 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. ++ ++// GNU/Linux library calls Sw_64 specific. ++ ++package syscall ++ ++//sys Ioperm(from int, num int, on int) (err error) ++//ioperm(from _C_long, num _C_long, on _C_int) _C_int ++ ++//sys Iopl(level int) (err error) ++//iopl(level _C_int) _C_int +diff --git a/libgo/go/syscall/syscall_linux_sw_64.go b/libgo/go/syscall/syscall_linux_sw_64.go +new file mode 100644 +index 000000000..5a87d687d +--- /dev/null ++++ b/libgo/go/syscall/syscall_linux_sw_64.go +@@ -0,0 +1,25 @@ ++// syscall_linux_sw_64.go -- GNU/Linux SW_64 specific support ++ ++// Copyright 2011 The Go Authors. All rights reserved. ++// Use of this source code is governed by a BSD-style ++// license that can be found in the LICENSE file. 
++ ++package syscall ++ ++import "unsafe" ++ ++func (r *PtraceRegs) PC() uint64 { ++ return r.Pc ++} ++ ++func (r *PtraceRegs) SetPC(pc uint64) { ++ r.Pc = pc ++} ++ ++func PtraceGetRegs(pid int, regsout *PtraceRegs) (err error) { ++ return ptrace(PTRACE_GETREGS, pid, 0, uintptr(unsafe.Pointer(regsout))) ++} ++ ++func PtraceSetRegs(pid int, regs *PtraceRegs) (err error) { ++ return ptrace(PTRACE_SETREGS, pid, 0, uintptr(unsafe.Pointer(regs))) ++} +diff --git a/libgo/goarch.sh b/libgo/goarch.sh +index a5b6217c9..7013301f4 100644 +--- a/libgo/goarch.sh ++++ b/libgo/goarch.sh +@@ -52,6 +52,11 @@ case $goarch in + defaultphyspagesize=8192 + pcquantum=4 + ;; ++ sw_64) ++ family=SW_64 ++ defaultphyspagesize=8192 ++ pcquantum=4 ++ ;; + amd64) + family=AMD64 + ;; +diff --git a/libgo/match.sh b/libgo/match.sh +index cd35942f8..028ea11a3 100644 +--- a/libgo/match.sh ++++ b/libgo/match.sh +@@ -116,7 +116,7 @@ for f in $gofiles; do + aix | android | darwin | dragonfly | freebsd | illumos | hurd | js | linux | nacl | netbsd | openbsd | plan9 | solaris | windows) + tag1=nonmatchingtag + ;; +- 386 | amd64 | amd64p32 | arm | armbe | arm64 | arm64be | alpha | ia64 | m68k | mips | mipsle | mips64 | mips64le | mips64p32 | mips64p32le | nios2 | ppc | ppc64 | ppc64le | riscv64 | s390 | s390x | sh | shbe | sparc | sparc64 | wasm) ++ 386 | amd64 | amd64p32 | arm | armbe | arm64 | arm64be | alpha | sw_64 | ia64 | m68k | mips | mipsle | mips64 | mips64le | mips64p32 | mips64p32le | nios2 | ppc | ppc64 | ppc64le | riscv64 | s390 | s390x | sh | shbe | sparc | sparc64 | wasm) + tag1=nonmatchingtag + ;; + esac +@@ -128,7 +128,7 @@ for f in $gofiles; do + aix | android | darwin | dragonfly | freebsd | hurd | illumos | js | linux | nacl | netbsd | openbsd | plan9 | solaris | windows) + tag2=nonmatchingtag + ;; +- 386 | amd64 | amd64p32 | arm | armbe | arm64 | arm64be | alpha | ia64 | m68k | mips | mipsle | mips64 | mips64le | mips64p32 | mips64p32le | nios2 | ppc | ppc64 | ppc64le | riscv64 | s390 | s390x | sh | shbe | sparc | sparc64 | wasm) ++ 386 | amd64 | amd64p32 | arm | armbe | arm64 | arm64be | alpha | sw_64 | ia64 | m68k | mips | mipsle | mips64 | mips64le | mips64p32 | mips64p32le | nios2 | ppc | ppc64 | ppc64le | riscv64 | s390 | s390x | sh | shbe | sparc | sparc64 | wasm) + tag2=nonmatchingtag + ;; + esac +diff --git a/libgo/mksysinfo.sh b/libgo/mksysinfo.sh +index bd2ba32cb..ce2d55710 100644 +--- a/libgo/mksysinfo.sh ++++ b/libgo/mksysinfo.sh +@@ -353,7 +353,12 @@ if test "$regs" = ""; then + # mips* + regs=`grep '^type _pt_regs struct' gen-sysinfo.go || true` + fi ++if test "$regs" = ""; then ++ # sw_64* ++ regs=`grep '^type _user_pt_regs struct' gen-sysinfo.go || true` ++fi + if test "$regs" != ""; then ++ regs=`echo $regs | sed -e 's/type _user_pt_regs struct//'` + regs=`echo $regs | sed -e 's/type _pt_regs struct//'` + regs=`echo $regs | + sed -e 's/type __*user_regs_struct struct //' -e 's/[{}]//g'` +diff --git a/libgo/runtime/go-signal.c b/libgo/runtime/go-signal.c +index b429fdb24..9f7eb1b8f 100644 +--- a/libgo/runtime/go-signal.c ++++ b/libgo/runtime/go-signal.c +@@ -223,6 +223,8 @@ getSiginfo(siginfo_t *info, void *context __attribute__((unused))) + ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.gregs[REG_EIP]; + #elif defined(__alpha__) && defined(__linux__) + ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.sc_pc; ++#elif defined(__sw_64__) && defined(__linux__) ++ ret.sigpc = ((ucontext_t*)(context))->uc_mcontext.sc_pc; + #elif defined(__PPC__) && defined(__linux__) + ret.sigpc = 
((ucontext_t*)(context))->uc_mcontext.regs->nip; + #elif defined(__PPC__) && defined(_AIX) +@@ -296,7 +298,7 @@ dumpregs(siginfo_t *info __attribute__((unused)), void *context __attribute__((u + runtime_printf("fs %x\n", m->gregs[REG_FS]); + runtime_printf("gs %x\n", m->gregs[REG_GS]); + } +-#elif defined(__alpha__) && defined(__linux__) ++#elif (defined(__alpha__)||defined(__sw_64__)) && defined(__linux__) + { + mcontext_t *m = &((ucontext_t*)(context))->uc_mcontext; + +-- +2.43.0 + diff --git a/0008-Sw64-Port-libgomp.patch b/0008-Sw64-Port-libgomp.patch new file mode 100644 index 0000000..ad59b16 --- /dev/null +++ b/0008-Sw64-Port-libgomp.patch @@ -0,0 +1,166 @@ +From 2eb4a09a897fae5931d58234a4b378be39bf6420 Mon Sep 17 00:00:00 2001 +From: swcompiler +Date: Tue, 15 Oct 2024 14:02:36 +0800 +Subject: [PATCH 08/13] Sw64 Port: libgomp + +--- + libgomp/config/linux/sw_64/futex.h | 102 +++++++++++++++++++++++++++++ + libgomp/configure | 6 ++ + libgomp/configure.tgt | 4 ++ + libgomp/libgomp.spec.in | 3 +- + 4 files changed, 114 insertions(+), 1 deletion(-) + create mode 100644 libgomp/config/linux/sw_64/futex.h + +diff --git a/libgomp/config/linux/sw_64/futex.h b/libgomp/config/linux/sw_64/futex.h +new file mode 100644 +index 000000000..cd19a9bb4 +--- /dev/null ++++ b/libgomp/config/linux/sw_64/futex.h +@@ -0,0 +1,102 @@ ++/* Copyright (C) 2005-2020 Free Software Foundation, Inc. ++ Contributed by Richard Henderson . ++ ++ This file is part of the GNU Offloading and Multi Processing Library ++ (libgomp). ++ ++ Libgomp is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3, or (at your option) ++ any later version. ++ ++ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* Provide target-specific access to the futex system call. 
*/ ++ ++#ifndef SYS_futex ++#define SYS_futex 394 ++#endif ++ ++static inline void ++futex_wait (int *addr, int val) ++{ ++ register long sc_0 __asm__("$0"); ++ register long sc_16 __asm__("$16"); ++ register long sc_17 __asm__("$17"); ++ register long sc_18 __asm__("$18"); ++ register long sc_19 __asm__("$19"); ++ ++ sc_0 = SYS_futex; ++ sc_16 = (long) addr; ++ sc_17 = gomp_futex_wait; ++ sc_18 = val; ++ sc_19 = 0; ++ __asm volatile("callsys" ++ : "=r"(sc_0), "=r"(sc_19) ++ : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19) ++ : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", "$23", ++ "$24", "$25", "$27", "$28", "memory"); ++ if (__builtin_expect (sc_19, 0) && sc_0 == ENOSYS) ++ { ++ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; ++ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; ++ sc_0 = SYS_futex; ++ sc_17 &= ~FUTEX_PRIVATE_FLAG; ++ sc_19 = 0; ++ __asm volatile("callsys" ++ : "=r"(sc_0), "=r"(sc_19) ++ : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19) ++ : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", ++ "$23", "$24", "$25", "$27", "$28", "memory"); ++ } ++} ++ ++static inline void ++futex_wake (int *addr, int count) ++{ ++ register long sc_0 __asm__("$0"); ++ register long sc_16 __asm__("$16"); ++ register long sc_17 __asm__("$17"); ++ register long sc_18 __asm__("$18"); ++ register long sc_19 __asm__("$19"); ++ ++ sc_0 = SYS_futex; ++ sc_16 = (long) addr; ++ sc_17 = gomp_futex_wake; ++ sc_18 = count; ++ __asm volatile("callsys" ++ : "=r"(sc_0), "=r"(sc_19) ++ : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18) ++ : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", "$23", ++ "$24", "$25", "$27", "$28", "memory"); ++ if (__builtin_expect (sc_19, 0) && sc_0 == ENOSYS) ++ { ++ gomp_futex_wait &= ~FUTEX_PRIVATE_FLAG; ++ gomp_futex_wake &= ~FUTEX_PRIVATE_FLAG; ++ sc_0 = SYS_futex; ++ sc_17 &= ~FUTEX_PRIVATE_FLAG; ++ __asm volatile("callsys" ++ : "=r"(sc_0), "=r"(sc_19) ++ : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18) ++ : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", ++ "$23", "$24", "$25", "$27", "$28", "memory"); ++ } ++} ++ ++static inline void ++cpu_relax (void) ++{ ++ __asm volatile("" : : : "memory"); ++} +diff --git a/libgomp/configure b/libgomp/configure +index b03036c27..7d8f769d0 100644 +--- a/libgomp/configure ++++ b/libgomp/configure +@@ -11844,6 +11844,12 @@ case `echo $GFORTRAN` in + FC=no + fi ;; + esac ++case "${target}" in ++ sw_64-*-*) ++ FC="$GFORTRAN" ++ ;; ++*) ++esac + ac_ext=${ac_fc_srcext-f} + ac_compile='$FC -c $FCFLAGS $ac_fcflags_srcext conftest.$ac_ext >&5' + ac_link='$FC -o conftest$ac_exeext $FCFLAGS $LDFLAGS $ac_fcflags_srcext conftest.$ac_ext $LIBS >&5' +diff --git a/libgomp/configure.tgt b/libgomp/configure.tgt +index e5b558be0..8a1a8565e 100644 +--- a/libgomp/configure.tgt ++++ b/libgomp/configure.tgt +@@ -76,6 +76,10 @@ if test x$enable_linux_futex = xyes; then + config_path="linux/s390 linux posix" + ;; + ++ sw_64*-*-linux*) ++ config_path="linux/sw_64 linux posix" ++ ;; ++ + tile*-*-linux*) + config_path="linux/tile linux posix" + ;; +diff --git a/libgomp/libgomp.spec.in b/libgomp/libgomp.spec.in +index 5651603f4..738895d59 100644 +--- a/libgomp/libgomp.spec.in ++++ b/libgomp/libgomp.spec.in +@@ -1,3 +1,4 @@ + # This spec file is read by gcc when linking. It is used to specify the + # standard libraries we need in order to link with libgomp. 
+-*link_gomp: @link_gomp@ ++#*link_gomp: @link_gomp@ ++*link_gomp: @link_gomp@ --whole-archive -lpthread --no-whole-archive +-- +2.43.0 + diff --git a/0009-Sw64-Port-libitm.patch b/0009-Sw64-Port-libitm.patch new file mode 100644 index 0000000..7667256 --- /dev/null +++ b/0009-Sw64-Port-libitm.patch @@ -0,0 +1,260 @@ +From 4ee7e5bc62e1aa6be86f5c139cef2e1c3868f7ca Mon Sep 17 00:00:00 2001 +From: swcompiler +Date: Tue, 15 Oct 2024 14:03:06 +0800 +Subject: [PATCH 09/13] Sw64 Port: libitm + +--- + libitm/config/linux/sw_64/futex_bits.h | 56 +++++++++++++ + libitm/config/sw_64/sjlj.S | 112 +++++++++++++++++++++++++ + libitm/config/sw_64/target.h | 44 ++++++++++ + libitm/configure.tgt | 1 + + 4 files changed, 213 insertions(+) + create mode 100644 libitm/config/linux/sw_64/futex_bits.h + create mode 100644 libitm/config/sw_64/sjlj.S + create mode 100644 libitm/config/sw_64/target.h + +diff --git a/libitm/config/linux/sw_64/futex_bits.h b/libitm/config/linux/sw_64/futex_bits.h +new file mode 100644 +index 000000000..5688fc17a +--- /dev/null ++++ b/libitm/config/linux/sw_64/futex_bits.h +@@ -0,0 +1,56 @@ ++/* Copyright (C) 2008-2020 Free Software Foundation, Inc. ++ Contributed by Richard Henderson . ++ ++ This file is part of the GNU Transactional Memory Library (libitm). ++ ++ Libitm is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3 of the License, or ++ (at your option) any later version. ++ ++ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++/* Provide target-specific access to the futex system call. */ ++ ++#ifndef SYS_futex ++#define SYS_futex 394 ++#endif ++ ++static inline long ++sys_futex0 (std::atomic *addr, int op, int val) ++{ ++ register long sc_0 __asm__("$0"); ++ register long sc_16 __asm__("$16"); ++ register long sc_17 __asm__("$17"); ++ register long sc_18 __asm__("$18"); ++ register long sc_19 __asm__("$19"); ++ long res; ++ ++ sc_0 = SYS_futex; ++ sc_16 = (long) addr; ++ sc_17 = op; ++ sc_18 = val; ++ sc_19 = 0; ++ __asm volatile("callsys" ++ : "=r"(sc_0), "=r"(sc_19) ++ : "0"(sc_0), "r"(sc_16), "r"(sc_17), "r"(sc_18), "1"(sc_19) ++ : "$1", "$2", "$3", "$4", "$5", "$6", "$7", "$8", "$22", "$23", ++ "$24", "$25", "$27", "$28", "memory"); ++ ++ res = sc_0; ++ if (__builtin_expect (sc_19, 0)) ++ res = -res; ++ return res; ++} +diff --git a/libitm/config/sw_64/sjlj.S b/libitm/config/sw_64/sjlj.S +new file mode 100644 +index 000000000..5c62e3d23 +--- /dev/null ++++ b/libitm/config/sw_64/sjlj.S +@@ -0,0 +1,112 @@ ++/* Copyright (C) 2009-2020 Free Software Foundation, Inc. ++ Contributed by Richard Henderson . ++ ++ This file is part of the GNU Transactional Memory Library (libitm). 
++ ++ Libitm is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3 of the License, or ++ (at your option) any later version. ++ ++ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. ++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++ .text ++ .align 4 ++ .globl _ITM_beginTransaction ++ .ent _ITM_beginTransaction ++ ++#define FRAME 144 ++ ++_ITM_beginTransaction: ++ ldgp $29, 0($27) ++ subl $30, FRAME, $30 ++ .frame $30, FRAME, $26, 0 ++ .mask 0x04000000, 0 ++ stl $26, 0($30) ++ .prologue 1 ++ ++ stl $9, 8($30) ++ stl $10, 16($30) ++ addl $30, FRAME, $0 ++ stl $11, 24($30) ++ ++ stl $12, 32($30) ++ stl $13, 40($30) ++ stl $14, 48($30) ++ stl $15, 56($30) ++ ++ stl $0, 64($30) ++ fstd $f2, 72($30) ++ fstd $f3, 80($30) ++ fstd $f4, 88($30) ++ ++ fstd $f5, 96($30) ++ fstd $f6, 104($30) ++ fstd $f7, 112($30) ++ fstd $f8, 120($30) ++ ++ fstd $f9, 128($30) ++ mov $30, $17 ++#ifdef __PIC__ ++ unop ++ bsr $26, GTM_begin_transaction !samegp ++#else ++ call $26, GTM_begin_transaction ++ ldgp $29, 0($26) ++#endif ++ ++ ldl $26, 0($30) ++ addl $30, FRAME, $30 ++ ret ++.end _ITM_beginTransaction ++ ++ .align 4 ++ .globl GTM_longjmp ++#ifdef __ELF__ ++ .hidden GTM_longjmp ++#endif ++ .ent GTM_longjmp ++ ++GTM_longjmp: ++ .prologue 0 ++ ldl $26, 0($17) ++ ldl $9, 8($17) ++ ldl $10, 16($17) ++ ldl $11, 24($17) ++ ++ ldl $12, 32($17) ++ ldl $13, 40($17) ++ ldl $14, 48($17) ++ ldl $15, 56($17) ++ ++ ldl $1, 64($17) ++ fldd $f2, 72($17) ++ fldd $f3, 80($17) ++ fldd $f4, 88($17) ++ ++ fldd $f5, 96($17) ++ fldd $f6, 104($17) ++ fldd $f7, 112($17) ++ fldd $f8, 120($17) ++ ++ fldd $f9, 128($17) ++ mov $16, $0 ++ mov $1, $30 ++ ret ++.end GTM_longjmp ++ ++#ifdef __linux__ ++.section .note.GNU-stack, "", @progbits ++#endif +diff --git a/libitm/config/sw_64/target.h b/libitm/config/sw_64/target.h +new file mode 100644 +index 000000000..4cf8d8d41 +--- /dev/null ++++ b/libitm/config/sw_64/target.h +@@ -0,0 +1,44 @@ ++/* Copyright (C) 2009-2020 Free Software Foundation, Inc. ++ Contributed by Richard Henderson . ++ ++ This file is part of the GNU Transactional Memory Library (libitm). ++ ++ Libitm is free software; you can redistribute it and/or modify it ++ under the terms of the GNU General Public License as published by ++ the Free Software Foundation; either version 3 of the License, or ++ (at your option) any later version. ++ ++ Libitm is distributed in the hope that it will be useful, but WITHOUT ANY ++ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS ++ FOR A PARTICULAR PURPOSE. See the GNU General Public License for ++ more details. ++ ++ Under Section 7 of GPL version 3, you are granted additional ++ permissions described in the GCC Runtime Library Exception, version ++ 3.1, as published by the Free Software Foundation. 
++ ++ You should have received a copy of the GNU General Public License and ++ a copy of the GCC Runtime Library Exception along with this program; ++ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see ++ . */ ++ ++namespace GTM HIDDEN { ++ ++typedef struct gtm_jmpbuf ++{ ++ unsigned long pc; ++ unsigned long s[7]; ++ void *cfa; ++ unsigned long f[8]; ++} gtm_jmpbuf; ++ ++/* The size of one line in hardware caches (in bytes). */ ++#define HW_CACHELINE_SIZE 64 ++ ++static inline void ++cpu_relax (void) ++{ ++ __asm volatile("" : : : "memory"); ++} ++ ++} // namespace GTMHIDDEN +diff --git a/libitm/configure.tgt b/libitm/configure.tgt +index d1beb5c9e..30db505a7 100644 +--- a/libitm/configure.tgt ++++ b/libitm/configure.tgt +@@ -121,6 +121,7 @@ case "${target_cpu}" in + *) + ARCH="${target_cpu}" + ;; ++ sw_64*) ARCH=sw_64 ;; + esac + + # For the benefit of top-level configure, determine if the cpu is supported. +-- +2.43.0 + diff --git a/0010-Sw64-Port-libstdc.patch b/0010-Sw64-Port-libstdc.patch new file mode 100644 index 0000000..da22f14 --- /dev/null +++ b/0010-Sw64-Port-libstdc.patch @@ -0,0 +1,169 @@ +From 1efcb0bcbef4c1cc60cb700e7fe3cd28379eadb0 Mon Sep 17 00:00:00 2001 +From: swcompiler +Date: Tue, 15 Oct 2024 14:03:48 +0800 +Subject: [PATCH 10/13] Sw64 Port: libstdc++ + +--- + libstdc++-v3/acinclude.m4 | 2 +- + libstdc++-v3/configure | 5 +++-- + libstdc++-v3/configure.ac | 3 ++- + libstdc++-v3/configure.host | 3 +++ + libstdc++-v3/include/bits/hashtable_policy.h | 10 +++++----- + libstdc++-v3/src/c++11/hashtable_c++0x.cc | 12 ++++++------ + 6 files changed, 20 insertions(+), 15 deletions(-) + +diff --git a/libstdc++-v3/acinclude.m4 b/libstdc++-v3/acinclude.m4 +index b6557a434..302cce07e 100644 +--- a/libstdc++-v3/acinclude.m4 ++++ b/libstdc++-v3/acinclude.m4 +@@ -4787,7 +4787,7 @@ AC_DEFUN([GLIBCXX_CHECK_EXCEPTION_PTR_SYMVER], [ + AC_MSG_CHECKING([for first version to support std::exception_ptr]) + case ${target} in + aarch64-*-* | alpha-*-* | hppa*-*-* | i?86-*-* | x86_64-*-* | \ +- m68k-*-* | powerpc*-*-* | s390*-*-* | *-*-solaris* ) ++ m68k-*-* | powerpc*-*-* | s390*-*-* | sw_64-*-* | *-*-solaris* ) + ac_exception_ptr_since_gcc46=yes + ;; + *) +diff --git a/libstdc++-v3/configure b/libstdc++-v3/configure +index 766a0a8d5..f5e60c339 100644 +--- a/libstdc++-v3/configure ++++ b/libstdc++-v3/configure +@@ -74627,7 +74627,8 @@ case "$target" in + powerpc*-*-linux* | \ + sparc*-*-linux* | \ + s390*-*-linux* | \ +- alpha*-*-linux*) ++ alpha*-*-linux* | \ ++ sw_64*-*-linux*) + cat confdefs.h - <<_ACEOF >conftest.$ac_ext + /* end confdefs.h. */ + +@@ -76289,7 +76290,7 @@ _ACEOF + $as_echo_n "checking for first version to support std::exception_ptr... 
" >&6; } + case ${target} in + aarch64-*-* | alpha-*-* | hppa*-*-* | i?86-*-* | x86_64-*-* | \ +- m68k-*-* | powerpc*-*-* | s390*-*-* | *-*-solaris* ) ++ m68k-*-* | powerpc*-*-* | s390*-*-* | sw_64-*-* | *-*-solaris* ) + ac_exception_ptr_since_gcc46=yes + ;; + *) +diff --git a/libstdc++-v3/configure.ac b/libstdc++-v3/configure.ac +index 07cf05b68..0ea7f2991 100644 +--- a/libstdc++-v3/configure.ac ++++ b/libstdc++-v3/configure.ac +@@ -403,7 +403,8 @@ case "$target" in + powerpc*-*-linux* | \ + sparc*-*-linux* | \ + s390*-*-linux* | \ +- alpha*-*-linux*) ++ alpha*-*-linux* | \ ++ sw_64*-*-linux*) + AC_TRY_COMPILE(, [ + #if !defined __LONG_DOUBLE_128__ || (defined(__sparc__) && defined(__arch64__)) + #error no need for long double compatibility +diff --git a/libstdc++-v3/configure.host b/libstdc++-v3/configure.host +index 898db37d9..52f7cf225 100644 +--- a/libstdc++-v3/configure.host ++++ b/libstdc++-v3/configure.host +@@ -123,6 +123,9 @@ case "${host_cpu}" in + sparc* | ultrasparc) + try_cpu=sparc + ;; ++ sw_64*) ++ try_cpu=sw_64 ++ ;; + *) + if test -d ${glibcxx_srcdir}/config/cpu/${host_cpu}; then + try_cpu=${host_cpu} +diff --git a/libstdc++-v3/include/bits/hashtable_policy.h b/libstdc++-v3/include/bits/hashtable_policy.h +index ef1201349..565f2ad80 100644 +--- a/libstdc++-v3/include/bits/hashtable_policy.h ++++ b/libstdc++-v3/include/bits/hashtable_policy.h +@@ -460,7 +460,7 @@ namespace __detail + // Return a bucket count appropriate for n elements + std::size_t + _M_bkt_for_elements(std::size_t __n) const +- { return __builtin_ceill(__n / (long double)_M_max_load_factor); } ++ { return __builtin_ceil(__n / (double)_M_max_load_factor); } + + // __n_bkt is current bucket count, __n_elt is current element count, + // and __n_ins is number of elements to be inserted. Do we need to +@@ -560,7 +560,7 @@ namespace __detail + _M_next_resize = numeric_limits::max(); + else + _M_next_resize +- = __builtin_floorl(__res * (long double)_M_max_load_factor); ++ = __builtin_floor(__res * (double)_M_max_load_factor); + + return __res; + } +@@ -568,7 +568,7 @@ namespace __detail + // Return a bucket count appropriate for n elements + std::size_t + _M_bkt_for_elements(std::size_t __n) const noexcept +- { return __builtin_ceill(__n / (long double)_M_max_load_factor); } ++ { return __builtin_ceil(__n / (double)_M_max_load_factor); } + + // __n_bkt is current bucket count, __n_elt is current element count, + // and __n_ins is number of elements to be inserted. 
Do we need to +@@ -588,11 +588,11 @@ namespace __detail + / (long double)_M_max_load_factor; + if (__min_bkts >= __n_bkt) + return { true, +- _M_next_bkt(std::max(__builtin_floorl(__min_bkts) + 1, ++ _M_next_bkt(std::max(__builtin_floor(__min_bkts) + 1, + __n_bkt * _S_growth_factor)) }; + + _M_next_resize +- = __builtin_floorl(__n_bkt * (long double)_M_max_load_factor); ++ = __builtin_floor(__n_bkt * (double)_M_max_load_factor); + return { false, 0 }; + } + else +diff --git a/libstdc++-v3/src/c++11/hashtable_c++0x.cc b/libstdc++-v3/src/c++11/hashtable_c++0x.cc +index de8e2c7cb..5584efa71 100644 +--- a/libstdc++-v3/src/c++11/hashtable_c++0x.cc ++++ b/libstdc++-v3/src/c++11/hashtable_c++0x.cc +@@ -58,7 +58,7 @@ namespace __detail + return 1; + + _M_next_resize = +- __builtin_floorl(__fast_bkt[__n] * (long double)_M_max_load_factor); ++ __builtin_floor(__fast_bkt[__n] * (double)_M_max_load_factor); + return __fast_bkt[__n]; + } + +@@ -81,7 +81,7 @@ namespace __detail + _M_next_resize = numeric_limits::max(); + else + _M_next_resize = +- __builtin_floorl(*__next_bkt * (long double)_M_max_load_factor); ++ __builtin_floor(*__next_bkt * (double)_M_max_load_factor); + + return *__next_bkt; + } +@@ -105,16 +105,16 @@ namespace __detail + // If _M_next_resize is 0 it means that we have nothing allocated so + // far and that we start inserting elements. In this case we start + // with an initial bucket size of 11. +- long double __min_bkts ++ double __min_bkts + = std::max(__n_elt + __n_ins, _M_next_resize ? 0 : 11) +- / (long double)_M_max_load_factor; ++ / (double)_M_max_load_factor; + if (__min_bkts >= __n_bkt) + return { true, +- _M_next_bkt(std::max(__builtin_floorl(__min_bkts) + 1, ++ _M_next_bkt(std::max(__builtin_floor(__min_bkts) + 1, + __n_bkt * _S_growth_factor)) }; + + _M_next_resize +- = __builtin_floorl(__n_bkt * (long double)_M_max_load_factor); ++ = __builtin_floor(__n_bkt * (double)_M_max_load_factor); + return { false, 0 }; + } + else +-- +2.43.0 + diff --git a/0011-Sw64-Port-set-raise-FPE-when-DivbyZero-on-Sw_64-plat.patch b/0011-Sw64-Port-set-raise-FPE-when-DivbyZero-on-Sw_64-plat.patch new file mode 100644 index 0000000..6f1c35a --- /dev/null +++ b/0011-Sw64-Port-set-raise-FPE-when-DivbyZero-on-Sw_64-plat.patch @@ -0,0 +1,26 @@ +From fbfaea95ad718a602f0df362428be3c40a3f6395 Mon Sep 17 00:00:00 2001 +From: swcompiler +Date: Tue, 15 Oct 2024 14:04:50 +0800 +Subject: [PATCH 11/13] Sw64 Port: set raise FPE when DivbyZero on Sw_64 + platform + +--- + intl/dcigettext.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/intl/dcigettext.c b/intl/dcigettext.c +index a8d4a14d2..281f9340b 100644 +--- a/intl/dcigettext.c ++++ b/intl/dcigettext.c +@@ -73,7 +73,7 @@ extern int errno; + /* Guess whether integer division by zero raises signal SIGFPE. + Set to 1 only if you know for sure. In case of doubt, set to 0. 
*/ + # if defined __alpha__ || defined __arm__ || defined __i386__ \ +- || defined __m68k__ || defined __s390__ ++ || defined __m68k__ || defined __s390__ || defined __sw_64__ + # define INTDIV0_RAISES_SIGFPE 1 + # else + # define INTDIV0_RAISES_SIGFPE 0 +-- +2.43.0 + diff --git a/0012-Sw64-Port-add-lex-builtin-support-in-libcpp.patch b/0012-Sw64-Port-add-lex-builtin-support-in-libcpp.patch new file mode 100644 index 0000000..144bda2 --- /dev/null +++ b/0012-Sw64-Port-add-lex-builtin-support-in-libcpp.patch @@ -0,0 +1,34 @@ +From 8b607244e511772c5cda09b14ffbf3c938f3e66c Mon Sep 17 00:00:00 2001 +From: swcompiler +Date: Tue, 15 Oct 2024 14:05:36 +0800 +Subject: [PATCH 12/13] Sw64 Port: add lex builtin support in libcpp + +--- + libcpp/lex.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/libcpp/lex.c b/libcpp/lex.c +index 665297af7..df0329f61 100644 +--- a/libcpp/lex.c ++++ b/libcpp/lex.c +@@ -168,6 +168,8 @@ acc_char_cmp (word_type val, word_type c) + /* We can get exact results using a compare-bytes instruction. + Get (val == c) via (0 >= (val ^ c)). */ + return __builtin_alpha_cmpbge (0, val ^ c); ++#elif defined(__GNUC__) && defined(__sw_64__) ++ return __builtin_sw_64_cmpbge (0, val ^ c); + #else + word_type magic = 0x7efefefeU; + if (sizeof(word_type) == 8) +@@ -186,7 +188,7 @@ static inline int + acc_char_index (word_type cmp ATTRIBUTE_UNUSED, + word_type val ATTRIBUTE_UNUSED) + { +-#if defined(__GNUC__) && defined(__alpha__) && !WORDS_BIGENDIAN ++#if defined(__GNUC__) && (defined(__alpha__) || defined(__sw_64__))&& !WORDS_BIGENDIAN + /* The cmpbge instruction sets *bits* of the result corresponding to + matches in the bytes with no false positives. */ + return __builtin_ctzl (cmp); +-- +2.43.0 + diff --git a/0013-Sw64-Port-libsanitizer.patch b/0013-Sw64-Port-libsanitizer.patch new file mode 100644 index 0000000..5c50f02 --- /dev/null +++ b/0013-Sw64-Port-libsanitizer.patch @@ -0,0 +1,1183 @@ +From e761f00751d2f2263344b52593402ae90f9b0b35 Mon Sep 17 00:00:00 2001 +From: swcompiler +Date: Tue, 15 Oct 2024 14:06:04 +0800 +Subject: [PATCH 13/13] Sw64 Port: libsanitizer + +--- + libsanitizer/asan/asan_allocator.h | 5 + + libsanitizer/asan/asan_interceptors.cpp | 2 + + libsanitizer/asan/asan_mapping.h | 3 + + libsanitizer/configure.tgt | 4 + + libsanitizer/lsan/lsan_allocator.cpp | 2 +- + libsanitizer/lsan/lsan_allocator.h | 2 +- + libsanitizer/lsan/lsan_common.cpp | 2 + + libsanitizer/lsan/lsan_common.h | 2 +- + .../sanitizer_common_interceptors.inc | 4 + + .../sanitizer_common_syscalls.inc | 6 +- + .../sanitizer_common/sanitizer_linux.cpp | 132 +++++++++- + .../sanitizer_common/sanitizer_linux.h | 2 +- + .../sanitizer_linux_libcdep.cpp | 8 +- + .../sanitizer_common/sanitizer_platform.h | 8 + + .../sanitizer_platform_interceptors.h | 6 +- + .../sanitizer_platform_limits_linux.cpp | 2 +- + .../sanitizer_platform_limits_posix.cpp | 16 +- + .../sanitizer_platform_limits_posix.h | 22 +- + .../sanitizer_common/sanitizer_stacktrace.h | 2 + + .../sanitizer_stoptheworld_linux_libcdep.cpp | 7 +- + .../sanitizer_symbolizer_libcdep.cpp | 2 + + libsanitizer/tsan/Makefile.am | 2 +- + libsanitizer/tsan/Makefile.in | 2 +- + libsanitizer/tsan/tsan_interceptors_posix.cpp | 14 +- + libsanitizer/tsan/tsan_platform.h | 38 +++ + libsanitizer/tsan/tsan_platform_linux.cpp | 4 + + libsanitizer/tsan/tsan_platform_posix.cpp | 3 + + libsanitizer/tsan/tsan_rtl.h | 2 +- + libsanitizer/tsan/tsan_rtl_sw64.S | 236 ++++++++++++++++++ + 29 files changed, 496 insertions(+), 44 deletions(-) + 
create mode 100644 libsanitizer/tsan/tsan_rtl_sw64.S + +diff --git a/libsanitizer/asan/asan_allocator.h b/libsanitizer/asan/asan_allocator.h +index b37d8ef4e..78ab20f4d 100644 +--- a/libsanitizer/asan/asan_allocator.h ++++ b/libsanitizer/asan/asan_allocator.h +@@ -146,6 +146,11 @@ typedef DefaultSizeClassMap SizeClassMap; + const uptr kAllocatorSpace = ~(uptr)0; + const uptr kAllocatorSize = 0x8000000000ULL; // 500G + typedef DefaultSizeClassMap SizeClassMap; ++# elif SANITIZER_SW64 ++// If kSpaceBeg is ~0 then SpaceBeg is chosen dynamically my mmap. ++const uptr kAllocatorSpace = ~(uptr)0; ++const uptr kAllocatorSize = 0x40000000000ULL; // 4T. ++typedef DefaultSizeClassMap SizeClassMap; + # else + const uptr kAllocatorSpace = 0x600000000000ULL; + const uptr kAllocatorSize = 0x40000000000ULL; // 4T. +diff --git a/libsanitizer/asan/asan_interceptors.cpp b/libsanitizer/asan/asan_interceptors.cpp +index b19cf25c7..0f8cf179e 100644 +--- a/libsanitizer/asan/asan_interceptors.cpp ++++ b/libsanitizer/asan/asan_interceptors.cpp +@@ -41,6 +41,8 @@ + #define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.1" + #elif defined(__mips__) && SANITIZER_LINUX + #define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.2" ++#elif defined(__sw_64__) ++#define ASAN_PTHREAD_CREATE_VERSION "GLIBC_2.1" + #endif + + namespace __asan { +diff --git a/libsanitizer/asan/asan_mapping.h b/libsanitizer/asan/asan_mapping.h +index 09be90427..44187e375 100644 +--- a/libsanitizer/asan/asan_mapping.h ++++ b/libsanitizer/asan/asan_mapping.h +@@ -163,6 +163,7 @@ static const u64 kDefaultShort64bitShadowOffset = + static const u64 kAArch64_ShadowOffset64 = 1ULL << 36; + static const u64 kMIPS32_ShadowOffset32 = 0x0aaa0000; + static const u64 kMIPS64_ShadowOffset64 = 1ULL << 37; ++static const u64 kSW64_ShadowOffset64 = 1ULL << 49; + static const u64 kPPC64_ShadowOffset64 = 1ULL << 41; + static const u64 kSystemZ_ShadowOffset64 = 1ULL << 52; + static const u64 kSPARC64_ShadowOffset64 = 1ULL << 43; // 0x80000000000 +@@ -210,6 +211,8 @@ static const u64 kMyriadCacheBitMask32 = 0x40000000ULL; + # define SHADOW_OFFSET kAArch64_ShadowOffset64 + # elif defined(__powerpc64__) + # define SHADOW_OFFSET kPPC64_ShadowOffset64 ++# elif defined(__sw_64__) ++# define SHADOW_OFFSET kSW64_ShadowOffset64 + # elif defined(__s390x__) + # define SHADOW_OFFSET kSystemZ_ShadowOffset64 + # elif SANITIZER_FREEBSD +diff --git a/libsanitizer/configure.tgt b/libsanitizer/configure.tgt +index fa30065b5..9ebad0020 100644 +--- a/libsanitizer/configure.tgt ++++ b/libsanitizer/configure.tgt +@@ -47,6 +47,10 @@ case "${target}" in + ;; + arm*-*-linux*) + ;; ++ sw_64*-*-linux*) ++ TSAN_SUPPORTED=yes ++ LSAN_SUPPORTED=yes ++ ;; + mips*64*-*-linux*) + # This clause is only here to not match the supported mips*-*-linux*. 
+ UNSUPPORTED=1 +diff --git a/libsanitizer/lsan/lsan_allocator.cpp b/libsanitizer/lsan/lsan_allocator.cpp +index d86c39213..b3ce8dc81 100644 +--- a/libsanitizer/lsan/lsan_allocator.cpp ++++ b/libsanitizer/lsan/lsan_allocator.cpp +@@ -28,7 +28,7 @@ extern "C" void *memset(void *ptr, int value, uptr num); + namespace __lsan { + #if defined(__i386__) || defined(__arm__) + static const uptr kMaxAllowedMallocSize = 1UL << 30; +-#elif defined(__mips64) || defined(__aarch64__) ++#elif defined(__mips64) || defined(__aarch64__) || defined(__sw_64__) + static const uptr kMaxAllowedMallocSize = 4UL << 30; + #else + static const uptr kMaxAllowedMallocSize = 8UL << 30; +diff --git a/libsanitizer/lsan/lsan_allocator.h b/libsanitizer/lsan/lsan_allocator.h +index e13970997..a5363392e 100644 +--- a/libsanitizer/lsan/lsan_allocator.h ++++ b/libsanitizer/lsan/lsan_allocator.h +@@ -50,7 +50,7 @@ struct ChunkMetadata { + }; + + #if defined(__mips64) || defined(__aarch64__) || defined(__i386__) || \ +- defined(__arm__) ++ defined(__arm__) || defined(__sw_64__) + template + struct AP32 { + static const uptr kSpaceBeg = 0; +diff --git a/libsanitizer/lsan/lsan_common.cpp b/libsanitizer/lsan/lsan_common.cpp +index 9ff9f4c5d..a86141326 100644 +--- a/libsanitizer/lsan/lsan_common.cpp ++++ b/libsanitizer/lsan/lsan_common.cpp +@@ -138,6 +138,8 @@ static inline bool CanBeAHeapPointer(uptr p) { + return ((p >> 47) == 0); + #elif defined(__mips64) + return ((p >> 40) == 0); ++#elif defined(__sw_64__) ++ return ((p >> 52) == 0); + #elif defined(__aarch64__) + unsigned runtimeVMA = + (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1); +diff --git a/libsanitizer/lsan/lsan_common.h b/libsanitizer/lsan/lsan_common.h +index d24abe31b..ed09db215 100644 +--- a/libsanitizer/lsan/lsan_common.h ++++ b/libsanitizer/lsan/lsan_common.h +@@ -32,7 +32,7 @@ + #if (SANITIZER_LINUX && !SANITIZER_ANDROID || SANITIZER_MAC) && \ + (SANITIZER_WORDSIZE == 64) && \ + (defined(__x86_64__) || defined(__mips64) || defined(__aarch64__) || \ +- defined(__powerpc64__)) ++ defined(__powerpc64__) || defined(__sw_64__)) + #define CAN_SANITIZE_LEAKS 1 + #elif defined(__i386__) && \ + (SANITIZER_LINUX && !SANITIZER_ANDROID || SANITIZER_MAC) +diff --git a/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc b/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc +index 50e3558b5..283529f00 100644 +--- a/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc ++++ b/libsanitizer/sanitizer_common/sanitizer_common_interceptors.inc +@@ -4516,7 +4516,11 @@ INTERCEPTOR(int, shmctl, int shmid, int cmd, void *buf) { + } + return res; + } ++#ifdef SANITIZER_SW64 ++#define INIT_SHMCTL COMMON_INTERCEPT_FUNCTION_VER(shmctl, "GLIBC_2.2"); ++#else + #define INIT_SHMCTL COMMON_INTERCEPT_FUNCTION(shmctl); ++#endif + #else + #define INIT_SHMCTL + #endif +diff --git a/libsanitizer/sanitizer_common/sanitizer_common_syscalls.inc b/libsanitizer/sanitizer_common/sanitizer_common_syscalls.inc +index 31ff48cfd..e83569b99 100644 +--- a/libsanitizer/sanitizer_common/sanitizer_common_syscalls.inc ++++ b/libsanitizer/sanitizer_common/sanitizer_common_syscalls.inc +@@ -2296,7 +2296,8 @@ POST_SYSCALL(ni_syscall)(long res) {} + PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) { + #if !SANITIZER_ANDROID && \ + (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ +- defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__)) ++ defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \ ++ defined(__sw_64__)) 
+ if (data) { + if (request == ptrace_setregs) { + PRE_READ((void *)data, struct_user_regs_struct_sz); +@@ -2317,7 +2318,8 @@ PRE_SYSCALL(ptrace)(long request, long pid, long addr, long data) { + POST_SYSCALL(ptrace)(long res, long request, long pid, long addr, long data) { + #if !SANITIZER_ANDROID && \ + (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ +- defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__)) ++ defined(__powerpc64__) || defined(__aarch64__) || defined(__s390__) || \ ++ defined(__sw_64__)) + if (res >= 0 && data) { + // Note that this is different from the interceptor in + // sanitizer_common_interceptors.inc. +diff --git a/libsanitizer/sanitizer_common/sanitizer_linux.cpp b/libsanitizer/sanitizer_common/sanitizer_linux.cpp +index 15ccd738d..4ce47654d 100644 +--- a/libsanitizer/sanitizer_common/sanitizer_linux.cpp ++++ b/libsanitizer/sanitizer_common/sanitizer_linux.cpp +@@ -42,6 +42,16 @@ + #undef stat + #endif + ++#if defined(__sw_64__) ++#define stat kernel_stat ++#define stat64 kernel_stat64 ++#include ++#undef stat ++#undef stat64 ++#include ++#include ++#endif ++ + #include + #include + #include +@@ -250,7 +260,7 @@ static void stat64_to_stat(struct stat64 *in, struct stat *out) { + } + #endif + +-#if defined(__mips64) ++#if defined(__mips64) || defined(__sw_64__) + // Undefine compatibility macros from + // so that they would not clash with the kernel_stat + // st_[a|m|c]time fields +@@ -278,6 +288,12 @@ static void kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) { + out->st_size = in->st_size; + out->st_blksize = in->st_blksize; + out->st_blocks = in->st_blocks; ++#if defined(__sw_64__) ++ // There's no nsecs in sw_64's struct stat ++ out->st_atim.tv_sec = in->st_atime; ++ out->st_mtim.tv_sec = in->st_mtime; ++ out->st_ctim.tv_sec = in->st_ctime; ++#else + #if defined(__USE_MISC) || \ + defined(__USE_XOPEN2K8) || \ + defined(SANITIZER_ANDROID) +@@ -295,6 +311,7 @@ static void kernel_stat_to_stat(struct kernel_stat *in, struct stat *out) { + out->st_ctime = in->st_ctime; + out->st_atimensec = in->st_ctime_nsec; + #endif ++#endif + } + #endif + +@@ -305,8 +322,8 @@ uptr internal_stat(const char *path, void *buf) { + return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, + 0); + #elif SANITIZER_LINUX_USES_64BIT_SYSCALLS +-# if defined(__mips64) +- // For mips64, stat syscall fills buffer in the format of kernel_stat ++# if defined(__mips64) || defined(__sw_64__) ++ // For mips64 and sw_64, stat syscall fills buffer in the format of kernel_stat + struct kernel_stat kbuf; + int res = internal_syscall(SYSCALL(stat), path, &kbuf); + kernel_stat_to_stat(&kbuf, (struct stat *)buf); +@@ -330,8 +347,8 @@ uptr internal_lstat(const char *path, void *buf) { + return internal_syscall(SYSCALL(newfstatat), AT_FDCWD, (uptr)path, (uptr)buf, + AT_SYMLINK_NOFOLLOW); + #elif SANITIZER_LINUX_USES_64BIT_SYSCALLS +-# if SANITIZER_MIPS64 +- // For mips64, lstat syscall fills buffer in the format of kernel_stat ++# if SANITIZER_MIPS64 || SANITIZER_SW64 ++ // For mips64 and sw_64, lstat syscall fills buffer in the format of kernel_stat + struct kernel_stat kbuf; + int res = internal_syscall(SYSCALL(lstat), path, &kbuf); + kernel_stat_to_stat(&kbuf, (struct stat *)buf); +@@ -350,8 +367,8 @@ uptr internal_lstat(const char *path, void *buf) { + uptr internal_fstat(fd_t fd, void *buf) { + #if SANITIZER_FREEBSD || SANITIZER_OPENBSD || \ + SANITIZER_LINUX_USES_64BIT_SYSCALLS +-#if SANITIZER_MIPS64 && !SANITIZER_OPENBSD +- // For 
mips64, fstat syscall fills buffer in the format of kernel_stat ++#if (SANITIZER_MIPS64 || SANITIZER_SW64) && !SANITIZER_OPENBSD ++ // For mips64 and sw_64, fstat syscall fills buffer in the format of kernel_stat + struct kernel_stat kbuf; + int res = internal_syscall(SYSCALL(fstat), fd, &kbuf); + kernel_stat_to_stat(&kbuf, (struct stat *)buf); +@@ -727,6 +744,19 @@ uptr internal_waitpid(int pid, int *status, int options) { + 0 /* rusage */); + } + ++#ifdef __sw_64__ ++uptr internal_getpid() { ++ return internal_syscall(SYSCALL(getxpid)); ++} ++ ++uptr internal_getppid() { ++ uptr ppid; ++ internal_syscall(SYSCALL(getxpid)); ++ asm("mov $20, %0\n" ++ :"=r"(ppid)); ++ return ppid; ++} ++#else + uptr internal_getpid() { + return internal_syscall(SYSCALL(getpid)); + } +@@ -734,6 +764,7 @@ uptr internal_getpid() { + uptr internal_getppid() { + return internal_syscall(SYSCALL(getppid)); + } ++#endif + + uptr internal_getdents(fd_t fd, struct linux_dirent *dirp, unsigned int count) { + #if SANITIZER_FREEBSD +@@ -760,7 +791,7 @@ uptr internal_sigaltstack(const void *ss, void *oss) { + } + + int internal_fork() { +-#if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS ++#if SANITIZER_USES_CANONICAL_LINUX_SYSCALLS || SANITIZER_SW64 + return internal_syscall(SYSCALL(clone), SIGCHLD, 0); + #else + return internal_syscall(SYSCALL(fork)); +@@ -816,7 +847,7 @@ int internal_sigaction_norestorer(int signum, const void *act, void *oldact) { + // rt_sigaction, so we need to do the same (we'll need to reimplement the + // restorers; for x86_64 the restorer address can be obtained from + // oldact->sa_restorer upon a call to sigaction(xxx, NULL, oldact). +-#if !SANITIZER_ANDROID || !SANITIZER_MIPS32 ++#if (!SANITIZER_ANDROID || !SANITIZER_MIPS32) && !SANITIZER_SW64 + k_act.sa_restorer = u_act->sa_restorer; + #endif + } +@@ -832,7 +863,7 @@ int internal_sigaction_norestorer(int signum, const void *act, void *oldact) { + internal_memcpy(&u_oldact->sa_mask, &k_oldact.sa_mask, + sizeof(__sanitizer_kernel_sigset_t)); + u_oldact->sa_flags = k_oldact.sa_flags; +-#if !SANITIZER_ANDROID || !SANITIZER_MIPS32 ++#if (!SANITIZER_ANDROID || !SANITIZER_MIPS32) && !SANITIZER_SW64 + u_oldact->sa_restorer = k_oldact.sa_restorer; + #endif + } +@@ -1035,6 +1066,11 @@ uptr GetMaxVirtualAddress() { + return (1ULL << (MostSignificantSetBitIndex(GET_CURRENT_FRAME()) + 1)) - 1; + # elif defined(__mips64) + return (1ULL << 40) - 1; // 0x000000ffffffffffUL; ++# elif defined(__sw_64__) ++// SW64 has a 42-bit user address space(4TiB) ++// according to TASK_SIZE in kernel. ++// In sw6b PGTABLE is SW_4LEVEL. 
++ return (1ULL << 52) - 1; // 0x000fffffffffffffUL; + # elif defined(__s390x__) + return (1ULL << 53) - 1; // 0x001fffffffffffffUL; + #elif defined(__sparc__) +@@ -1326,6 +1362,72 @@ uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, + : "memory", "$29" ); + return res; + } ++#elif defined(__sw_64__) ++uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, ++ int *parent_tidptr, void *newtls, int *child_tidptr) { ++ long long res; ++ if (!fn || !child_stack) ++ return -EINVAL; ++ child_stack = (char *)child_stack - 4 * sizeof(unsigned long long); ++ ((unsigned long long *)child_stack)[0] = (uptr)fn; ++ ((unsigned long long *)child_stack)[1] = (uptr)arg; ++ ((unsigned long long *)child_stack)[2] = (uptr)flags; ++ ++ register void *r20 __asm__("$20") = newtls; ++ register int *r22 __asm__("$22") = child_tidptr; ++ ++ __asm__ __volatile__( ++ /* $v0 = syscall($v0 = __NR_clone, ++ * $a0 = flags, ++ * $a1 = child_stack, ++ * $a2 = parent_tidptr, ++ * $a3 = child_tidptr, ++ * $a4 = new_tls) ++ */ ++ "mov %[flag],$16\n" ++ "mov %[usp],$17\n" ++ "mov %[ptid],$18\n" ++ "ldl $19,0($sp)\n" ++ "mov %5,$20\n" ++ /* Store the fifth argument on stack ++ * if we are using 32-bit abi. ++ */ ++ "ldi $0,%[NR_clone];\n" ++ "sys_call 0x83;\n" ++ ++ /* if ($v0 != 0) ++ * return; ++ */ ++ "bne $0,1f;\n" ++ "mov $31,$15;\n" ++ /* Call "fn(arg)". */ ++ "ldl $27,0($sp);\n" ++ "ldl $16,8($sp);\n" ++ "ldi $sp,32($sp);\n" ++ ++ "call $26,($27),0;\n" ++ "ldgp $29, 0($26);\n" ++ ++ /* Call _exit($v0). */ ++ "mov $0,$16;\n" ++ "ldi $0,%[NR_exit];\n" ++ "sys_call 0x83;\n" ++ ++ /* Return to parent. */ ++ "1:\n" ++ : "=r" (res) ++ : [flag]"r"(flags), ++ [usp]"r"(child_stack), ++ [ptid]"r"(parent_tidptr), ++ "r"(r20), ++ "r"(r22), ++ [NR_clone]"i"(__NR_clone), ++ [NR_exit]"i"(__NR_exit) ++ : "memory", "$30"); ++ ++ return res; ++} ++ + #elif defined(__aarch64__) + uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, + int *parent_tidptr, void *newtls, int *child_tidptr) { +@@ -1879,6 +1981,11 @@ static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) { + *pc = ucontext->uc_mcontext.pc; + *bp = ucontext->uc_mcontext.regs[29]; + *sp = ucontext->uc_mcontext.sp; ++#elif defined(__sw_64__) ++ ucontext_t *ucontext = (ucontext_t*)context; ++ *pc = ucontext->uc_mcontext.sc_pc; ++ *bp = ucontext->uc_mcontext.sc_regs[15]; ++ *sp = ucontext->uc_mcontext.sc_regs[30]; + #elif defined(__hppa__) + ucontext_t *ucontext = (ucontext_t*)context; + *pc = ucontext->uc_mcontext.sc_iaoq[0]; +@@ -1966,6 +2073,11 @@ static void GetPcSpBp(void *context, uptr *pc, uptr *sp, uptr *bp) { + *pc = ucontext->uc_mcontext.pc; + *bp = ucontext->uc_mcontext.gregs[30]; + *sp = ucontext->uc_mcontext.gregs[29]; ++#elif defined(__sw_64__) ++ ucontext_t *ucontext = (ucontext_t*)context; ++ *pc = ucontext->uc_mcontext.sc_pc; ++ *bp = ucontext->uc_mcontext.sc_regs[15]; ++ *sp = ucontext->uc_mcontext.sc_regs[30]; + #elif defined(__s390__) + ucontext_t *ucontext = (ucontext_t*)context; + # if defined(__s390x__) +diff --git a/libsanitizer/sanitizer_common/sanitizer_linux.h b/libsanitizer/sanitizer_common/sanitizer_linux.h +index c28347ad9..05976a700 100644 +--- a/libsanitizer/sanitizer_common/sanitizer_linux.h ++++ b/libsanitizer/sanitizer_common/sanitizer_linux.h +@@ -61,7 +61,7 @@ int internal_sigaction_norestorer(int signum, const void *act, void *oldact); + void internal_sigdelset(__sanitizer_sigset_t *set, int signum); + #if defined(__x86_64__) || defined(__mips__) || 
defined(__aarch64__) \ + || defined(__powerpc64__) || defined(__s390__) || defined(__i386__) \ +- || defined(__arm__) ++ || defined(__arm__) || defined(__sw_64__) + uptr internal_clone(int (*fn)(void *), void *child_stack, int flags, void *arg, + int *parent_tidptr, void *newtls, int *child_tidptr); + #endif +diff --git a/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp b/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp +index e09d568d8..18e7555ba 100644 +--- a/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp ++++ b/libsanitizer/sanitizer_common/sanitizer_linux_libcdep.cpp +@@ -262,7 +262,7 @@ void InitTlsSize() { } + + #if (defined(__x86_64__) || defined(__i386__) || defined(__mips__) || \ + defined(__aarch64__) || defined(__powerpc64__) || defined(__s390__) || \ +- defined(__arm__)) && \ ++ defined(__arm__) || defined(__sw_64__)) && \ + SANITIZER_LINUX && !SANITIZER_ANDROID + // sizeof(struct pthread) from glibc. + static atomic_uintptr_t thread_descriptor_size; +@@ -309,6 +309,8 @@ uptr ThreadDescriptorSize() { + val = 1776; // from glibc.ppc64le 2.20-8.fc21 + #elif defined(__s390__) + val = FIRST_32_SECOND_64(1152, 1776); // valid for glibc 2.22 ++#elif defined(__sw_64__) ++ val = 1776; + #endif + if (val) + atomic_store_relaxed(&thread_descriptor_size, val); +@@ -356,7 +358,7 @@ uptr ThreadSelf() { + rdhwr %0,$29;\ + .set pop" : "=r" (thread_pointer)); + descr_addr = thread_pointer - kTlsTcbOffset - TlsPreTcbSize(); +-# elif defined(__aarch64__) || defined(__arm__) ++# elif defined(__aarch64__) || defined(__arm__) || defined(__sw_64__) + descr_addr = reinterpret_cast(__builtin_thread_pointer()) - + ThreadDescriptorSize(); + # elif defined(__s390__) +@@ -435,7 +437,7 @@ static void GetTls(uptr *addr, uptr *size) { + *addr -= *size; + *addr += ThreadDescriptorSize(); + # elif defined(__mips__) || defined(__aarch64__) || defined(__powerpc64__) \ +- || defined(__arm__) ++ || defined(__arm__) || defined(__sw_64__) + *addr = ThreadSelf(); + *size = GetTlsSize(); + # else +diff --git a/libsanitizer/sanitizer_common/sanitizer_platform.h b/libsanitizer/sanitizer_common/sanitizer_platform.h +index c68bfa258..dea617abd 100644 +--- a/libsanitizer/sanitizer_common/sanitizer_platform.h ++++ b/libsanitizer/sanitizer_common/sanitizer_platform.h +@@ -147,6 +147,12 @@ + # define SANITIZER_MIPS64 0 + #endif + ++#if defined(__sw_64__) ++# define SANITIZER_SW64 1 ++#else ++# define SANITIZER_SW64 0 ++#endif ++ + #if defined(__s390__) + # define SANITIZER_S390 1 + # if defined(__s390x__) +@@ -242,6 +248,8 @@ + # endif + #elif defined(__sparc__) + #define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 52) ++#elif defined(__sw_64__) ++# define SANITIZER_MMAP_RANGE_SIZE 1ULL << 52 + #else + # define SANITIZER_MMAP_RANGE_SIZE FIRST_32_SECOND_64(1ULL << 32, 1ULL << 47) + #endif +diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_interceptors.h b/libsanitizer/sanitizer_common/sanitizer_platform_interceptors.h +index 61a6b82ef..820d458be 100644 +--- a/libsanitizer/sanitizer_common/sanitizer_platform_interceptors.h ++++ b/libsanitizer/sanitizer_common/sanitizer_platform_interceptors.h +@@ -225,7 +225,11 @@ + #define SANITIZER_INTERCEPT_GETITIMER SI_POSIX + #define SANITIZER_INTERCEPT_TIME SI_POSIX + #define SANITIZER_INTERCEPT_GLOB SI_LINUX_NOT_ANDROID || SI_SOLARIS ++#if SANITIZER_SW64 ++#define SANITIZER_INTERCEPT_GLOB64 0 ++#else + #define SANITIZER_INTERCEPT_GLOB64 SI_LINUX_NOT_ANDROID ++#endif + #define SANITIZER_INTERCEPT_WAIT SI_POSIX + #define 
SANITIZER_INTERCEPT_INET SI_POSIX + #define SANITIZER_INTERCEPT_PTHREAD_GETSCHEDPARAM (SI_POSIX && !SI_OPENBSD) +@@ -261,7 +265,7 @@ + #if SI_LINUX_NOT_ANDROID && \ + (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ + defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ +- defined(__s390__)) ++ defined(__s390__) || defined(__sw_64__)) + #define SANITIZER_INTERCEPT_PTRACE 1 + #else + #define SANITIZER_INTERCEPT_PTRACE 0 +diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cpp b/libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cpp +index f22f50391..7a3e3ab60 100644 +--- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cpp ++++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_linux.cpp +@@ -68,7 +68,7 @@ namespace __sanitizer { + + #if !defined(__powerpc64__) && !defined(__x86_64__) && !defined(__aarch64__)\ + && !defined(__mips__) && !defined(__s390__)\ +- && !defined(__sparc__) && !defined(__riscv) ++ && !defined(__sparc__) && !defined(__riscv) && !defined(__sw_64__) + COMPILER_CHECK(struct___old_kernel_stat_sz == sizeof(struct __old_kernel_stat)); + #endif + +diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp +index 8b4162bcd..5585755f3 100644 +--- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp ++++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.cpp +@@ -92,7 +92,7 @@ + #if SANITIZER_LINUX + # include + # include +-# if defined(__mips64) || defined(__aarch64__) || defined(__arm__) ++# if defined(__mips64) || defined(__aarch64__) || defined(__arm__) || defined(__sw_64__) // for pt_regs + # include + # ifdef __arm__ + typedef struct user_fpregs elf_fpregset_t; +@@ -128,7 +128,7 @@ typedef struct user_fpregs elf_fpregset_t; + #include + #include + #include +-#if defined(__mips64) ++#if defined(__mips64) || defined(__sw_64__) // for elf_gregset_t + # include + #endif + #include +@@ -232,7 +232,7 @@ namespace __sanitizer { + // has been removed from glibc 2.28. 
+ #if defined(__aarch64__) || defined(__s390x__) || defined (__mips64) \ + || defined(__powerpc64__) || defined(__arch64__) || defined(__sparcv9) \ +- || defined(__x86_64__) || (defined(__riscv) && __riscv_xlen == 64) ++ || defined(__x86_64__) || (defined(__riscv) && __riscv_xlen == 64) || defined(__sw_64__) + #define SIZEOF_STRUCT_USTAT 32 + #elif defined(__arm__) || defined(__i386__) || defined(__mips__) \ + || defined(__powerpc__) || defined(__s390__) || defined(__sparc__) +@@ -307,11 +307,11 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); + #if SANITIZER_LINUX && !SANITIZER_ANDROID && \ + (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ + defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ +- defined(__s390__)) ++ defined(__s390__) || defined(__sw_64__)) + #if defined(__mips64) || defined(__powerpc64__) || defined(__arm__) + unsigned struct_user_regs_struct_sz = sizeof(struct pt_regs); + unsigned struct_user_fpregs_struct_sz = sizeof(elf_fpregset_t); +-#elif defined(__aarch64__) ++#elif defined(__aarch64__) || defined(__sw_64__) + unsigned struct_user_regs_struct_sz = sizeof(struct user_pt_regs); + unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpsimd_state); + #elif defined(__s390__) +@@ -322,12 +322,12 @@ unsigned struct_ElfW_Phdr_sz = sizeof(Elf_Phdr); + unsigned struct_user_fpregs_struct_sz = sizeof(struct user_fpregs_struct); + #endif // __mips64 || __powerpc64__ || __aarch64__ + #if defined(__x86_64) || defined(__mips64) || defined(__powerpc64__) || \ +- defined(__aarch64__) || defined(__arm__) || defined(__s390__) ++ defined(__aarch64__) || defined(__arm__) || defined(__s390__) || defined(__sw_64__) + unsigned struct_user_fpxregs_struct_sz = 0; + #else + unsigned struct_user_fpxregs_struct_sz = sizeof(struct user_fpxregs_struct); + #endif // __x86_64 || __mips64 || __powerpc64__ || __aarch64__ || __arm__ +-// || __s390__ ++// || __s390__ || __sw_64__ + #ifdef __arm__ + unsigned struct_user_vfpregs_struct_sz = ARM_VFPREGS_SIZE; + #else +@@ -1059,7 +1059,7 @@ CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_mask); + // didn't exist. 
+ CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_flags); + #endif +-#if SANITIZER_LINUX && (!SANITIZER_ANDROID || !SANITIZER_MIPS32) ++#if SANITIZER_LINUX && (!SANITIZER_ANDROID || !SANITIZER_MIPS32) && !SANITIZER_SW64 + CHECK_STRUCT_SIZE_AND_OFFSET(sigaction, sa_restorer); + #endif + +diff --git a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h +index d82fd5e40..9c572f4d3 100644 +--- a/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h ++++ b/libsanitizer/sanitizer_common/sanitizer_platform_limits_posix.h +@@ -73,6 +73,9 @@ const unsigned struct_kernel_stat64_sz = 104; + #elif defined(__aarch64__) + const unsigned struct_kernel_stat_sz = 128; + const unsigned struct_kernel_stat64_sz = 104; ++#elif defined(__sw_64__) ++const unsigned struct_kernel_stat_sz = 80; ++const unsigned struct_kernel_stat64_sz = 136; + #elif defined(__powerpc__) && !defined(__powerpc64__) + const unsigned struct_kernel_stat_sz = 72; + const unsigned struct_kernel_stat64_sz = 104; +@@ -101,6 +104,9 @@ const unsigned struct_kernel_stat64_sz = 104; + #elif defined(__riscv) && __riscv_xlen == 64 + const unsigned struct_kernel_stat_sz = 128; + const unsigned struct_kernel_stat64_sz = 104; ++#elif defined(__sw_64__) ++const unsigned struct_kernel_stat_sz = 80; ++const unsigned struct_kernel_stat64_sz = 136; + #endif + struct __sanitizer_perf_event_attr { + unsigned type; +@@ -259,15 +265,15 @@ struct __sanitizer_shmid_ds { + u64 shm_ctime; + #else + uptr shm_atime; +-#if !defined(_LP64) && !defined(__mips__) ++#if !defined(_LP64) && !defined(__mips__) && !defined(__sw_64__) + uptr __unused1; + #endif + uptr shm_dtime; +-#if !defined(_LP64) && !defined(__mips__) ++#if !defined(_LP64) && !defined(__mips__) && !defined(__sw_64__) + uptr __unused2; + #endif + uptr shm_ctime; +-#if !defined(_LP64) && !defined(__mips__) ++#if !defined(_LP64) && !defined(__mips__) && !defined(__sw_64__) + uptr __unused3; + #endif + #endif +@@ -509,7 +515,7 @@ typedef int __sanitizer_clockid_t; + + #if SANITIZER_LINUX + #if defined(_LP64) || defined(__x86_64__) || defined(__powerpc__) || \ +- defined(__mips__) ++ defined(__mips__) && !defined(__sw_64__) + typedef unsigned __sanitizer___kernel_uid_t; + typedef unsigned __sanitizer___kernel_gid_t; + #else +@@ -522,7 +528,7 @@ typedef long long __sanitizer___kernel_off_t; + typedef long __sanitizer___kernel_off_t; + #endif + +-#if defined(__powerpc__) || defined(__mips__) ++#if defined(__powerpc__) || defined(__mips__) && !defined(__sw_64__) + typedef unsigned int __sanitizer___kernel_old_uid_t; + typedef unsigned int __sanitizer___kernel_old_gid_t; + #else +@@ -634,7 +640,7 @@ struct __sanitizer_sigaction { + #endif + #endif + #endif +-#if SANITIZER_LINUX ++#if SANITIZER_LINUX && !defined(__sw_64__) + void (*sa_restorer)(); + #endif + #if defined(__mips__) && (SANITIZER_WORDSIZE == 32) +@@ -797,7 +803,7 @@ typedef void __sanitizer_FILE; + #if SANITIZER_LINUX && !SANITIZER_ANDROID && \ + (defined(__i386) || defined(__x86_64) || defined(__mips64) || \ + defined(__powerpc64__) || defined(__aarch64__) || defined(__arm__) || \ +- defined(__s390__)) ++ defined(__s390__) || defined(__sw_64__)) + extern unsigned struct_user_regs_struct_sz; + extern unsigned struct_user_fpregs_struct_sz; + extern unsigned struct_user_fpxregs_struct_sz; +@@ -883,7 +889,7 @@ struct __sanitizer_cookie_io_functions_t { + #define IOC_NRBITS 8 + #define IOC_TYPEBITS 8 + #if defined(__powerpc__) || defined(__powerpc64__) || 
defined(__mips__) || \ +- defined(__sparc__) ++ defined(__sparc__) || defined(__sw_64__) + #define IOC_SIZEBITS 13 + #define IOC_DIRBITS 3 + #define IOC_NONE 1U +diff --git a/libsanitizer/sanitizer_common/sanitizer_stacktrace.h b/libsanitizer/sanitizer_common/sanitizer_stacktrace.h +index f1f29e9f3..67ba06cf7 100644 +--- a/libsanitizer/sanitizer_common/sanitizer_stacktrace.h ++++ b/libsanitizer/sanitizer_common/sanitizer_stacktrace.h +@@ -22,6 +22,8 @@ static const u32 kStackTraceMax = 256; + + #if SANITIZER_LINUX && defined(__mips__) + # define SANITIZER_CAN_FAST_UNWIND 0 ++#elif defined(__sw_64__) ++# define SANITIZER_CAN_FAST_UNWIND 0 + #elif SANITIZER_WINDOWS + # define SANITIZER_CAN_FAST_UNWIND 0 + #elif SANITIZER_OPENBSD +diff --git a/libsanitizer/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp b/libsanitizer/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp +index 651d5056d..0cdfa8fad 100644 +--- a/libsanitizer/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp ++++ b/libsanitizer/sanitizer_common/sanitizer_stoptheworld_linux_libcdep.cpp +@@ -16,7 +16,7 @@ + #if SANITIZER_LINUX && (defined(__x86_64__) || defined(__mips__) || \ + defined(__aarch64__) || defined(__powerpc64__) || \ + defined(__s390__) || defined(__i386__) || \ +- defined(__arm__)) ++ defined(__arm__) || defined(__sw_64__)) + + #include "sanitizer_stoptheworld.h" + +@@ -498,6 +498,11 @@ typedef struct user regs_struct; + # define REG_SP regs[EF_REG29] + # endif + ++#elif defined(__sw_64__) ++typedef struct user regs_struct; ++#define REG_SP regs[EF_SP] ++#define ARCH_IOVEC_FOR_GETREGSET ++ + #elif defined(__aarch64__) + typedef struct user_pt_regs regs_struct; + #define REG_SP sp +diff --git a/libsanitizer/sanitizer_common/sanitizer_symbolizer_libcdep.cpp b/libsanitizer/sanitizer_common/sanitizer_symbolizer_libcdep.cpp +index 3b19a6836..a5c7252cb 100644 +--- a/libsanitizer/sanitizer_common/sanitizer_symbolizer_libcdep.cpp ++++ b/libsanitizer/sanitizer_common/sanitizer_symbolizer_libcdep.cpp +@@ -270,6 +270,8 @@ class LLVMSymbolizerProcess : public SymbolizerProcess { + const char* const kSymbolizerArch = "--default-arch=s390x"; + #elif defined(__s390__) + const char* const kSymbolizerArch = "--default-arch=s390"; ++#elif defined(__sw_64__) ++ const char* const kSymbolizerArch = "--default-arch=sw_64"; + #else + const char* const kSymbolizerArch = "--default-arch=unknown"; + #endif +diff --git a/libsanitizer/tsan/Makefile.am b/libsanitizer/tsan/Makefile.am +index 5d37abd20..32b87fc6f 100644 +--- a/libsanitizer/tsan/Makefile.am ++++ b/libsanitizer/tsan/Makefile.am +@@ -49,7 +49,7 @@ tsan_files = \ + tsan_sync.cpp + + libtsan_la_SOURCES = $(tsan_files) +-EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S tsan_rtl_mips64.S tsan_rtl_ppc64.S ++EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S tsan_rtl_mips64.S tsan_rtl_ppc64.S tsan_rtl_sw64.S + libtsan_la_LIBADD = $(top_builddir)/sanitizer_common/libsanitizer_common.la $(top_builddir)/interception/libinterception.la $(TSAN_TARGET_DEPENDENT_OBJECTS) + libtsan_la_DEPENDENCIES = $(top_builddir)/sanitizer_common/libsanitizer_common.la $(top_builddir)/interception/libinterception.la $(TSAN_TARGET_DEPENDENT_OBJECTS) + if LIBBACKTRACE_SUPPORTED +diff --git a/libsanitizer/tsan/Makefile.in b/libsanitizer/tsan/Makefile.in +index 74896427e..6448de255 100644 +--- a/libsanitizer/tsan/Makefile.in ++++ b/libsanitizer/tsan/Makefile.in +@@ -451,7 +451,7 @@ tsan_files = \ + tsan_sync.cpp + + libtsan_la_SOURCES = $(tsan_files) 
+-EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S tsan_rtl_mips64.S tsan_rtl_ppc64.S ++EXTRA_libtsan_la_SOURCES = tsan_rtl_amd64.S tsan_rtl_aarch64.S tsan_rtl_mips64.S tsan_rtl_ppc64.S tsan_rtl_sw64.S + libtsan_la_LIBADD = \ + $(top_builddir)/sanitizer_common/libsanitizer_common.la \ + $(top_builddir)/interception/libinterception.la \ +diff --git a/libsanitizer/tsan/tsan_interceptors_posix.cpp b/libsanitizer/tsan/tsan_interceptors_posix.cpp +index 8aea1e4ec..c30ceeaf3 100644 +--- a/libsanitizer/tsan/tsan_interceptors_posix.cpp ++++ b/libsanitizer/tsan/tsan_interceptors_posix.cpp +@@ -73,7 +73,7 @@ struct ucontext_t { + }; + #endif + +-#if defined(__x86_64__) || defined(__mips__) || SANITIZER_PPC64V1 ++#if defined(__x86_64__) || defined(__mips__) || defined(__sw_64__) || SANITIZER_PPC64V1 + #define PTHREAD_ABI_BASE "GLIBC_2.3.2" + #elif defined(__aarch64__) || SANITIZER_PPC64V2 + #define PTHREAD_ABI_BASE "GLIBC_2.17" +@@ -142,7 +142,7 @@ typedef long long_t; + # define F_TLOCK 2 /* Test and lock a region for exclusive use. */ + # define F_TEST 3 /* Test a region for other processes locks. */ + +-#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_NETBSD ++#if SANITIZER_FREEBSD || SANITIZER_MAC || SANITIZER_NETBSD || SANITIZER_SW64 + const int SA_SIGINFO = 0x40; + const int SIG_SETMASK = 3; + #elif defined(__mips__) +@@ -2371,7 +2371,7 @@ int sigaction_impl(int sig, const __sanitizer_sigaction *act, + sigactions[sig].sa_flags = *(volatile int const *)&act->sa_flags; + internal_memcpy(&sigactions[sig].sa_mask, &act->sa_mask, + sizeof(sigactions[sig].sa_mask)); +-#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD ++#if !SANITIZER_FREEBSD && !SANITIZER_MAC && !SANITIZER_NETBSD && !SANITIZER_SW64 + sigactions[sig].sa_restorer = act->sa_restorer; + #endif + internal_memcpy(&newact, act, sizeof(newact)); +@@ -2674,6 +2674,14 @@ void InitializeInterceptors() { + TSAN_INTERCEPT(pthread_timedjoin_np); + #endif + ++ #if SANITIZER_SW64 ++ // sw64 have two version of timer function, osf_xxx with @glibc2.0, ++ // which is 32bits syscall for old kernal. xxx with @glibc2.1 is 64bits ++ // syscall for new kernal, we use the new one. ++ TSAN_INTERCEPT_VER(setitimer, "GLIBC_2.1"); ++ TSAN_INTERCEPT_VER(setitimer, "GLIBC_2.1"); ++ #endif ++ + TSAN_INTERCEPT_VER(pthread_cond_init, PTHREAD_ABI_BASE); + TSAN_INTERCEPT_VER(pthread_cond_signal, PTHREAD_ABI_BASE); + TSAN_INTERCEPT_VER(pthread_cond_broadcast, PTHREAD_ABI_BASE); +diff --git a/libsanitizer/tsan/tsan_platform.h b/libsanitizer/tsan/tsan_platform.h +index 63eb14fcd..e4e2e2961 100644 +--- a/libsanitizer/tsan/tsan_platform.h ++++ b/libsanitizer/tsan/tsan_platform.h +@@ -352,6 +352,44 @@ struct Mapping47 { + + // Indicates the runtime will define the memory regions at runtime. + #define TSAN_RUNTIME_VMA 1 ++ ++#elif defined(__sw_64__) ++ ++// TODO(sw64_map): as sw64 kernal doesn't map such large space, we just map ++// it for test, for now it works will. ++// TODO(sw64_map_la): as sw64 map all space in low address, we set all user ++// space ++// in Lo address, perhaps there is some way to change it. 
++/* ++C/C++ on linux/sw64 (52-bit VMA) ++0000 0000 0000 - 0001 2000 0000: modules and main thread stack ++0001 2000 0000 - 0008 0000 0000: main binary ++0400 0000 0000 - 0600 0000 0000: pie main binary (including heap) ++0600 0000 0000 - 4000 0000 0000: - ++4000 0000 0000 - 6000 0000 0000: shadow ++6000 0000 0000 - 7000 0000 0000: metainfo ++7000 0000 0000 - 7c00 0000 0000: trace ++*/ ++ ++struct Mapping { ++ static const uptr kLoAppMemBeg = 0x0000000000000ull; ++ static const uptr kLoAppMemEnd = 0x0600000000000ull; ++ static const uptr kShadowBeg = 0x4000000000000ull; ++ static const uptr kShadowEnd = 0x6000000000000ull; ++ static const uptr kHiAppMemBeg = 0xfff0000000000ull; ++ static const uptr kHiAppMemEnd = 0xfff0000000000ull; ++ static const uptr kAppMemMsk = 0x0000000000000ull; ++ //distans between lo address to shadow begin ++ static const uptr kAppMemXor = 0x1000000000000ull; ++ static const uptr kHeapMemBeg = 0xff00000000000ull; ++ static const uptr kHeapMemEnd = 0xff00000000000ull; ++ static const uptr kMetaShadowBeg = 0x6000000000000ull; ++ static const uptr kMetaShadowEnd = 0x7000000000000ull; ++ static const uptr kTraceMemBeg = 0x7000000000000ull; ++ static const uptr kTraceMemEnd = 0x7c00000000000ull; ++ static const uptr kVdsoBeg = 0x3c00000000000000ull; ++}; ++#define TSAN_RUNTIME_VMA 1 + #endif + + #elif SANITIZER_GO && !SANITIZER_WINDOWS && defined(__x86_64__) +diff --git a/libsanitizer/tsan/tsan_platform_linux.cpp b/libsanitizer/tsan/tsan_platform_linux.cpp +index 33fa586ca..7d3c2eb38 100644 +--- a/libsanitizer/tsan/tsan_platform_linux.cpp ++++ b/libsanitizer/tsan/tsan_platform_linux.cpp +@@ -378,6 +378,8 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) { + return mangled_sp ^ xor_key; + #elif defined(__mips__) + return mangled_sp; ++#elif defined(__sw_64__) ++ return mangled_sp; + #else + #error "Unknown platform" + #endif +@@ -394,6 +396,8 @@ static uptr UnmangleLongJmpSp(uptr mangled_sp) { + # define LONG_JMP_SP_ENV_SLOT 13 + # elif defined(__mips64) + # define LONG_JMP_SP_ENV_SLOT 1 ++# elif defined(__sw_64__) ++# define LONG_JMP_SP_ENV_SLOT 8 + # else + # define LONG_JMP_SP_ENV_SLOT 6 + # endif +diff --git a/libsanitizer/tsan/tsan_platform_posix.cpp b/libsanitizer/tsan/tsan_platform_posix.cpp +index 1a0faee02..546795166 100644 +--- a/libsanitizer/tsan/tsan_platform_posix.cpp ++++ b/libsanitizer/tsan/tsan_platform_posix.cpp +@@ -89,6 +89,9 @@ void InitializeShadowMemory() { + } else { + DCHECK(0); + } ++#elif defined(__sw_64__) ++ uptr kMadviseRangeBeg = 0x210000000000ull; ++ uptr kMadviseRangeSize = 0x010000000000ull; + #endif + NoHugePagesInShadow(MemToShadow(kMadviseRangeBeg), + kMadviseRangeSize * kShadowMultiplier); +diff --git a/libsanitizer/tsan/tsan_rtl.h b/libsanitizer/tsan/tsan_rtl.h +index c38fc43a9..35f904f8f 100644 +--- a/libsanitizer/tsan/tsan_rtl.h ++++ b/libsanitizer/tsan/tsan_rtl.h +@@ -54,7 +54,7 @@ namespace __tsan { + + #if !SANITIZER_GO + struct MapUnmapCallback; +-#if defined(__mips64) || defined(__aarch64__) || defined(__powerpc__) ++#if defined(__mips64) || defined(__aarch64__) || defined(__powerpc__) || defined(__sw_64__) + + struct AP32 { + static const uptr kSpaceBeg = 0; +diff --git a/libsanitizer/tsan/tsan_rtl_sw64.S b/libsanitizer/tsan/tsan_rtl_sw64.S +new file mode 100644 +index 000000000..f74bfef8d +--- /dev/null ++++ b/libsanitizer/tsan/tsan_rtl_sw64.S +@@ -0,0 +1,236 @@ ++// The content of this file is sw64-only: ++#if defined(__sw_64__) ++ ++#include "sanitizer_common/sanitizer_asm.h" ++ ++.section .text ++.set noreorder ++ 
++ASM_HIDDEN(__tsan_setjmp) ++.comm _ZN14__interception11real_setjmpE,8,8 ++.globl ASM_SYMBOL_INTERCEPTOR(setjmp) ++ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(setjmp)) ++ASM_SYMBOL_INTERCEPTOR(setjmp): ++ ldgp $r29, 0($r27) ++ CFI_STARTPROC ++ ++ // Save frame/link register ++ ldi $sp, -32($sp) ++ stl $r26, 0($sp) ++ stl $fp, 8($sp) ++ CFI_DEF_CFA_OFFSET (32) ++ CFI_OFFSET (26, -32) ++ CFI_OFFSET (15, -24) ++ ++ // Adjust the SP for previous frame ++ ldi $fp,0($sp) ++ CFI_DEF_CFA_REGISTER (15) ++ ++ // Save env parameter ++ stl $r16, 16($sp) ++ CFI_OFFSET (0, -16) ++ ++ // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` ++ ldi $r16, 32($sp) ++ ++ // call tsan interceptor ++ //ldih $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprelhigh ++ //ldi $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprellow ++ ldl $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !literal ++ call $r26, ($r27), 0 ++ ldgp $r29, 0($r26) ++ ++ // Restore env parameter ++ ldl $r16, 16($sp) ++ CFI_RESTORE (0) ++ ++ // Restore frame/link register ++ ldl $fp, 8($sp) ++ ldl $r26, 0($sp) ++ CFI_RESTORE (15) ++ CFI_RESTORE (26) ++ CFI_DEF_CFA (31, 0) ++ ldi $sp, 32($sp) ++ ++ // tail jump to libc setjmp ++ ldl $r27, _ZN14__interception11real_setjmpE($r29) !literal ++ ldl $r27, 0($r27) ++ ++ jmp $r31, ($r27) ++ ++ CFI_ENDPROC ++ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(setjmp)) ++ ++ASM_HIDDEN(__tsan_setjmp) ++.comm _ZN14__interception12real__setjmpE,8,8 ++.globl ASM_SYMBOL_INTERCEPTOR(_setjmp) ++ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(_setjmp)) ++ASM_SYMBOL_INTERCEPTOR(_setjmp): ++ ldgp $r29, 0($r27) ++ CFI_STARTPROC ++ ++ // Save frame/link register ++ ldi $sp, -32($sp) ++ stl $r26, 0($sp) ++ stl $fp, 8($sp) ++ CFI_DEF_CFA_OFFSET (32) ++ CFI_OFFSET (26, -32) ++ CFI_OFFSET (15, -24) ++ ++ // Adjust the SP for previous frame ++ ldi $fp,0($sp) ++ CFI_DEF_CFA_REGISTER (15) ++ ++ // Save env parameter ++ stl $r16, 16($sp) ++ CFI_OFFSET (0, -16) ++ ++ // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` ++ ldi $r16, 32($sp) ++ ++ // call tsan interceptor ++ //ldih $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprelhigh ++ //ldi $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprellow ++ ldl $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !literal ++ call $r26, ($r27), 0 ++ ldgp $r29, 0($r26) ++ ++ // Restore env parameter ++ ldl $r16, 16($sp) ++ CFI_RESTORE (0) ++ ++ // Restore frame/link register ++ ldl $fp, 8($sp) ++ ldl $r26, 0($sp) ++ CFI_RESTORE (15) ++ CFI_RESTORE (26) ++ CFI_DEF_CFA (31, 0) ++ ldi $sp, 32($sp) ++ ++ // tail jump to libc setjmp ++ ldl $r27, _ZN14__interception12real__setjmpE($r29) !literal ++ ldl $r27, 0($r27) ++ ++ jmp $r31, ($r27) ++ ++ CFI_ENDPROC ++ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(_setjmp)) ++ ++ASM_HIDDEN(__tsan_setjmp) ++.comm _ZN14__interception14real_sigsetjmpE,8,8 ++.globl ASM_SYMBOL_INTERCEPTOR(sigsetjmp) ++ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(sigsetjmp)) ++ASM_SYMBOL_INTERCEPTOR(sigsetjmp): ++ ldgp $r29, 0($r27) ++ CFI_STARTPROC ++ ++ // Save frame/link register ++ ldi $sp, -32($sp) ++ stl $r26, 0($sp) ++ stl $fp, 8($sp) ++ CFI_DEF_CFA_OFFSET (32) ++ CFI_OFFSET (26, -32) ++ CFI_OFFSET (15, -24) ++ ++ // Adjust the SP for previous frame ++ ldi $fp,0($sp) ++ CFI_DEF_CFA_REGISTER (15) ++ ++ // Save env parameter ++ stl $r16, 16($sp) ++ stl $r17, 24($sp) ++ CFI_OFFSET (16, -16) ++ CFI_OFFSET (17, -8) ++ ++ // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` ++ ldi $r16, 32($sp) ++ ++ // call tsan interceptor ++ //ldih $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprelhigh ++ //ldi $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprellow ++ ldl 
$r27, ASM_SYMBOL(__tsan_setjmp)($r29) !literal ++ call $r26, ($r27), 0 ++ ldgp $r29, 0($r26) ++ ++ // Restore env parameter ++ ldl $r16, 16($sp) ++ ldl $r17, 24($sp) ++ CFI_RESTORE (0) ++ CFI_RESTORE (1) ++ ++ // Restore frame/link register ++ ldl $fp, 8($sp) ++ ldl $r26, 0($sp) ++ CFI_RESTORE (15) ++ CFI_RESTORE (26) ++ CFI_DEF_CFA (31, 0) ++ ldi $sp, 32($sp) ++ ++ // tail jump to libc setjmp ++ ldl $r27, _ZN14__interception14real_sigsetjmpE($r29) !literal ++ ldl $r27, 0($r27) ++ ++ jmp $r31, ($r27) ++ ++ CFI_ENDPROC ++ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(sigsetjmp)) ++ ++ASM_HIDDEN(__tsan_setjmp) ++.comm _ZN14__interception16real___sigsetjmpE,8,8 ++.globl ASM_SYMBOL_INTERCEPTOR(__sigsetjmp) ++ASM_TYPE_FUNCTION(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp)) ++ASM_SYMBOL_INTERCEPTOR(__sigsetjmp): ++ ldgp $r29, 0($r27) ++ CFI_STARTPROC ++ ++ // Save frame/link register ++ ldi $sp, -32($sp) ++ stl $r26, 0($sp) ++ stl $fp, 8($sp) ++ CFI_DEF_CFA_OFFSET (32) ++ CFI_OFFSET (26, -32) ++ CFI_OFFSET (15, -24) ++ ++ // Adjust the SP for previous frame ++ ldi $fp,0($sp) ++ CFI_DEF_CFA_REGISTER (15) ++ ++ // Save env parameter ++ stl $r16, 16($sp) ++ stl $r17, 24($sp) ++ CFI_OFFSET (16, -16) ++ CFI_OFFSET (17, -8) ++ ++ // Obtain SP, first argument to `void __tsan_setjmp(uptr sp)` ++ ldi $r16, 32($sp) ++ ++ // call tsan interceptor ++ //ldih $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprelhigh ++ //ldi $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !gprellow ++ ldl $r27, ASM_SYMBOL(__tsan_setjmp)($r29) !literal ++ call $r26, ($r27), 0 ++ ldgp $r29, 0($r26) ++ ++ // Restore env parameter ++ ldl $r16, 16($sp) ++ ldl $r17, 24($sp) ++ CFI_RESTORE (0) ++ CFI_RESTORE (1) ++ ++ // Restore frame/link register ++ ldl $fp, 8($sp) ++ ldl $r26, 0($sp) ++ CFI_RESTORE (15) ++ CFI_RESTORE (26) ++ CFI_DEF_CFA (31, 0) ++ ldi $sp, 32($sp) ++ ++ // tail jump to libc setjmp ++ ldl $r27, _ZN14__interception16real___sigsetjmpE($r29) !literal ++ ldl $r27, 0($r27) ++ jmp $r31, ($r27) ++ ++ CFI_ENDPROC ++ASM_SIZE(ASM_SYMBOL_INTERCEPTOR(__sigsetjmp)) ++ ++#endif +-- +2.43.0 + diff --git a/gcc.spec b/gcc.spec index 97017bf..12df68f 100644 --- a/gcc.spec +++ b/gcc.spec @@ -16,32 +16,32 @@ %else %global build_libquadmath 0 %endif -%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 +%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 sw_64 %global build_libasan 1 %else %global build_libasan 0 %endif -%ifarch x86_64 ppc64 ppc64le aarch64 +%ifarch x86_64 ppc64 ppc64le aarch64 sw_64 %global build_libtsan 1 %else %global build_libtsan 0 %endif -%ifarch x86_64 ppc64 ppc64le aarch64 +%ifarch x86_64 ppc64 ppc64le aarch64 sw_64 %global build_liblsan 1 %else %global build_liblsan 0 %endif -%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 +%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 sw_64 %global build_libubsan 1 %else %global build_libubsan 0 %endif -%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 %{mips} riscv64 +%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 %{mips} riscv64 sw_64 %global build_libatomic 1 %else %global build_libatomic 0 %endif -%ifarch %{ix86} x86_64 %{arm} alpha ppc ppc64 ppc64le ppc64p7 s390 s390x aarch64 +%ifarch %{ix86} x86_64 %{arm} alpha ppc ppc64 ppc64le ppc64p7 s390 s390x aarch64 sw_64 %global build_libitm 1 %else %global build_libitm 0 @@ -61,7 +61,7 @@ Summary: Various compilers (C, C++, Objective-C, ...) 
Name: gcc Version: %{gcc_version} -Release: 69 +Release: 70 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD URL: https://gcc.gnu.org @@ -320,6 +320,23 @@ patch209: 0209-Bugfix-remove-extra-files-in-temp-dir.patch Patch210: 0210-Revert-feature-mull64.patch Patch211: 0211-Fix-fail-in-cmtst-patterns-src-openEuler-gcc-IA52SK.patch +# Part 1001-1999 +%ifarch sw_64 +Patch1001: 0001-Sw64-Port-add-gcc-compiler.patch +Patch1002: 0002-Sw64-Port-gcc-testsuite.patch +Patch1003: 0003-Sw64-Port-libatomic.patch +Patch1004: 0004-Sw64-Port-libffi.patch +Patch1005: 0005-Sw64-Port-libgcc.patch +Patch1006: 0006-Sw64-Port-libgfortran.patch +Patch1007: 0007-Sw64-Port-libgo.patch +Patch1008: 0008-Sw64-Port-libgomp.patch +Patch1009: 0009-Sw64-Port-libitm.patch +Patch1010: 0010-Sw64-Port-libstdc.patch +Patch1011: 0011-Sw64-Port-set-raise-FPE-when-DivbyZero-on-Sw_64-plat.patch +Patch1012: 0012-Sw64-Port-add-lex-builtin-support-in-libcpp.patch +Patch1013: 0013-Sw64-Port-libsanitizer.patch +%endif + %global gcc_target_platform %{_arch}-linux-gnu %if %{build_go} @@ -984,6 +1001,22 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch210 -p1 %patch211 -p1 +%ifarch sw_64 +%patch1001 -p1 +%patch1002 -p1 +%patch1003 -p1 +%patch1004 -p1 +%patch1005 -p1 +%patch1006 -p1 +%patch1007 -p1 +%patch1008 -p1 +%patch1009 -p1 +%patch1010 -p1 +%patch1011 -p1 +%patch1012 -p1 +%patch1013 -p1 +%endif + %build export CONFIG_SITE=NONE @@ -1066,6 +1099,10 @@ CC="$CC" CFLAGS="$OPT_FLAGS" \ --with-multilib-list=lp64 \ --enable-bolt %endif +%ifarch sw_64 + --with-cpu=sw6b --disable-libquadmath --disable-multilib \ + --enable-tls +%endif %ifarch riscv64 --with-arch=rv64g --with-abi=lp64d \ --disable-libquadmath --disable-multilib @@ -1105,6 +1142,8 @@ CC="$CC" CXX="$CXX" CFLAGS="$OPT_FLAGS" \ --with-arch=rv64g --with-abi=lp64d \ --disable-libquadmath --disable-multilib %endif +%ifarch sw_64 +%endif make %{?_smp_mflags} BOOT_CFLAGS="$OPT_FLAGS" all-gcc cp -a gcc/libgccjit.so* ../gcc/ cd ../gcc/ @@ -1330,7 +1369,7 @@ mkdir -p %{buildroot}/%{_lib} mv -f %{buildroot}%{_prefix}/%{_lib}/libgcc_s.so.1 %{buildroot}/%{_lib}/libgcc_s-%{gcc_major}.so.1 chmod 755 %{buildroot}/%{_lib}/libgcc_s-%{gcc_major}.so.1 ln -sf libgcc_s-%{gcc_major}.so.1 %{buildroot}/%{_lib}/libgcc_s.so.1 -%ifarch %{ix86} x86_64 ppc ppc64 ppc64p7 ppc64le %{arm} aarch64 riscv64 +%ifarch %{ix86} x86_64 ppc ppc64 ppc64p7 ppc64le %{arm} aarch64 riscv64 sw_64 rm -f $FULLPATH/libgcc_s.so echo '/* GNU ld script Use the shared library, but some functions are only in @@ -1427,6 +1466,18 @@ mv ../../../libasan_preinit.o libasan_preinit.o %if %{build_libubsan} ln -sf ../../../libubsan.so.1.* libubsan.so %endif +%ifarch sw_64 +%if %{build_libtsan} +rm -f libtsan.so +echo 'INPUT ( %{_prefix}/%{_lib}/'`echo ../../../../%{_lib}/libtsan.so.2.* | sed 's,^.*libt,libt,'`' )' > libtsan.so +mv ../../../../%{_lib}/libtsan_preinit.o libtsan_preinit.o +%endif +%if %{build_liblsan} +rm -f liblsan.so +echo 'INPUT ( %{_prefix}/%{_lib}/'`echo ../../../../%{_lib}/liblsan.so.0.* | sed 's,^.*libl,libl,'`' )' > liblsan.so +mv ../../../../%{_lib}/liblsan_preinit.o liblsan_preinit.o +%endif +%endif else %if %{build_objc} ln -sf ../../../../%{_lib}/libobjc.so.4 libobjc.so @@ -2227,6 +2278,10 @@ end %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/htmxlintrin.h %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/vecintrin.h %endif +%ifarch sw_64 
+%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/sw_64/sw_64-protos.h +%{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/plugin/include/config/sw_64/sw_64.h +%endif %if %{build_libasan} %{_prefix}/lib/gcc/%{gcc_target_platform}/%{gcc_major}/include/sanitizer %endif @@ -3017,6 +3072,12 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Tue Jan 21 2025 swcompiler - 10.3.1-70 +- Type: Sw64 +- ID:NA +- SUG:NA +- DESC: Add sw64 architecture support. + * Fri Jan 17 2025 huzife <634763349@qq.com> - 10.3.1-69 - Type:Bugfix - ID:NA -- Gitee